Compare commits
1 Commits
0c179fb32d
...
32c63a254f
| Author | SHA1 | Date | |
|---|---|---|---|
| 32c63a254f |
@ -0,0 +1,24 @@
|
|||||||
|
package com.magamochi.ingestion.client;
|
||||||
|
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import org.springframework.cloud.openfeign.FeignClient;
|
||||||
|
import org.springframework.http.MediaType;
|
||||||
|
import org.springframework.web.bind.annotation.PostMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestBody;
|
||||||
|
|
||||||
|
@FeignClient(name = "scrollable-scrapper", url = "${scrollable-scrapper.endpoint}")
|
||||||
|
public interface ScrollableScrapperClient {
|
||||||
|
@PostMapping(
|
||||||
|
consumes = MediaType.APPLICATION_JSON_VALUE,
|
||||||
|
produces = MediaType.APPLICATION_JSON_VALUE)
|
||||||
|
GetResponse get(@RequestBody GetRequest request);
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Builder
|
||||||
|
class GetRequest {
|
||||||
|
private final String url;
|
||||||
|
}
|
||||||
|
|
||||||
|
record GetResponse(String pageSource) {}
|
||||||
|
}
|
||||||
@ -11,12 +11,13 @@ import com.magamochi.ingestion.model.dto.MangaInfoDTO;
|
|||||||
import com.magamochi.ingestion.providers.ContentProvider;
|
import com.magamochi.ingestion.providers.ContentProvider;
|
||||||
import com.magamochi.ingestion.providers.PagedContentProvider;
|
import com.magamochi.ingestion.providers.PagedContentProvider;
|
||||||
import com.magamochi.ingestion.service.FlareService;
|
import com.magamochi.ingestion.service.FlareService;
|
||||||
|
import com.magamochi.ingestion.service.ScrollableScrapperService;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
|
import java.util.stream.IntStream;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.log4j.Log4j2;
|
import lombok.extern.log4j.Log4j2;
|
||||||
import org.apache.commons.lang3.NotImplementedException;
|
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
@ -27,6 +28,7 @@ public class TaimuProvider implements ContentProvider, PagedContentProvider {
|
|||||||
private final String baseUrl = "https://taimumangas.rzword.xyz";
|
private final String baseUrl = "https://taimumangas.rzword.xyz";
|
||||||
|
|
||||||
private final FlareService flareService;
|
private final FlareService flareService;
|
||||||
|
private final ScrollableScrapperService scrollableScrapperService;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<ContentInfoDTO> getAvailableChapters(MangaContentProvider provider) {
|
public List<ContentInfoDTO> getAvailableChapters(MangaContentProvider provider) {
|
||||||
@ -93,8 +95,26 @@ public class TaimuProvider implements ContentProvider, PagedContentProvider {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<ContentImageInfoDTO> getContentImages(String chapterUrl) {
|
public List<ContentImageInfoDTO> getContentImages(String chapterUrl) {
|
||||||
throw new NotImplementedException(
|
log.info("Getting images from {}, url {}", ContentProviders.TAIMU, chapterUrl);
|
||||||
"getContentImages is not implemented for " + ContentProviders.TAIMU);
|
|
||||||
|
try {
|
||||||
|
var document = scrollableScrapperService.getContentAsJsoupDocument(chapterUrl);
|
||||||
|
|
||||||
|
var chapterImages = document.select("img.w-full.h-auto.object-contain.cursor-pointer");
|
||||||
|
|
||||||
|
var imageUrls =
|
||||||
|
chapterImages.stream()
|
||||||
|
.map(chapterImagesElement -> chapterImagesElement.attr("src"))
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
return IntStream.range(0, imageUrls.size())
|
||||||
|
.boxed()
|
||||||
|
.map(position -> new ContentImageInfoDTO(position, imageUrls.get(position)))
|
||||||
|
.toList();
|
||||||
|
} catch (NoSuchElementException e) {
|
||||||
|
log.error("Error parsing manga images from " + ContentProviders.TAIMU, e);
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
@ -0,0 +1,22 @@
|
|||||||
|
package com.magamochi.ingestion.service;
|
||||||
|
|
||||||
|
import com.magamochi.ingestion.client.ScrollableScrapperClient;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class ScrollableScrapperService {
|
||||||
|
private final ScrollableScrapperClient client;
|
||||||
|
|
||||||
|
public Document getContentAsJsoupDocument(String url) {
|
||||||
|
return Jsoup.parse(getContent(url));
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getContent(String url) {
|
||||||
|
|
||||||
|
return client.get(ScrollableScrapperClient.GetRequest.builder().url(url).build()).pageSource();
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -22,9 +22,9 @@ spring:
|
|||||||
openfeign:
|
openfeign:
|
||||||
client:
|
client:
|
||||||
config:
|
config:
|
||||||
web-scrapper:
|
scrollable-scrapper:
|
||||||
connect-timeout: 240000
|
connect-timeout: 480000
|
||||||
read-timeout: 240000
|
read-timeout: 480000
|
||||||
rabbitmq:
|
rabbitmq:
|
||||||
host: ${RABBITMQ_HOST}
|
host: ${RABBITMQ_HOST}
|
||||||
port: ${RABBITMQ_PORT}
|
port: ${RABBITMQ_PORT}
|
||||||
@ -41,6 +41,9 @@ springdoc:
|
|||||||
flare-solverr:
|
flare-solverr:
|
||||||
endpoint: ${FLARESOLVERR_ENDPOINT}
|
endpoint: ${FLARESOLVERR_ENDPOINT}
|
||||||
|
|
||||||
|
scrollable-scrapper:
|
||||||
|
endpoint: ${SCROLLABLE_SCRAPPER_ENDPOINT}
|
||||||
|
|
||||||
minio:
|
minio:
|
||||||
endpoint: ${MINIO_ENDPOINT}
|
endpoint: ${MINIO_ENDPOINT}
|
||||||
accessKey: ${MINIO_USER}
|
accessKey: ${MINIO_USER}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user