Compare commits

..

No commits in common. "4a66797c79216913c176333c93c34d63d7ddc57e" and "0ad068ca78e88e7c21af281b483e70728b378468" have entirely different histories.

4 changed files with 51 additions and 5 deletions

View File

@ -0,0 +1,18 @@
package com.magamochi.mangamochi.client;
import org.springframework.cloud.openfeign.FeignClient;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
/**
 * Declarative Feign client for the external {@code web-scrapper} service.
 *
 * <p>The target base URL is taken from the {@code web-scrapper.endpoint} property.
 */
@FeignClient(name = "web-scrapper", url = "${web-scrapper.endpoint}")
public interface WebScrapperClient {

  /**
   * Request payload sent to the scrapper.
   *
   * @param url address of the page to scrape
   */
  record Request(String url) {}

  /**
   * Response payload returned by the scrapper.
   *
   * @param page_source source of the scraped page; the snake_case component name presumably
   *     mirrors the JSON key used by the service (verify against the service contract before
   *     renaming)
   */
  record Response(String page_source) {}

  /**
   * Issues a JSON {@code POST} to the configured endpoint asking the service to scrape a page.
   *
   * @param dto request body carrying the URL to scrape
   * @return the decoded response body
   */
  @PostMapping(
      produces = MediaType.APPLICATION_JSON_VALUE,
      consumes = MediaType.APPLICATION_JSON_VALUE)
  Response scrape(@RequestBody Request dto);
}

View File

@ -0,0 +1,23 @@
package com.magamochi.mangamochi.service;
import com.magamochi.mangamochi.client.WebScrapperClient;
import java.io.IOException;
import lombok.RequiredArgsConstructor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.springframework.stereotype.Service;
/**
 * Thin wrapper around {@link WebScrapperClient} that converts a scraped page into a jsoup
 * {@link Document}.
 */
@Service
@RequiredArgsConstructor
public class WebScrapperClientProxyService {
  private final WebScrapperClient webScrapperClient;

  /**
   * Scrapes the given URL through the web-scrapper service and parses the returned page source.
   *
   * @param url address of the page to scrape; also used as the document's base URI
   * @return the parsed document
   * @throws IOException if the scrapper returned no response or no page source
   */
  public Document scrapeToJsoupDocument(String url) throws IOException {
    var htmlContent = scrape(url);
    if (htmlContent == null) {
      // Report a missing body as an I/O failure instead of letting jsoup reject a null input.
      throw new IOException("Web scrapper returned no page source for url " + url);
    }

    // Supply the page URL as base URI so relative links resolve (absUrl / "abs:" attributes),
    // matching what a direct Jsoup.connect(url).get() fetch provides. An empty base URI is what
    // the single-argument Jsoup.parse(html) uses, so it is kept as the fallback for a null url.
    var baseUri = url != null ? url : "";
    return Jsoup.parse(htmlContent, baseUri);
  }

  /**
   * Calls the scrapper service and extracts the raw page source.
   *
   * @return the page source, or {@code null} when the response or its page source is absent
   */
  private String scrape(String url) {
    var response = webScrapperClient.scrape(new WebScrapperClient.Request(url));
    return response == null ? null : response.page_source();
  }
}

View File

@ -4,6 +4,7 @@ import com.magamochi.mangamochi.model.dto.ContentProviderMangaChapterResponseDTO
import com.magamochi.mangamochi.model.dto.ContentProviderMangaInfoResponseDTO; import com.magamochi.mangamochi.model.dto.ContentProviderMangaInfoResponseDTO;
import com.magamochi.mangamochi.model.entity.MangaProvider; import com.magamochi.mangamochi.model.entity.MangaProvider;
import com.magamochi.mangamochi.model.enumeration.MangaStatus; import com.magamochi.mangamochi.model.enumeration.MangaStatus;
import com.magamochi.mangamochi.service.WebScrapperClientProxyService;
import com.magamochi.mangamochi.service.providers.ContentProvider; import com.magamochi.mangamochi.service.providers.ContentProvider;
import com.magamochi.mangamochi.service.providers.ContentProviders; import com.magamochi.mangamochi.service.providers.ContentProviders;
import com.magamochi.mangamochi.service.providers.PagedContentProvider; import com.magamochi.mangamochi.service.providers.PagedContentProvider;
@ -15,7 +16,6 @@ import java.util.stream.IntStream;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
@ -27,6 +27,8 @@ public class MangaLivreBlogProvider implements ContentProvider, PagedContentProv
private final String url = "https://mangalivre.blog/manga/"; private final String url = "https://mangalivre.blog/manga/";
private final WebScrapperClientProxyService webScrapperClientProxyService;
@Override @Override
public List<ContentProviderMangaChapterResponseDTO> getAvailableChapters( public List<ContentProviderMangaChapterResponseDTO> getAvailableChapters(
MangaProvider mangaProvider) { MangaProvider mangaProvider) {
@ -36,7 +38,7 @@ public class MangaLivreBlogProvider implements ContentProvider, PagedContentProv
mangaProvider.getManga().getTitle()); mangaProvider.getManga().getTitle());
try { try {
var document = Jsoup.connect(mangaProvider.getUrl()).get(); var document = webScrapperClientProxyService.scrapeToJsoupDocument(mangaProvider.getUrl());
var chapterList = document.getElementsByClass("chapters-list").getFirst(); var chapterList = document.getElementsByClass("chapters-list").getFirst();
var chapterItems = chapterList.getElementsByClass("chapter-item"); var chapterItems = chapterList.getElementsByClass("chapter-item");
@ -65,7 +67,7 @@ public class MangaLivreBlogProvider implements ContentProvider, PagedContentProv
log.info("Getting images from {}, url {}", ContentProviders.MANGA_LIVRE_BLOG, chapterUrl); log.info("Getting images from {}, url {}", ContentProviders.MANGA_LIVRE_BLOG, chapterUrl);
try { try {
var document = Jsoup.connect(chapterUrl).get(); var document = webScrapperClientProxyService.scrapeToJsoupDocument(chapterUrl);
var chapterImageContainers = document.getElementsByClass("chapter-image-container"); var chapterImageContainers = document.getElementsByClass("chapter-image-container");
var imageUrls = var imageUrls =
@ -105,7 +107,7 @@ public class MangaLivreBlogProvider implements ContentProvider, PagedContentProv
log.info("Getting mangas from {}, page {}", ContentProviders.MANGA_LIVRE_BLOG, page); log.info("Getting mangas from {}, page {}", ContentProviders.MANGA_LIVRE_BLOG, page);
try { try {
var document = Jsoup.connect(url + "page/" + page).get(); var document = webScrapperClientProxyService.scrapeToJsoupDocument(url + "page/" + page);
var mangaGrid = document.getElementsByClass("manga-grid").getFirst(); var mangaGrid = document.getElementsByClass("manga-grid").getFirst();
var mangaElements = mangaGrid.getElementsByTag("article"); var mangaElements = mangaGrid.getElementsByTag("article");
@ -158,7 +160,7 @@ public class MangaLivreBlogProvider implements ContentProvider, PagedContentProv
log.info("Getting total pages for {}", ContentProviders.MANGA_LIVRE_BLOG); log.info("Getting total pages for {}", ContentProviders.MANGA_LIVRE_BLOG);
try { try {
var document = Jsoup.connect(url).get(); var document = webScrapperClientProxyService.scrapeToJsoupDocument(url);
var navLinks = document.getElementsByClass("nav-links").getFirst(); var navLinks = document.getElementsByClass("nav-links").getFirst();
var links = navLinks.getElementsByTag("a"); var links = navLinks.getElementsByTag("a");

View File

@ -35,6 +35,9 @@ springdoc:
api-docs: api-docs:
path: /api-docs path: /api-docs
web-scrapper:
endpoint: ${WEBSCRAPPER_ENDPOINT}
flare-solverr: flare-solverr:
endpoint: ${FLARESOLVERR_ENDPOINT} endpoint: ${FLARESOLVERR_ENDPOINT}