Compare commits

..

2 Commits

Author SHA1 Message Date
rov
4a66797c79 Merge pull request 'feat(scrapper): use jsoup directly for the manga livre blog provider' (#22) from feature/flare-solverr into main
Reviewed-on: #22
2025-11-15 22:22:07 -03:00
cc8efc9b61 feat(scrapper): use jsoup directly for the manga livre blog provider
All checks were successful
ci/woodpecker/push/pipeline Pipeline was successful
2025-11-15 19:53:17 -03:00
4 changed files with 5 additions and 51 deletions

View File

@ -1,18 +0,0 @@
package com.magamochi.mangamochi.client;
import org.springframework.cloud.openfeign.FeignClient;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
/**
 * Declarative Feign client for the service named {@code web-scrapper}, whose base URL is read from
 * the {@code web-scrapper.endpoint} property.
 */
@FeignClient(name = "web-scrapper", url = "${web-scrapper.endpoint}")
public interface WebScrapperClient {

  /**
   * Request body sent to the service.
   *
   * @param url presumably the address of the page the service should fetch (confirm against the
   *     service contract)
   */
  record Request(String url) {}

  /**
   * Response body returned by the service.
   *
   * @param page_source textual payload of the reply; the snake_case component name presumably
   *     mirrors the JSON field of the same name, so it must not be renamed without adding an
   *     explicit mapping
   */
  record Response(String page_source) {}

  /**
   * Issues a POST to the configured endpoint, sending {@code dto} as JSON and expecting JSON back.
   *
   * @param dto the request body
   * @return the deserialized reply of the service
   */
  @PostMapping(
      produces = MediaType.APPLICATION_JSON_VALUE,
      consumes = MediaType.APPLICATION_JSON_VALUE)
  Response scrape(@RequestBody Request dto);
}

View File

@ -1,23 +0,0 @@
package com.magamochi.mangamochi.service;
import com.magamochi.mangamochi.client.WebScrapperClient;
import java.io.IOException;
import lombok.RequiredArgsConstructor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.springframework.stereotype.Service;
/**
 * Thin adapter over {@link WebScrapperClient} that returns the fetched page as a parsed jsoup
 * {@link Document} instead of a raw string.
 */
@Service
@RequiredArgsConstructor
public class WebScrapperClientProxyService {
  private final WebScrapperClient webScrapperClient;

  /**
   * Asks the web-scrapper service for the page at {@code url} and parses the returned source.
   *
   * @param url address forwarded to the web-scrapper service
   * @return the page source parsed with {@link Jsoup#parse(String)}
   * @throws IOException if the service replied without a body or without a page source
   */
  public Document scrapeToJsoupDocument(String url) throws IOException {
    var htmlContent = scrape(url);
    if (htmlContent == null) {
      // Report a missing payload through the declared checked exception instead of letting it
      // escape as an unchecked failure from inside the parser.
      throw new IOException("Web scrapper returned no page source for url " + url);
    }
    return Jsoup.parse(htmlContent);
  }

  /** Calls the remote service and returns its page source, or {@code null} when absent. */
  private String scrape(String url) {
    var response = webScrapperClient.scrape(new WebScrapperClient.Request(url));
    return response == null ? null : response.page_source();
  }
}

View File

@ -4,7 +4,6 @@ import com.magamochi.mangamochi.model.dto.ContentProviderMangaChapterResponseDTO
import com.magamochi.mangamochi.model.dto.ContentProviderMangaInfoResponseDTO;
import com.magamochi.mangamochi.model.entity.MangaProvider;
import com.magamochi.mangamochi.model.enumeration.MangaStatus;
import com.magamochi.mangamochi.service.WebScrapperClientProxyService;
import com.magamochi.mangamochi.service.providers.ContentProvider;
import com.magamochi.mangamochi.service.providers.ContentProviders;
import com.magamochi.mangamochi.service.providers.PagedContentProvider;
@ -16,6 +15,7 @@ import java.util.stream.IntStream;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Service;
@ -27,8 +27,6 @@ public class MangaLivreBlogProvider implements ContentProvider, PagedContentProv
private final String url = "https://mangalivre.blog/manga/";
private final WebScrapperClientProxyService webScrapperClientProxyService;
@Override
public List<ContentProviderMangaChapterResponseDTO> getAvailableChapters(
MangaProvider mangaProvider) {
@ -38,7 +36,7 @@ public class MangaLivreBlogProvider implements ContentProvider, PagedContentProv
mangaProvider.getManga().getTitle());
try {
var document = webScrapperClientProxyService.scrapeToJsoupDocument(mangaProvider.getUrl());
var document = Jsoup.connect(mangaProvider.getUrl()).get();
var chapterList = document.getElementsByClass("chapters-list").getFirst();
var chapterItems = chapterList.getElementsByClass("chapter-item");
@ -67,7 +65,7 @@ public class MangaLivreBlogProvider implements ContentProvider, PagedContentProv
log.info("Getting images from {}, url {}", ContentProviders.MANGA_LIVRE_BLOG, chapterUrl);
try {
var document = webScrapperClientProxyService.scrapeToJsoupDocument(chapterUrl);
var document = Jsoup.connect(chapterUrl).get();
var chapterImageContainers = document.getElementsByClass("chapter-image-container");
var imageUrls =
@ -107,7 +105,7 @@ public class MangaLivreBlogProvider implements ContentProvider, PagedContentProv
log.info("Getting mangas from {}, page {}", ContentProviders.MANGA_LIVRE_BLOG, page);
try {
var document = webScrapperClientProxyService.scrapeToJsoupDocument(url + "page/" + page);
var document = Jsoup.connect(url + "page/" + page).get();
var mangaGrid = document.getElementsByClass("manga-grid").getFirst();
var mangaElements = mangaGrid.getElementsByTag("article");
@ -160,7 +158,7 @@ public class MangaLivreBlogProvider implements ContentProvider, PagedContentProv
log.info("Getting total pages for {}", ContentProviders.MANGA_LIVRE_BLOG);
try {
var document = webScrapperClientProxyService.scrapeToJsoupDocument(url);
var document = Jsoup.connect(url).get();
var navLinks = document.getElementsByClass("nav-links").getFirst();
var links = navLinks.getElementsByTag("a");

View File

@ -35,9 +35,6 @@ springdoc:
api-docs:
path: /api-docs
web-scrapper:
endpoint: ${WEBSCRAPPER_ENDPOINT}
flare-solverr:
endpoint: ${FLARESOLVERR_ENDPOINT}