From 247788c3d04037da1c971ff8fe1f3e81c7a47bdc Mon Sep 17 00:00:00 2001 From: Rodrigo Verdiani Date: Sat, 15 Nov 2025 19:16:30 -0300 Subject: [PATCH] feat(scrapper): use flare-solverr instead of custom web scrapper --- .env | 1 + .../mangamochi/client/FlareClient.java | 73 +++++++++++++++++ .../controller/ManagementController.java | 12 +++ .../mangamochi/model/entity/FlareSession.java | 11 +++ .../registry/FlareSessionRegistry.java | 24 ++++++ .../mangamochi/service/FlareService.java | 27 +++++++ .../service/FlareSessionManager.java | 60 ++++++++++++++ .../service/providers/impl/BatoProvider.java | 80 +++++++++---------- .../impl/MangaLivreBlogProvider.java | 11 +++ .../providers/impl/MangaLivreProvider.java | 36 ++++++--- .../providers/impl/PinkRosaScanProvider.java | 34 +++++--- .../task/FlareSessionCleanupTask.java | 35 ++++++++ .../task/FlareStartupCleanupTask.java | 26 ++++++ .../mangamochi/task/UpdateMangaListTask.java | 20 ++++- src/main/resources/application.yml | 9 +++ 15 files changed, 392 insertions(+), 67 deletions(-) create mode 100644 src/main/java/com/magamochi/mangamochi/client/FlareClient.java create mode 100644 src/main/java/com/magamochi/mangamochi/model/entity/FlareSession.java create mode 100644 src/main/java/com/magamochi/mangamochi/registry/FlareSessionRegistry.java create mode 100644 src/main/java/com/magamochi/mangamochi/service/FlareService.java create mode 100644 src/main/java/com/magamochi/mangamochi/service/FlareSessionManager.java create mode 100644 src/main/java/com/magamochi/mangamochi/task/FlareSessionCleanupTask.java create mode 100644 src/main/java/com/magamochi/mangamochi/task/FlareStartupCleanupTask.java diff --git a/.env b/.env index bb405b1..1167246 100644 --- a/.env +++ b/.env @@ -6,6 +6,7 @@ MINIO_ENDPOINT=http://omv2.badger-pirarucu.ts.net:9000 MINIO_USER=rov MINIO_PASS=!E9v4i0v3 +FLARESOLVERR_ENDPOINT=https://flare-solverr.badger-pirarucu.ts.net WEBSCRAPPER_ENDPOINT=http://mangamochi.badger-pirarucu.ts.net:8090/url MANGAMATCHER_ENDPOINT=http://mangamochi.badger-pirarucu.ts.net:8000/match-title diff --git a/src/main/java/com/magamochi/mangamochi/client/FlareClient.java b/src/main/java/com/magamochi/mangamochi/client/FlareClient.java new file mode 100644 index 0000000..df4dc3a --- /dev/null +++ b/src/main/java/com/magamochi/mangamochi/client/FlareClient.java @@ -0,0 +1,73 @@ +package com.magamochi.mangamochi.client; + +import io.github.resilience4j.retry.annotation.Retry; +import java.util.List; +import lombok.Builder; +import lombok.Getter; +import org.springframework.cloud.openfeign.FeignClient; +import org.springframework.http.MediaType; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; + +@FeignClient(name = "flare-solverr", url = "${flare-solverr.endpoint}/v1") +@Retry(name = "FlareSolverrRetry") +public interface FlareClient { + @PostMapping( + consumes = MediaType.APPLICATION_JSON_VALUE, + produces = MediaType.APPLICATION_JSON_VALUE) + GetResponse get(@RequestBody GetRequest request); + + @PostMapping( + consumes = MediaType.APPLICATION_JSON_VALUE, + produces = MediaType.APPLICATION_JSON_VALUE) + SessionCreateResponse createSession(@RequestBody SessionCreateRequest request); + + @PostMapping( + consumes = MediaType.APPLICATION_JSON_VALUE, + produces = MediaType.APPLICATION_JSON_VALUE) + SessionDestroyResponse destroySession(@RequestBody SessionDestroyRequest request); + + @PostMapping( + consumes = MediaType.APPLICATION_JSON_VALUE, + produces = MediaType.APPLICATION_JSON_VALUE) + SessionListResponse listSessions(@RequestBody SessionListRequest request); + + @Getter + @Builder + class GetRequest { + @Builder.Default private final String cmd = "request.get"; + @Builder.Default private final Integer maxTimeout = 120000; + + private final String url; + private final String session; + } + + @Getter + @Builder + class SessionCreateRequest { + @Builder.Default private final String cmd = "sessions.create"; + } + + @Getter + @Builder + class SessionDestroyRequest { + @Builder.Default private final String cmd = "sessions.destroy"; + private final String session; + } + + @Getter + @Builder + class SessionListRequest { + @Builder.Default private final String cmd = "sessions.list"; + } + + record GetResponse(String status, String message, Solution solution) { + public record Solution(String url, Integer status, String response) {} + } + + record SessionCreateResponse(String status, String message, String session) {} + + record SessionDestroyResponse(String status, String message) {} + + record SessionListResponse(String status, String message, List sessions) {} +} diff --git a/src/main/java/com/magamochi/mangamochi/controller/ManagementController.java b/src/main/java/com/magamochi/mangamochi/controller/ManagementController.java index ed38fec..a1bc614 100644 --- a/src/main/java/com/magamochi/mangamochi/controller/ManagementController.java +++ b/src/main/java/com/magamochi/mangamochi/controller/ManagementController.java @@ -33,6 +33,18 @@ public class ManagementController { return DefaultResponseDTO.ok().build(); } + @Operation( + summary = "Queue update provider manga list", + description = "Queue the retrieval of the manga list for a specific provider", + tags = {"Management"}, + operationId = "updateProviderMangaList") + @PostMapping("update-provider-manga-list") + public DefaultResponseDTO updateProviderMangaList(@RequestParam Long providerId) { + updateMangaListTask.updateProviderMangaList(providerId); + + return DefaultResponseDTO.ok().build(); + } + @Operation( summary = "Cleanup unused S3 images", description = "Triggers the cleanup of untracked S3 images", diff --git a/src/main/java/com/magamochi/mangamochi/model/entity/FlareSession.java b/src/main/java/com/magamochi/mangamochi/model/entity/FlareSession.java new file mode 100644 index 0000000..af02dba --- /dev/null +++ b/src/main/java/com/magamochi/mangamochi/model/entity/FlareSession.java @@ -0,0 +1,11 @@ +package com.magamochi.mangamochi.model.entity; + +import java.time.Instant; +import lombok.Builder; + +@Builder +public record FlareSession(String sessionId, Instant lastAccess) { + public FlareSession updateLastAccess() { + return new FlareSession(this.sessionId, Instant.now()); + } +} diff --git a/src/main/java/com/magamochi/mangamochi/registry/FlareSessionRegistry.java b/src/main/java/com/magamochi/mangamochi/registry/FlareSessionRegistry.java new file mode 100644 index 0000000..33f6b68 --- /dev/null +++ b/src/main/java/com/magamochi/mangamochi/registry/FlareSessionRegistry.java @@ -0,0 +1,24 @@ +package com.magamochi.mangamochi.registry; + +import com.magamochi.mangamochi.model.entity.FlareSession; +import java.util.concurrent.ConcurrentHashMap; +import lombok.Getter; +import org.springframework.stereotype.Component; + +@Component +@Getter +public class FlareSessionRegistry { + private final ConcurrentHashMap sessions = new ConcurrentHashMap<>(); + + public FlareSession get(String provider) { + return sessions.get(provider); + } + + public void put(String provider, FlareSession session) { + sessions.put(provider, session); + } + + public void remove(String provider) { + sessions.remove(provider); + } +} diff --git a/src/main/java/com/magamochi/mangamochi/service/FlareService.java b/src/main/java/com/magamochi/mangamochi/service/FlareService.java new file mode 100644 index 0000000..bac79a2 --- /dev/null +++ b/src/main/java/com/magamochi/mangamochi/service/FlareService.java @@ -0,0 +1,27 @@ +package com.magamochi.mangamochi.service; + +import com.magamochi.mangamochi.client.FlareClient; +import lombok.RequiredArgsConstructor; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.springframework.stereotype.Service; + +@Service +@RequiredArgsConstructor +public class FlareService { + private final FlareClient client; + private final FlareSessionManager sessionManager; + + public Document getContentAsJsoupDocument(String url, String providerName) { + return Jsoup.parse(getContent(url, providerName)); + } + + private String getContent(String url, String providerName) { + var session = sessionManager.getOrCreateSession(providerName); + + return client + .get(FlareClient.GetRequest.builder().url(url).session(session).build()) + .solution() + .response(); + } +} diff --git a/src/main/java/com/magamochi/mangamochi/service/FlareSessionManager.java b/src/main/java/com/magamochi/mangamochi/service/FlareSessionManager.java new file mode 100644 index 0000000..97c16d8 --- /dev/null +++ b/src/main/java/com/magamochi/mangamochi/service/FlareSessionManager.java @@ -0,0 +1,60 @@ +package com.magamochi.mangamochi.service; + +import static java.util.Objects.isNull; +import static java.util.Objects.nonNull; + +import com.magamochi.mangamochi.client.FlareClient; +import com.magamochi.mangamochi.model.entity.FlareSession; +import com.magamochi.mangamochi.registry.FlareSessionRegistry; +import java.time.Duration; +import java.time.Instant; +import lombok.RequiredArgsConstructor; +import lombok.extern.log4j.Log4j2; +import org.springframework.stereotype.Service; + +@Log4j2 +@Service +@RequiredArgsConstructor +public class FlareSessionManager { + private static final Duration TIMEOUT = Duration.ofMinutes(15); + + private final FlareClient flareClient; + private final FlareSessionRegistry registry; + + public String getOrCreateSession(String provider) { + log.info("Getting session for {}", provider); + + var session = registry.get(provider); + + if (isNull(session) || isExpired(session)) { + if (nonNull(session)) { + log.info("Session expired for {}", provider); + + flareClient.destroySession( + FlareClient.SessionDestroyRequest.builder().session(session.sessionId()).build()); + + registry.remove(provider); + } else { + log.info("Session not found for {}", provider); + } + + log.info("Creating session for {}", provider); + + var newId = + flareClient.createSession(FlareClient.SessionCreateRequest.builder().build()).session(); + + session = new FlareSession(newId, Instant.now()); + registry.put(provider, session); + } else { + log.info("Got session for {}", provider); + } + + registry.put(provider, session.updateLastAccess()); + + return session.sessionId(); + } + + private boolean isExpired(FlareSession session) { + return Duration.between(session.lastAccess(), Instant.now()).compareTo(TIMEOUT) > 0; + } +} diff --git a/src/main/java/com/magamochi/mangamochi/service/providers/impl/BatoProvider.java b/src/main/java/com/magamochi/mangamochi/service/providers/impl/BatoProvider.java index 5be4ca7..0870b45 100644 --- a/src/main/java/com/magamochi/mangamochi/service/providers/impl/BatoProvider.java +++ b/src/main/java/com/magamochi/mangamochi/service/providers/impl/BatoProvider.java @@ -11,11 +11,10 @@ import com.magamochi.mangamochi.model.entity.Provider; import com.magamochi.mangamochi.model.enumeration.ProviderStatus; import com.magamochi.mangamochi.model.repository.MangaProviderRepository; import com.magamochi.mangamochi.model.repository.ProviderRepository; +import com.magamochi.mangamochi.service.FlareService; import com.magamochi.mangamochi.service.MangaCreationService; -import com.magamochi.mangamochi.service.WebScrapperClientProxyService; import com.magamochi.mangamochi.service.providers.ContentProvider; import com.magamochi.mangamochi.service.providers.ContentProviders; -import java.io.IOException; import java.util.*; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -27,7 +26,9 @@ import org.springframework.stereotype.Service; @Service(ContentProviders.BATO) @RequiredArgsConstructor public class BatoProvider implements ContentProvider { - private final WebScrapperClientProxyService webScrapperClientProxyService; + private static final String URL = "https://battwo.com"; + + private final FlareService flareService; private final MangaCreationService mangaCreationService; private final ProviderRepository providerRepository; private final MangaProviderRepository mangaProviderRepository; @@ -35,7 +36,8 @@ public class BatoProvider implements ContentProvider { @Override public List getAvailableChapters(MangaProvider provider) { try { - var document = webScrapperClientProxyService.scrapeToJsoupDocument(provider.getUrl()); + var document = + flareService.getContentAsJsoupDocument(provider.getUrl(), ContentProviders.BATO); // Direct selector for chapter links var chapterLinks = document.select("div.scrollable-panel a[href*=/title/]"); @@ -56,8 +58,8 @@ public class BatoProvider implements ContentProvider { public Map getChapterImagesUrls(String chapterUrl) { try { var document = - webScrapperClientProxyService.scrapeToJsoupDocument( - "https://battwo.com" + chapterUrl + "?load=2"); + flareService.getContentAsJsoupDocument( + URL + chapterUrl + "?load=2", ContentProviders.BATO); // Select all chapter page images var imgElements = document.select("img.z-10.w-full.h-full"); @@ -87,42 +89,38 @@ public class BatoProvider implements ContentProvider { } public ImportMangaResponseDTO importManga(String url) { - try { - var document = webScrapperClientProxyService.scrapeToJsoupDocument(url); - // Method 1: Look for the main title in the manga info section - var titleElement = document.selectFirst("h3 a[href*=/title/]"); - if (isNull(titleElement)) { - throw new UnprocessableException("Manga title not found for url: " + url); - } + var document = flareService.getContentAsJsoupDocument(url, ContentProviders.BATO); - var mangaTitle = titleElement.text(); - - var provider = - providerRepository - .findByNameIgnoreCase("Bato") - .orElseGet( - () -> - providerRepository.save( - Provider.builder().name("Bato").status(ProviderStatus.ACTIVE).build())); - - var manga = mangaCreationService.getOrCreateManga(mangaTitle, url, provider); - - if (isNull(manga)) { - throw new NotFoundException("Manga could not be found or created for url: " + url); - } - - mangaProviderRepository.save( - MangaProvider.builder() - .manga(manga) - .mangaTitle(mangaTitle) - .provider(provider) - .url(url) - .build()); - - return new ImportMangaResponseDTO(manga.getId()); - - } catch (IOException e) { - throw new RuntimeException(e); + // Method 1: Look for the main title in the manga info section + var titleElement = document.selectFirst("h3 a[href*=/title/]"); + if (isNull(titleElement)) { + throw new UnprocessableException("Manga title not found for url: " + url); } + + var mangaTitle = titleElement.text(); + + var provider = + providerRepository + .findByNameIgnoreCase("Bato") + .orElseGet( + () -> + providerRepository.save( + Provider.builder().name("Bato").status(ProviderStatus.ACTIVE).build())); + + var manga = mangaCreationService.getOrCreateManga(mangaTitle, url, provider); + + if (isNull(manga)) { + throw new NotFoundException("Manga could not be found or created for url: " + url); + } + + mangaProviderRepository.save( + MangaProvider.builder() + .manga(manga) + .mangaTitle(mangaTitle) + .provider(provider) + .url(url) + .build()); + + return new ImportMangaResponseDTO(manga.getId()); } } diff --git a/src/main/java/com/magamochi/mangamochi/service/providers/impl/MangaLivreBlogProvider.java b/src/main/java/com/magamochi/mangamochi/service/providers/impl/MangaLivreBlogProvider.java index eec5ba3..8a5426a 100644 --- a/src/main/java/com/magamochi/mangamochi/service/providers/impl/MangaLivreBlogProvider.java +++ b/src/main/java/com/magamochi/mangamochi/service/providers/impl/MangaLivreBlogProvider.java @@ -32,6 +32,11 @@ public class MangaLivreBlogProvider implements ContentProvider, PagedContentProv @Override public List getAvailableChapters( MangaProvider mangaProvider) { + log.info( + "Getting available chapters from {}, manga {}", + ContentProviders.MANGA_LIVRE_BLOG, + mangaProvider.getManga().getTitle()); + try { var document = webScrapperClientProxyService.scrapeToJsoupDocument(mangaProvider.getUrl()); @@ -59,6 +64,8 @@ public class MangaLivreBlogProvider implements ContentProvider, PagedContentProv @Override public Map getChapterImagesUrls(String chapterUrl) { + log.info("Getting images from {}, url {}", ContentProviders.MANGA_LIVRE_BLOG, chapterUrl); + try { var document = webScrapperClientProxyService.scrapeToJsoupDocument(chapterUrl); @@ -97,6 +104,8 @@ public class MangaLivreBlogProvider implements ContentProvider, PagedContentProv @Override public List getMangasFromPage(Integer page) { + log.info("Getting mangas from {}, page {}", ContentProviders.MANGA_LIVRE_BLOG, page); + try { var document = webScrapperClientProxyService.scrapeToJsoupDocument(url + "page/" + page); @@ -148,6 +157,8 @@ public class MangaLivreBlogProvider implements ContentProvider, PagedContentProv @Override public Integer getTotalPages() { + log.info("Getting total pages for {}", ContentProviders.MANGA_LIVRE_BLOG); + try { var document = webScrapperClientProxyService.scrapeToJsoupDocument(url); diff --git a/src/main/java/com/magamochi/mangamochi/service/providers/impl/MangaLivreProvider.java b/src/main/java/com/magamochi/mangamochi/service/providers/impl/MangaLivreProvider.java index f00dad1..96b28c2 100644 --- a/src/main/java/com/magamochi/mangamochi/service/providers/impl/MangaLivreProvider.java +++ b/src/main/java/com/magamochi/mangamochi/service/providers/impl/MangaLivreProvider.java @@ -4,11 +4,10 @@ import com.magamochi.mangamochi.model.dto.ContentProviderMangaChapterResponseDTO import com.magamochi.mangamochi.model.dto.ContentProviderMangaInfoResponseDTO; import com.magamochi.mangamochi.model.entity.MangaProvider; import com.magamochi.mangamochi.model.enumeration.MangaStatus; -import com.magamochi.mangamochi.service.WebScrapperClientProxyService; +import com.magamochi.mangamochi.service.FlareService; import com.magamochi.mangamochi.service.providers.ContentProvider; import com.magamochi.mangamochi.service.providers.ContentProviders; import com.magamochi.mangamochi.service.providers.PagedContentProvider; -import java.io.IOException; import java.util.*; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -22,12 +21,18 @@ import org.springframework.stereotype.Service; public class MangaLivreProvider implements ContentProvider, PagedContentProvider { private final String url = "https://mangalivre.tv/manga/"; - private final WebScrapperClientProxyService webScrapperClientProxyService; + private final FlareService flareService; @Override public List getAvailableChapters(MangaProvider provider) { + log.info( + "Getting available chapters from {}, manga {}", + ContentProviders.MANGA_LIVRE, + provider.getManga().getTitle()); + try { - var document = webScrapperClientProxyService.scrapeToJsoupDocument(provider.getUrl()); + var document = + flareService.getContentAsJsoupDocument(provider.getUrl(), ContentProviders.MANGA_LIVRE); var chapterItems = document.getElementsByClass("wp-manga-chapter"); @@ -40,7 +45,7 @@ public class MangaLivreProvider implements ContentProvider, PagedContentProvider linkElement.text(), linkElement.attr("href"), null, null); }) .toList(); - } catch (NoSuchElementException | IOException e) { + } catch (NoSuchElementException e) { log.error("Error parsing mangas from MangaLivre", e); return List.of(); } @@ -48,8 +53,11 @@ public class MangaLivreProvider implements ContentProvider, PagedContentProvider @Override public Map getChapterImagesUrls(String chapterUrl) { + log.info("Getting images from {}, url {}", ContentProviders.MANGA_LIVRE, chapterUrl); + try { - var document = webScrapperClientProxyService.scrapeToJsoupDocument(chapterUrl); + var document = + flareService.getContentAsJsoupDocument(chapterUrl, ContentProviders.MANGA_LIVRE); var chapterImagesContainer = document.getElementsByClass("chapter-images").getFirst(); var chapterImagesElements = chapterImagesContainer.getElementsByClass("page-break"); @@ -68,7 +76,7 @@ public class MangaLivreProvider implements ContentProvider, PagedContentProvider .collect( Collectors.toMap( i -> i, imageUrls::get, (existing, replacement) -> existing, LinkedHashMap::new)); - } catch (NoSuchElementException | IOException e) { + } catch (NoSuchElementException e) { log.error("Error parsing mangas from MangaLivre", e); return Map.of(); } @@ -76,8 +84,12 @@ public class MangaLivreProvider implements ContentProvider, PagedContentProvider @Override public List getMangasFromPage(Integer page) { + log.info("Getting mangas from {}, page {}", ContentProviders.MANGA_LIVRE, page); + try { - var document = webScrapperClientProxyService.scrapeToJsoupDocument(url + "page/" + page); + var document = + flareService.getContentAsJsoupDocument( + url + "page/" + page, ContentProviders.MANGA_LIVRE); var mangaElements = document.getElementsByClass("manga__item"); @@ -115,7 +127,7 @@ public class MangaLivreProvider implements ContentProvider, PagedContentProvider title, url, imgUrl, MangaStatus.UNKNOWN); }) .toList(); - } catch (NoSuchElementException | IOException e) { + } catch (NoSuchElementException e) { log.error("Error parsing mangas from MangaLivre", e); return List.of(); } @@ -123,8 +135,10 @@ public class MangaLivreProvider implements ContentProvider, PagedContentProvider @Override public Integer getTotalPages() { + log.info("Getting total pages for {}", ContentProviders.MANGA_LIVRE); + try { - var document = webScrapperClientProxyService.scrapeToJsoupDocument(url); + var document = flareService.getContentAsJsoupDocument(url, ContentProviders.MANGA_LIVRE); var navLinks = document.getElementsByClass("wp-pagenavi").getFirst(); var lastPageElement = navLinks.getElementsByClass("last").getFirst(); @@ -132,7 +146,7 @@ public class MangaLivreProvider implements ContentProvider, PagedContentProvider var totalPages = links.replaceAll("\\D+", ""); return Integer.parseInt(totalPages); - } catch (NoSuchElementException | IOException e) { + } catch (NoSuchElementException e) { log.error("Error parsing total pages from MangaLivre", e); return null; } diff --git a/src/main/java/com/magamochi/mangamochi/service/providers/impl/PinkRosaScanProvider.java b/src/main/java/com/magamochi/mangamochi/service/providers/impl/PinkRosaScanProvider.java index 578e971..0883c8c 100644 --- a/src/main/java/com/magamochi/mangamochi/service/providers/impl/PinkRosaScanProvider.java +++ b/src/main/java/com/magamochi/mangamochi/service/providers/impl/PinkRosaScanProvider.java @@ -7,11 +7,10 @@ import com.magamochi.mangamochi.model.dto.ContentProviderMangaChapterResponseDTO import com.magamochi.mangamochi.model.dto.ContentProviderMangaInfoResponseDTO; import com.magamochi.mangamochi.model.entity.MangaProvider; import com.magamochi.mangamochi.model.enumeration.MangaStatus; -import com.magamochi.mangamochi.service.WebScrapperClientProxyService; +import com.magamochi.mangamochi.service.FlareService; import com.magamochi.mangamochi.service.providers.ContentProvider; import com.magamochi.mangamochi.service.providers.ContentProviders; import com.magamochi.mangamochi.service.providers.PagedContentProvider; -import java.io.IOException; import java.util.*; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -23,12 +22,22 @@ import org.springframework.stereotype.Service; @Service(ContentProviders.PINK_ROSA_SCAN) @RequiredArgsConstructor public class PinkRosaScanProvider implements ContentProvider, PagedContentProvider { - private final WebScrapperClientProxyService webScrapperClientProxyService; + private static final String URL = + "https://scanpinkrosa.blogspot.com/search/label/Series?max-results=1000"; + + private final FlareService flareService; @Override public List getAvailableChapters(MangaProvider provider) { + log.info( + "Getting available chapters from {}, manga {}", + ContentProviders.PINK_ROSA_SCAN, + provider.getManga().getTitle()); + try { - var document = webScrapperClientProxyService.scrapeToJsoupDocument(provider.getUrl()); + var document = + flareService.getContentAsJsoupDocument( + provider.getUrl(), ContentProviders.PINK_ROSA_SCAN); var chapterList = document.getElementById("chapter-list"); if (isNull(chapterList)) { @@ -52,7 +61,7 @@ public class PinkRosaScanProvider implements ContentProvider, PagedContentProvid chapterTitleElement.text().trim(), chapterItemElement.attr("href"), null, null); }) .toList(); - } catch (NoSuchElementException | IOException e) { + } catch (NoSuchElementException e) { log.error("Error parsing mangas from Pink Rosa Scan", e); return List.of(); } @@ -60,8 +69,11 @@ public class PinkRosaScanProvider implements ContentProvider, PagedContentProvid @Override public Map getChapterImagesUrls(String chapterUrl) { + log.info("Getting images from {}, url {}", ContentProviders.PINK_ROSA_SCAN, chapterUrl); + try { - var document = webScrapperClientProxyService.scrapeToJsoupDocument(chapterUrl); + var document = + flareService.getContentAsJsoupDocument(chapterUrl, ContentProviders.PINK_ROSA_SCAN); var chapterImagesContainer = document.getElementById("pages"); if (isNull(chapterImagesContainer)) { @@ -87,7 +99,7 @@ public class PinkRosaScanProvider implements ContentProvider, PagedContentProvid .collect( Collectors.toMap( i -> i, imageUrls::get, (existing, replacement) -> existing, LinkedHashMap::new)); - } catch (NoSuchElementException | IOException e) { + } catch (NoSuchElementException e) { log.error("Error parsing mangas from Pink Rosa Scan", e); return Map.of(); } @@ -100,10 +112,10 @@ public class PinkRosaScanProvider implements ContentProvider, PagedContentProvid @Override public List getMangasFromPage(Integer page) { + log.info("Getting mangas from {}", ContentProviders.PINK_ROSA_SCAN); + try { - var document = - webScrapperClientProxyService.scrapeToJsoupDocument( - "https://scanpinkrosa.blogspot.com/search/label/Series?max-results=1000"); + var document = flareService.getContentAsJsoupDocument(URL, ContentProviders.PINK_ROSA_SCAN); var mangaElements = document.getElementsByClass("grid relative sm:gap-3.5 gap-[2.5vw] w-full h-fit"); @@ -130,7 +142,7 @@ public class PinkRosaScanProvider implements ContentProvider, PagedContentProvid title, url, null, MangaStatus.UNKNOWN); }) .toList(); - } catch (NoSuchElementException | IOException e) { + } catch (NoSuchElementException e) { log.error("Error parsing mangas from Pink Rosa Scan", e); return List.of(); } diff --git a/src/main/java/com/magamochi/mangamochi/task/FlareSessionCleanupTask.java b/src/main/java/com/magamochi/mangamochi/task/FlareSessionCleanupTask.java new file mode 100644 index 0000000..bf0efc9 --- /dev/null +++ b/src/main/java/com/magamochi/mangamochi/task/FlareSessionCleanupTask.java @@ -0,0 +1,35 @@ +package com.magamochi.mangamochi.task; + +import com.magamochi.mangamochi.client.FlareClient; +import com.magamochi.mangamochi.registry.FlareSessionRegistry; +import java.time.Duration; +import java.time.Instant; +import lombok.RequiredArgsConstructor; +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Component; + +@Component +@RequiredArgsConstructor +public class FlareSessionCleanupTask { + private static final Duration TIMEOUT = Duration.ofMinutes(15); + + private final FlareClient client; + private final FlareSessionRegistry registry; + + @Scheduled(fixedDelayString = "1m") + public void cleanExpiredSessions() { + registry + .getSessions() + .forEach( + (provider, session) -> { + if (Duration.between(session.lastAccess(), Instant.now()).compareTo(TIMEOUT) <= 0) { + return; + } + + client.destroySession( + FlareClient.SessionDestroyRequest.builder().session(session.sessionId()).build()); + + registry.remove(provider); + }); + } +} diff --git a/src/main/java/com/magamochi/mangamochi/task/FlareStartupCleanupTask.java b/src/main/java/com/magamochi/mangamochi/task/FlareStartupCleanupTask.java new file mode 100644 index 0000000..5ceb54d --- /dev/null +++ b/src/main/java/com/magamochi/mangamochi/task/FlareStartupCleanupTask.java @@ -0,0 +1,26 @@ +package com.magamochi.mangamochi.task; + +import com.magamochi.mangamochi.client.FlareClient; +import lombok.RequiredArgsConstructor; +import lombok.extern.log4j.Log4j2; +import org.springframework.boot.context.event.ApplicationReadyEvent; +import org.springframework.context.event.EventListener; +import org.springframework.stereotype.Component; + +@Log4j2 +@Component +@RequiredArgsConstructor +public class FlareStartupCleanupTask { + private final FlareClient client; + + @EventListener(ApplicationReadyEvent.class) + public void cleanupExistingSessions() { + var sessions = client.listSessions(FlareClient.SessionListRequest.builder().build()).sessions(); + + for (var sessionId : sessions) { + client.destroySession(FlareClient.SessionDestroyRequest.builder().session(sessionId).build()); + } + + log.info("FlareSolverr session cleanup completed on startup."); + } +} diff --git a/src/main/java/com/magamochi/mangamochi/task/UpdateMangaListTask.java b/src/main/java/com/magamochi/mangamochi/task/UpdateMangaListTask.java index fdec2da..99cb900 100644 --- a/src/main/java/com/magamochi/mangamochi/task/UpdateMangaListTask.java +++ b/src/main/java/com/magamochi/mangamochi/task/UpdateMangaListTask.java @@ -1,6 +1,8 @@ package com.magamochi.mangamochi.task; +import com.magamochi.mangamochi.exception.NotFoundException; import com.magamochi.mangamochi.model.dto.MangaListUpdateCommand; +import com.magamochi.mangamochi.model.repository.ProviderRepository; import com.magamochi.mangamochi.queue.UpdateMangaListProducer; import com.magamochi.mangamochi.service.providers.PagedContentProvider; import com.magamochi.mangamochi.service.providers.PagedContentProviderFactory; @@ -20,6 +22,7 @@ public class UpdateMangaListTask { private final PagedContentProviderFactory contentProviderFactory; private final UpdateMangaListProducer updateMangaListProducer; + private final ProviderRepository providerRepository; @Scheduled(cron = "${content-providers.cron-expression}") public void updateMangaListScheduled() { @@ -37,6 +40,16 @@ public class UpdateMangaListTask { contentProviders.forEach(this::updateProviderMangaList); } + public void updateProviderMangaList(Long providerId) { + var provider = + providerRepository + .findById(providerId) + .orElseThrow(() -> new NotFoundException("Provider not found")); + var contentProvider = contentProviderFactory.getPagedContentProvider(provider.getName()); + + updateProviderMangaList(provider.getName(), contentProvider); + } + private void updateProviderMangaList( String contentProviderName, PagedContentProvider contentProvider) { log.info("Getting total pages for provider {}", contentProviderName); @@ -45,10 +58,9 @@ public class UpdateMangaListTask { IntStream.rangeClosed(1, pages) .forEach( - page -> { - updateMangaListProducer.sendUpdateMangaListCommand( - new MangaListUpdateCommand(contentProviderName, page)); - }); + page -> + updateMangaListProducer.sendUpdateMangaListCommand( + new MangaListUpdateCommand(contentProviderName, page))); log.info("Manga list update queued for content provider {}.", contentProviderName); } diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 5370678..ce06f86 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -38,6 +38,9 @@ springdoc: web-scrapper: endpoint: ${WEBSCRAPPER_ENDPOINT} +flare-solverr: + endpoint: ${FLARESOLVERR_ENDPOINT} + minio: endpoint: ${MINIO_ENDPOINT} accessKey: ${MINIO_USER} @@ -59,6 +62,12 @@ manga-matcher: resilience4j: retry: instances: + FlareSolverrRetry: + max-attempts: 2 + wait-duration: + seconds: 5 + retry-exceptions: + - feign.FeignException MangaDexRetry: max-attempts: 5 wait-duration: -- 2.49.1