Merge pull request 'feat/taimu' (#44) from feat/taimu into main
Reviewed-on: #44
This commit is contained in:
commit
53cbde24d9
@ -48,54 +48,64 @@ public class MangaResolutionService {
|
||||
}
|
||||
|
||||
private Optional<ProviderResult> searchMangaOnAniList(String title) {
|
||||
var searchResults = aniListService.searchMangaByTitle(title);
|
||||
if (searchResults.isEmpty()) {
|
||||
try {
|
||||
var searchResults = aniListService.searchMangaByTitle(title);
|
||||
if (searchResults.isEmpty()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
var matchResponse =
|
||||
titleMatcherService.findBestMatch(
|
||||
TitleMatcherService.TitleMatchRequest.builder()
|
||||
.title(title)
|
||||
.options(searchResults.keySet())
|
||||
.build());
|
||||
|
||||
if (!matchResponse.matchFound()) {
|
||||
log.warn("No title match found for manga with title {} on AniList", title);
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
var matchedManga = searchResults.get(matchResponse.bestMatch());
|
||||
|
||||
var bestTitle =
|
||||
nonNull(matchedManga.title().romaji())
|
||||
? matchedManga.title().romaji()
|
||||
: matchedManga.title().english();
|
||||
|
||||
return Optional.of(new ProviderResult(bestTitle, matchedManga.id()));
|
||||
} catch (Exception e) {
|
||||
log.error("Error searching manga with title {} on AniList", title, e);
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
var matchResponse =
|
||||
titleMatcherService.findBestMatch(
|
||||
TitleMatcherService.TitleMatchRequest.builder()
|
||||
.title(title)
|
||||
.options(searchResults.keySet())
|
||||
.build());
|
||||
|
||||
if (!matchResponse.matchFound()) {
|
||||
log.warn("No title match found for manga with title {} on AniList", title);
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
var matchedManga = searchResults.get(matchResponse.bestMatch());
|
||||
|
||||
var bestTitle =
|
||||
nonNull(matchedManga.title().romaji())
|
||||
? matchedManga.title().romaji()
|
||||
: matchedManga.title().english();
|
||||
|
||||
return Optional.of(new ProviderResult(bestTitle, matchedManga.id()));
|
||||
}
|
||||
|
||||
private Optional<ProviderResult> searchMangaOnMyAnimeList(String title) {
|
||||
var searchResults = myAnimeListService.searchMangaByTitle(title);
|
||||
if (searchResults.isEmpty()) {
|
||||
try {
|
||||
var searchResults = myAnimeListService.searchMangaByTitle(title);
|
||||
if (searchResults.isEmpty()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
var matchResponse =
|
||||
titleMatcherService.findBestMatch(
|
||||
TitleMatcherService.TitleMatchRequest.builder()
|
||||
.title(title)
|
||||
.options(searchResults.keySet())
|
||||
.build());
|
||||
if (!matchResponse.matchFound()) {
|
||||
log.warn("No title match found for manga with title {} on MyAnimeList", title);
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
var bestTitle = matchResponse.bestMatch();
|
||||
var malId = searchResults.get(bestTitle);
|
||||
|
||||
return Optional.of(new ProviderResult(bestTitle, malId));
|
||||
} catch (Exception e) {
|
||||
log.error("Error searching manga with title {} on MyAnimeList", title, e);
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
var matchResponse =
|
||||
titleMatcherService.findBestMatch(
|
||||
TitleMatcherService.TitleMatchRequest.builder()
|
||||
.title(title)
|
||||
.options(searchResults.keySet())
|
||||
.build());
|
||||
if (!matchResponse.matchFound()) {
|
||||
log.warn("No title match found for manga with title {} on MyAnimeList", title);
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
var bestTitle = matchResponse.bestMatch();
|
||||
var malId = searchResults.get(bestTitle);
|
||||
|
||||
return Optional.of(new ProviderResult(bestTitle, malId));
|
||||
}
|
||||
|
||||
public Manga findOrCreateManga(Long aniListId, Long malId) {
|
||||
|
||||
@ -4,6 +4,7 @@ public class ContentProviders {
|
||||
public static final String MANGA_LIVRE_BLOG = "Manga Livre Blog";
|
||||
public static final String MANGA_LIVRE_TO = "Manga Livre.to";
|
||||
public static final String PINK_ROSA_SCAN = "Pink Rosa Scan";
|
||||
public static final String TAIMU = "Taimu";
|
||||
public static final String MANGA_DEX = "MangaDex";
|
||||
public static final String MANUAL_IMPORT = "Manual Import";
|
||||
}
|
||||
|
||||
@ -0,0 +1,24 @@
|
||||
package com.magamochi.ingestion.client;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import org.springframework.cloud.openfeign.FeignClient;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
|
||||
@FeignClient(name = "scrollable-scrapper", url = "${scrollable-scrapper.endpoint}")
|
||||
public interface ScrollableScrapperClient {
|
||||
@PostMapping(
|
||||
consumes = MediaType.APPLICATION_JSON_VALUE,
|
||||
produces = MediaType.APPLICATION_JSON_VALUE)
|
||||
GetResponse get(@RequestBody GetRequest request);
|
||||
|
||||
@Getter
|
||||
@Builder
|
||||
class GetRequest {
|
||||
private final String url;
|
||||
}
|
||||
|
||||
record GetResponse(String pageSource) {}
|
||||
}
|
||||
@ -0,0 +1,175 @@
|
||||
package com.magamochi.ingestion.providers.impl;
|
||||
|
||||
import static java.util.Objects.isNull;
|
||||
|
||||
import com.magamochi.catalog.model.entity.MangaContentProvider;
|
||||
import com.magamochi.common.ContentProviders;
|
||||
import com.magamochi.common.exception.UnprocessableException;
|
||||
import com.magamochi.ingestion.model.dto.ContentImageInfoDTO;
|
||||
import com.magamochi.ingestion.model.dto.ContentInfoDTO;
|
||||
import com.magamochi.ingestion.model.dto.MangaInfoDTO;
|
||||
import com.magamochi.ingestion.providers.ContentProvider;
|
||||
import com.magamochi.ingestion.providers.PagedContentProvider;
|
||||
import com.magamochi.ingestion.service.FlareService;
|
||||
import com.magamochi.ingestion.service.ScrollableScrapperService;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.stream.IntStream;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.log4j.Log4j2;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Log4j2
|
||||
@Service(ContentProviders.TAIMU)
|
||||
@RequiredArgsConstructor
|
||||
public class TaimuProvider implements ContentProvider, PagedContentProvider {
|
||||
private final String baseUrl = "https://taimumangas.rzword.xyz";
|
||||
|
||||
private final FlareService flareService;
|
||||
private final ScrollableScrapperService scrollableScrapperService;
|
||||
|
||||
@Override
|
||||
public List<ContentInfoDTO> getAvailableChapters(MangaContentProvider provider) {
|
||||
log.info(
|
||||
"Getting available chapters from {}, manga {}",
|
||||
ContentProviders.TAIMU,
|
||||
provider.getManga().getTitle());
|
||||
|
||||
try {
|
||||
var document =
|
||||
flareService.getContentAsJsoupDocument(
|
||||
provider.getUrl() + "?page=1&order=desc", ContentProviders.TAIMU);
|
||||
|
||||
var totalPages = extractContentPagesFromDocument(document);
|
||||
var contentInfoList = new ArrayList<>(extractContentInfoFromDocument(document));
|
||||
|
||||
for (int page = 2; page <= totalPages; page++) {
|
||||
var pageDocument =
|
||||
flareService.getContentAsJsoupDocument(
|
||||
provider.getUrl() + "?page=" + page + "&order=desc", ContentProviders.TAIMU);
|
||||
contentInfoList.addAll(extractContentInfoFromDocument(pageDocument));
|
||||
}
|
||||
|
||||
return contentInfoList;
|
||||
} catch (NoSuchElementException e) {
|
||||
log.error("Error parsing mangas from MangaLivre", e);
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
|
||||
private int extractContentPagesFromDocument(Document document) {
|
||||
try {
|
||||
var buttonsContainer =
|
||||
document.selectFirst(
|
||||
"div.flex.items-center.justify-between.gap-3.mt-4.pt-4.border-t div.flex.items-center.gap-1");
|
||||
var highNumberButton = buttonsContainer.selectFirst("button:nth-last-child(2)");
|
||||
return Integer.parseInt(highNumberButton.text());
|
||||
} catch (Exception e) {
|
||||
// In case of any error during parsing, we assume there is only one page of content
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
private List<ContentInfoDTO> extractContentInfoFromDocument(Document document) {
|
||||
try {
|
||||
var grid = document.selectFirst("div.grid.grid-cols-1.gap-1");
|
||||
var chapters = grid.select("a");
|
||||
|
||||
return chapters.stream()
|
||||
.map(
|
||||
chapter -> {
|
||||
var chapterUrl = baseUrl + chapter.attr("href");
|
||||
var title =
|
||||
chapter.selectFirst("div.flex-1.min-w-0 div.flex.items-center.gap-2 p").text();
|
||||
|
||||
return new ContentInfoDTO(title, chapterUrl.trim(), "pt-BR");
|
||||
})
|
||||
.toList();
|
||||
} catch (Exception e) {
|
||||
log.error("Error parsing content info from " + ContentProviders.TAIMU, e);
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ContentImageInfoDTO> getContentImages(String chapterUrl) {
|
||||
log.info("Getting images from {}, url {}", ContentProviders.TAIMU, chapterUrl);
|
||||
|
||||
try {
|
||||
var document = scrollableScrapperService.getContentAsJsoupDocument(chapterUrl);
|
||||
|
||||
var chapterImages = document.select("img.w-full.h-auto.object-contain.cursor-pointer");
|
||||
|
||||
var imageUrls =
|
||||
chapterImages.stream()
|
||||
.map(chapterImagesElement -> chapterImagesElement.attr("src"))
|
||||
.toList();
|
||||
|
||||
return IntStream.range(0, imageUrls.size())
|
||||
.boxed()
|
||||
.map(position -> new ContentImageInfoDTO(position, imageUrls.get(position)))
|
||||
.toList();
|
||||
} catch (NoSuchElementException e) {
|
||||
log.error("Error parsing manga images from " + ContentProviders.TAIMU, e);
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<MangaInfoDTO> getMangasFromPage(int page) {
|
||||
log.info("Getting mangas from {}, page {}", ContentProviders.TAIMU, page);
|
||||
|
||||
try {
|
||||
var document =
|
||||
flareService.getContentAsJsoupDocument(
|
||||
baseUrl + "/biblioteca?page=" + page, ContentProviders.TAIMU);
|
||||
|
||||
var mangas = document.select("a.group");
|
||||
|
||||
return mangas.stream()
|
||||
.map(
|
||||
element -> {
|
||||
var mangaUrl = element.attr("href");
|
||||
var title = element.selectFirst("div h3").text();
|
||||
|
||||
return new MangaInfoDTO(title.trim(), baseUrl + mangaUrl.trim());
|
||||
})
|
||||
.toList();
|
||||
} catch (NoSuchElementException e) {
|
||||
log.error("Error parsing mangas from " + ContentProviders.TAIMU, e);
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getTotalPages() {
|
||||
log.info("Getting total pages for {}", ContentProviders.TAIMU);
|
||||
|
||||
try {
|
||||
var document =
|
||||
flareService.getContentAsJsoupDocument(baseUrl + "/biblioteca", ContentProviders.TAIMU);
|
||||
|
||||
var container = document.selectFirst("div.flex.flex-col.items-center.space-y-3.mt-6");
|
||||
var pagination = container.selectFirst("div.flex.items-center.gap-2");
|
||||
var buttonsContainer = pagination.selectFirst("div.flex.items-center.gap-1");
|
||||
|
||||
var lastButton = buttonsContainer.select("button").last();
|
||||
|
||||
if (isNull(lastButton)) {
|
||||
throw new UnprocessableException(
|
||||
"Pagination buttons not found in " + ContentProviders.TAIMU);
|
||||
}
|
||||
|
||||
var buttonText = lastButton.text();
|
||||
return Integer.parseInt(buttonText);
|
||||
} catch (Exception e) {
|
||||
log.error(
|
||||
"Error parsing total pages from "
|
||||
+ ContentProviders.TAIMU
|
||||
+ ": pagination container not found");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,22 @@
|
||||
package com.magamochi.ingestion.service;
|
||||
|
||||
import com.magamochi.ingestion.client.ScrollableScrapperClient;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class ScrollableScrapperService {
|
||||
private final ScrollableScrapperClient client;
|
||||
|
||||
public Document getContentAsJsoupDocument(String url) {
|
||||
return Jsoup.parse(getContent(url));
|
||||
}
|
||||
|
||||
private String getContent(String url) {
|
||||
|
||||
return client.get(ScrollableScrapperClient.GetRequest.builder().url(url).build()).pageSource();
|
||||
}
|
||||
}
|
||||
@ -22,9 +22,9 @@ spring:
|
||||
openfeign:
|
||||
client:
|
||||
config:
|
||||
web-scrapper:
|
||||
connect-timeout: 240000
|
||||
read-timeout: 240000
|
||||
scrollable-scrapper:
|
||||
connect-timeout: 480000
|
||||
read-timeout: 480000
|
||||
rabbitmq:
|
||||
host: ${RABBITMQ_HOST}
|
||||
port: ${RABBITMQ_PORT}
|
||||
@ -41,6 +41,9 @@ springdoc:
|
||||
flare-solverr:
|
||||
endpoint: ${FLARESOLVERR_ENDPOINT}
|
||||
|
||||
scrollable-scrapper:
|
||||
endpoint: ${SCROLLABLE_SCRAPPER_ENDPOINT}
|
||||
|
||||
minio:
|
||||
endpoint: ${MINIO_ENDPOINT}
|
||||
accessKey: ${MINIO_USER}
|
||||
|
||||
@ -0,0 +1,2 @@
|
||||
INSERT INTO content_providers(name, url, active, supports_content_fetch, manual_import)
|
||||
VALUES ('Taimu', 'https://taimumangas.rzword.xyz', TRUE, TRUE, FALSE);
|
||||
Loading…
x
Reference in New Issue
Block a user