Merge pull request 'feat/taimu' (#44) from feat/taimu into main
Reviewed-on: #44
This commit is contained in:
commit
53cbde24d9
@ -48,54 +48,64 @@ public class MangaResolutionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private Optional<ProviderResult> searchMangaOnAniList(String title) {
|
private Optional<ProviderResult> searchMangaOnAniList(String title) {
|
||||||
var searchResults = aniListService.searchMangaByTitle(title);
|
try {
|
||||||
if (searchResults.isEmpty()) {
|
var searchResults = aniListService.searchMangaByTitle(title);
|
||||||
|
if (searchResults.isEmpty()) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
var matchResponse =
|
||||||
|
titleMatcherService.findBestMatch(
|
||||||
|
TitleMatcherService.TitleMatchRequest.builder()
|
||||||
|
.title(title)
|
||||||
|
.options(searchResults.keySet())
|
||||||
|
.build());
|
||||||
|
|
||||||
|
if (!matchResponse.matchFound()) {
|
||||||
|
log.warn("No title match found for manga with title {} on AniList", title);
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
var matchedManga = searchResults.get(matchResponse.bestMatch());
|
||||||
|
|
||||||
|
var bestTitle =
|
||||||
|
nonNull(matchedManga.title().romaji())
|
||||||
|
? matchedManga.title().romaji()
|
||||||
|
: matchedManga.title().english();
|
||||||
|
|
||||||
|
return Optional.of(new ProviderResult(bestTitle, matchedManga.id()));
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Error searching manga with title {} on AniList", title, e);
|
||||||
return Optional.empty();
|
return Optional.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
var matchResponse =
|
|
||||||
titleMatcherService.findBestMatch(
|
|
||||||
TitleMatcherService.TitleMatchRequest.builder()
|
|
||||||
.title(title)
|
|
||||||
.options(searchResults.keySet())
|
|
||||||
.build());
|
|
||||||
|
|
||||||
if (!matchResponse.matchFound()) {
|
|
||||||
log.warn("No title match found for manga with title {} on AniList", title);
|
|
||||||
return Optional.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
var matchedManga = searchResults.get(matchResponse.bestMatch());
|
|
||||||
|
|
||||||
var bestTitle =
|
|
||||||
nonNull(matchedManga.title().romaji())
|
|
||||||
? matchedManga.title().romaji()
|
|
||||||
: matchedManga.title().english();
|
|
||||||
|
|
||||||
return Optional.of(new ProviderResult(bestTitle, matchedManga.id()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private Optional<ProviderResult> searchMangaOnMyAnimeList(String title) {
|
private Optional<ProviderResult> searchMangaOnMyAnimeList(String title) {
|
||||||
var searchResults = myAnimeListService.searchMangaByTitle(title);
|
try {
|
||||||
if (searchResults.isEmpty()) {
|
var searchResults = myAnimeListService.searchMangaByTitle(title);
|
||||||
|
if (searchResults.isEmpty()) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
var matchResponse =
|
||||||
|
titleMatcherService.findBestMatch(
|
||||||
|
TitleMatcherService.TitleMatchRequest.builder()
|
||||||
|
.title(title)
|
||||||
|
.options(searchResults.keySet())
|
||||||
|
.build());
|
||||||
|
if (!matchResponse.matchFound()) {
|
||||||
|
log.warn("No title match found for manga with title {} on MyAnimeList", title);
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
var bestTitle = matchResponse.bestMatch();
|
||||||
|
var malId = searchResults.get(bestTitle);
|
||||||
|
|
||||||
|
return Optional.of(new ProviderResult(bestTitle, malId));
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Error searching manga with title {} on MyAnimeList", title, e);
|
||||||
return Optional.empty();
|
return Optional.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
var matchResponse =
|
|
||||||
titleMatcherService.findBestMatch(
|
|
||||||
TitleMatcherService.TitleMatchRequest.builder()
|
|
||||||
.title(title)
|
|
||||||
.options(searchResults.keySet())
|
|
||||||
.build());
|
|
||||||
if (!matchResponse.matchFound()) {
|
|
||||||
log.warn("No title match found for manga with title {} on MyAnimeList", title);
|
|
||||||
return Optional.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
var bestTitle = matchResponse.bestMatch();
|
|
||||||
var malId = searchResults.get(bestTitle);
|
|
||||||
|
|
||||||
return Optional.of(new ProviderResult(bestTitle, malId));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Manga findOrCreateManga(Long aniListId, Long malId) {
|
public Manga findOrCreateManga(Long aniListId, Long malId) {
|
||||||
|
|||||||
@ -4,6 +4,7 @@ public class ContentProviders {
|
|||||||
public static final String MANGA_LIVRE_BLOG = "Manga Livre Blog";
|
public static final String MANGA_LIVRE_BLOG = "Manga Livre Blog";
|
||||||
public static final String MANGA_LIVRE_TO = "Manga Livre.to";
|
public static final String MANGA_LIVRE_TO = "Manga Livre.to";
|
||||||
public static final String PINK_ROSA_SCAN = "Pink Rosa Scan";
|
public static final String PINK_ROSA_SCAN = "Pink Rosa Scan";
|
||||||
|
public static final String TAIMU = "Taimu";
|
||||||
public static final String MANGA_DEX = "MangaDex";
|
public static final String MANGA_DEX = "MangaDex";
|
||||||
public static final String MANUAL_IMPORT = "Manual Import";
|
public static final String MANUAL_IMPORT = "Manual Import";
|
||||||
}
|
}
|
||||||
|
|||||||
@ -0,0 +1,24 @@
|
|||||||
|
package com.magamochi.ingestion.client;
|
||||||
|
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import org.springframework.cloud.openfeign.FeignClient;
|
||||||
|
import org.springframework.http.MediaType;
|
||||||
|
import org.springframework.web.bind.annotation.PostMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestBody;
|
||||||
|
|
||||||
|
@FeignClient(name = "scrollable-scrapper", url = "${scrollable-scrapper.endpoint}")
|
||||||
|
public interface ScrollableScrapperClient {
|
||||||
|
@PostMapping(
|
||||||
|
consumes = MediaType.APPLICATION_JSON_VALUE,
|
||||||
|
produces = MediaType.APPLICATION_JSON_VALUE)
|
||||||
|
GetResponse get(@RequestBody GetRequest request);
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Builder
|
||||||
|
class GetRequest {
|
||||||
|
private final String url;
|
||||||
|
}
|
||||||
|
|
||||||
|
record GetResponse(String pageSource) {}
|
||||||
|
}
|
||||||
@ -0,0 +1,175 @@
|
|||||||
|
package com.magamochi.ingestion.providers.impl;
|
||||||
|
|
||||||
|
import static java.util.Objects.isNull;
|
||||||
|
|
||||||
|
import com.magamochi.catalog.model.entity.MangaContentProvider;
|
||||||
|
import com.magamochi.common.ContentProviders;
|
||||||
|
import com.magamochi.common.exception.UnprocessableException;
|
||||||
|
import com.magamochi.ingestion.model.dto.ContentImageInfoDTO;
|
||||||
|
import com.magamochi.ingestion.model.dto.ContentInfoDTO;
|
||||||
|
import com.magamochi.ingestion.model.dto.MangaInfoDTO;
|
||||||
|
import com.magamochi.ingestion.providers.ContentProvider;
|
||||||
|
import com.magamochi.ingestion.providers.PagedContentProvider;
|
||||||
|
import com.magamochi.ingestion.service.FlareService;
|
||||||
|
import com.magamochi.ingestion.service.ScrollableScrapperService;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
import java.util.stream.IntStream;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.log4j.Log4j2;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
@Log4j2
|
||||||
|
@Service(ContentProviders.TAIMU)
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class TaimuProvider implements ContentProvider, PagedContentProvider {
|
||||||
|
private final String baseUrl = "https://taimumangas.rzword.xyz";
|
||||||
|
|
||||||
|
private final FlareService flareService;
|
||||||
|
private final ScrollableScrapperService scrollableScrapperService;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<ContentInfoDTO> getAvailableChapters(MangaContentProvider provider) {
|
||||||
|
log.info(
|
||||||
|
"Getting available chapters from {}, manga {}",
|
||||||
|
ContentProviders.TAIMU,
|
||||||
|
provider.getManga().getTitle());
|
||||||
|
|
||||||
|
try {
|
||||||
|
var document =
|
||||||
|
flareService.getContentAsJsoupDocument(
|
||||||
|
provider.getUrl() + "?page=1&order=desc", ContentProviders.TAIMU);
|
||||||
|
|
||||||
|
var totalPages = extractContentPagesFromDocument(document);
|
||||||
|
var contentInfoList = new ArrayList<>(extractContentInfoFromDocument(document));
|
||||||
|
|
||||||
|
for (int page = 2; page <= totalPages; page++) {
|
||||||
|
var pageDocument =
|
||||||
|
flareService.getContentAsJsoupDocument(
|
||||||
|
provider.getUrl() + "?page=" + page + "&order=desc", ContentProviders.TAIMU);
|
||||||
|
contentInfoList.addAll(extractContentInfoFromDocument(pageDocument));
|
||||||
|
}
|
||||||
|
|
||||||
|
return contentInfoList;
|
||||||
|
} catch (NoSuchElementException e) {
|
||||||
|
log.error("Error parsing mangas from MangaLivre", e);
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int extractContentPagesFromDocument(Document document) {
|
||||||
|
try {
|
||||||
|
var buttonsContainer =
|
||||||
|
document.selectFirst(
|
||||||
|
"div.flex.items-center.justify-between.gap-3.mt-4.pt-4.border-t div.flex.items-center.gap-1");
|
||||||
|
var highNumberButton = buttonsContainer.selectFirst("button:nth-last-child(2)");
|
||||||
|
return Integer.parseInt(highNumberButton.text());
|
||||||
|
} catch (Exception e) {
|
||||||
|
// In case of any error during parsing, we assume there is only one page of content
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<ContentInfoDTO> extractContentInfoFromDocument(Document document) {
|
||||||
|
try {
|
||||||
|
var grid = document.selectFirst("div.grid.grid-cols-1.gap-1");
|
||||||
|
var chapters = grid.select("a");
|
||||||
|
|
||||||
|
return chapters.stream()
|
||||||
|
.map(
|
||||||
|
chapter -> {
|
||||||
|
var chapterUrl = baseUrl + chapter.attr("href");
|
||||||
|
var title =
|
||||||
|
chapter.selectFirst("div.flex-1.min-w-0 div.flex.items-center.gap-2 p").text();
|
||||||
|
|
||||||
|
return new ContentInfoDTO(title, chapterUrl.trim(), "pt-BR");
|
||||||
|
})
|
||||||
|
.toList();
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Error parsing content info from " + ContentProviders.TAIMU, e);
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<ContentImageInfoDTO> getContentImages(String chapterUrl) {
|
||||||
|
log.info("Getting images from {}, url {}", ContentProviders.TAIMU, chapterUrl);
|
||||||
|
|
||||||
|
try {
|
||||||
|
var document = scrollableScrapperService.getContentAsJsoupDocument(chapterUrl);
|
||||||
|
|
||||||
|
var chapterImages = document.select("img.w-full.h-auto.object-contain.cursor-pointer");
|
||||||
|
|
||||||
|
var imageUrls =
|
||||||
|
chapterImages.stream()
|
||||||
|
.map(chapterImagesElement -> chapterImagesElement.attr("src"))
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
return IntStream.range(0, imageUrls.size())
|
||||||
|
.boxed()
|
||||||
|
.map(position -> new ContentImageInfoDTO(position, imageUrls.get(position)))
|
||||||
|
.toList();
|
||||||
|
} catch (NoSuchElementException e) {
|
||||||
|
log.error("Error parsing manga images from " + ContentProviders.TAIMU, e);
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<MangaInfoDTO> getMangasFromPage(int page) {
|
||||||
|
log.info("Getting mangas from {}, page {}", ContentProviders.TAIMU, page);
|
||||||
|
|
||||||
|
try {
|
||||||
|
var document =
|
||||||
|
flareService.getContentAsJsoupDocument(
|
||||||
|
baseUrl + "/biblioteca?page=" + page, ContentProviders.TAIMU);
|
||||||
|
|
||||||
|
var mangas = document.select("a.group");
|
||||||
|
|
||||||
|
return mangas.stream()
|
||||||
|
.map(
|
||||||
|
element -> {
|
||||||
|
var mangaUrl = element.attr("href");
|
||||||
|
var title = element.selectFirst("div h3").text();
|
||||||
|
|
||||||
|
return new MangaInfoDTO(title.trim(), baseUrl + mangaUrl.trim());
|
||||||
|
})
|
||||||
|
.toList();
|
||||||
|
} catch (NoSuchElementException e) {
|
||||||
|
log.error("Error parsing mangas from " + ContentProviders.TAIMU, e);
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getTotalPages() {
|
||||||
|
log.info("Getting total pages for {}", ContentProviders.TAIMU);
|
||||||
|
|
||||||
|
try {
|
||||||
|
var document =
|
||||||
|
flareService.getContentAsJsoupDocument(baseUrl + "/biblioteca", ContentProviders.TAIMU);
|
||||||
|
|
||||||
|
var container = document.selectFirst("div.flex.flex-col.items-center.space-y-3.mt-6");
|
||||||
|
var pagination = container.selectFirst("div.flex.items-center.gap-2");
|
||||||
|
var buttonsContainer = pagination.selectFirst("div.flex.items-center.gap-1");
|
||||||
|
|
||||||
|
var lastButton = buttonsContainer.select("button").last();
|
||||||
|
|
||||||
|
if (isNull(lastButton)) {
|
||||||
|
throw new UnprocessableException(
|
||||||
|
"Pagination buttons not found in " + ContentProviders.TAIMU);
|
||||||
|
}
|
||||||
|
|
||||||
|
var buttonText = lastButton.text();
|
||||||
|
return Integer.parseInt(buttonText);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error(
|
||||||
|
"Error parsing total pages from "
|
||||||
|
+ ContentProviders.TAIMU
|
||||||
|
+ ": pagination container not found");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,22 @@
|
|||||||
|
package com.magamochi.ingestion.service;
|
||||||
|
|
||||||
|
import com.magamochi.ingestion.client.ScrollableScrapperClient;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class ScrollableScrapperService {
|
||||||
|
private final ScrollableScrapperClient client;
|
||||||
|
|
||||||
|
public Document getContentAsJsoupDocument(String url) {
|
||||||
|
return Jsoup.parse(getContent(url));
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getContent(String url) {
|
||||||
|
|
||||||
|
return client.get(ScrollableScrapperClient.GetRequest.builder().url(url).build()).pageSource();
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -22,9 +22,9 @@ spring:
|
|||||||
openfeign:
|
openfeign:
|
||||||
client:
|
client:
|
||||||
config:
|
config:
|
||||||
web-scrapper:
|
scrollable-scrapper:
|
||||||
connect-timeout: 240000
|
connect-timeout: 480000
|
||||||
read-timeout: 240000
|
read-timeout: 480000
|
||||||
rabbitmq:
|
rabbitmq:
|
||||||
host: ${RABBITMQ_HOST}
|
host: ${RABBITMQ_HOST}
|
||||||
port: ${RABBITMQ_PORT}
|
port: ${RABBITMQ_PORT}
|
||||||
@ -41,6 +41,9 @@ springdoc:
|
|||||||
flare-solverr:
|
flare-solverr:
|
||||||
endpoint: ${FLARESOLVERR_ENDPOINT}
|
endpoint: ${FLARESOLVERR_ENDPOINT}
|
||||||
|
|
||||||
|
scrollable-scrapper:
|
||||||
|
endpoint: ${SCROLLABLE_SCRAPPER_ENDPOINT}
|
||||||
|
|
||||||
minio:
|
minio:
|
||||||
endpoint: ${MINIO_ENDPOINT}
|
endpoint: ${MINIO_ENDPOINT}
|
||||||
accessKey: ${MINIO_USER}
|
accessKey: ${MINIO_USER}
|
||||||
|
|||||||
@ -0,0 +1,2 @@
|
|||||||
|
-- Registers the Taimu content provider: active, supports content fetching, not a manual import.
INSERT INTO content_providers
    (name, url, active, supports_content_fetch, manual_import)
VALUES
    ('Taimu', 'https://taimumangas.rzword.xyz', TRUE, TRUE, FALSE);
|
||||||
Loading…
x
Reference in New Issue
Block a user