feat: add Taimu content provider

This commit is contained in:
Rodrigo Verdiani 2026-03-30 23:11:30 -03:00
parent e6c96c5529
commit 32c63a254f
6 changed files with 230 additions and 3 deletions

View File

@ -4,6 +4,7 @@ public class ContentProviders {
public static final String MANGA_LIVRE_BLOG = "Manga Livre Blog";
public static final String MANGA_LIVRE_TO = "Manga Livre.to";
public static final String PINK_ROSA_SCAN = "Pink Rosa Scan";
public static final String TAIMU = "Taimu";
public static final String MANGA_DEX = "MangaDex";
public static final String MANUAL_IMPORT = "Manual Import";
}

View File

@ -0,0 +1,24 @@
package com.magamochi.ingestion.client;
import lombok.Builder;
import lombok.Getter;
import org.springframework.cloud.openfeign.FeignClient;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
/**
 * Feign client for the {@code scrollable-scrapper} service.
 *
 * <p>The base URL is taken from the {@code scrollable-scrapper.endpoint} property.
 */
@FeignClient(name = "scrollable-scrapper", url = "${scrollable-scrapper.endpoint}")
public interface ScrollableScrapperClient {
/**
 * Sends {@code request} as the body of a POST to the configured endpoint. Both the request and the
 * response are declared as JSON.
 *
 * @param request body carrying the URL handed to the service
 * @return the service response, exposing the returned {@code pageSource} string
 */
@PostMapping(
consumes = MediaType.APPLICATION_JSON_VALUE,
produces = MediaType.APPLICATION_JSON_VALUE)
GetResponse get(@RequestBody GetRequest request);
/** Request body; immutable and created through the Lombok-generated builder. */
@Getter
@Builder
class GetRequest {
// NOTE(review): presumably the address of the page the service should load - confirm against
// the service contract.
private final String url;
}
/**
 * Response body.
 *
 * <p>NOTE(review): {@code pageSource} is presumably the HTML of the loaded page (the consuming
 * service parses it with Jsoup) - confirm against the service contract.
 */
record GetResponse(String pageSource) {}
}

View File

@ -0,0 +1,175 @@
package com.magamochi.ingestion.providers.impl;
import static java.util.Objects.isNull;
import com.magamochi.catalog.model.entity.MangaContentProvider;
import com.magamochi.common.ContentProviders;
import com.magamochi.common.exception.UnprocessableException;
import com.magamochi.ingestion.model.dto.ContentImageInfoDTO;
import com.magamochi.ingestion.model.dto.ContentInfoDTO;
import com.magamochi.ingestion.model.dto.MangaInfoDTO;
import com.magamochi.ingestion.providers.ContentProvider;
import com.magamochi.ingestion.providers.PagedContentProvider;
import com.magamochi.ingestion.service.FlareService;
import com.magamochi.ingestion.service.ScrollableScrapperService;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.stream.IntStream;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.jsoup.nodes.Document;
import org.springframework.stereotype.Service;
@Log4j2
@Service(ContentProviders.TAIMU)
@RequiredArgsConstructor
public class TaimuProvider implements ContentProvider, PagedContentProvider {
private final String baseUrl = "https://taimumangas.rzword.xyz";
private final FlareService flareService;
private final ScrollableScrapperService scrollableScrapperService;
@Override
public List<ContentInfoDTO> getAvailableChapters(MangaContentProvider provider) {
log.info(
"Getting available chapters from {}, manga {}",
ContentProviders.TAIMU,
provider.getManga().getTitle());
try {
var document =
flareService.getContentAsJsoupDocument(
provider.getUrl() + "?page=1&order=desc", ContentProviders.TAIMU);
var totalPages = extractContentPagesFromDocument(document);
var contentInfoList = new ArrayList<>(extractContentInfoFromDocument(document));
for (int page = 2; page <= totalPages; page++) {
var pageDocument =
flareService.getContentAsJsoupDocument(
provider.getUrl() + "?page=" + page + "&order=desc", ContentProviders.TAIMU);
contentInfoList.addAll(extractContentInfoFromDocument(pageDocument));
}
return contentInfoList;
} catch (NoSuchElementException e) {
log.error("Error parsing mangas from MangaLivre", e);
return List.of();
}
}
private int extractContentPagesFromDocument(Document document) {
try {
var buttonsContainer =
document.selectFirst(
"div.flex.items-center.justify-between.gap-3.mt-4.pt-4.border-t div.flex.items-center.gap-1");
var highNumberButton = buttonsContainer.selectFirst("button:nth-last-child(2)");
return Integer.parseInt(highNumberButton.text());
} catch (Exception e) {
// In case of any error during parsing, we assume there is only one page of content
return 1;
}
}
private List<ContentInfoDTO> extractContentInfoFromDocument(Document document) {
try {
var grid = document.selectFirst("div.grid.grid-cols-1.gap-1");
var chapters = grid.select("a");
return chapters.stream()
.map(
chapter -> {
var chapterUrl = baseUrl + chapter.attr("href");
var title =
chapter.selectFirst("div.flex-1.min-w-0 div.flex.items-center.gap-2 p").text();
return new ContentInfoDTO(title, chapterUrl.trim(), "pt-BR");
})
.toList();
} catch (Exception e) {
log.error("Error parsing content info from " + ContentProviders.TAIMU, e);
return List.of();
}
}
@Override
public List<ContentImageInfoDTO> getContentImages(String chapterUrl) {
log.info("Getting images from {}, url {}", ContentProviders.TAIMU, chapterUrl);
try {
var document = scrollableScrapperService.getContentAsJsoupDocument(chapterUrl);
var chapterImages = document.select("img.w-full.h-auto.object-contain.cursor-pointer");
var imageUrls =
chapterImages.stream()
.map(chapterImagesElement -> chapterImagesElement.attr("src"))
.toList();
return IntStream.range(0, imageUrls.size())
.boxed()
.map(position -> new ContentImageInfoDTO(position, imageUrls.get(position)))
.toList();
} catch (NoSuchElementException e) {
log.error("Error parsing manga images from " + ContentProviders.TAIMU, e);
return List.of();
}
}
@Override
public List<MangaInfoDTO> getMangasFromPage(int page) {
log.info("Getting mangas from {}, page {}", ContentProviders.TAIMU, page);
try {
var document =
flareService.getContentAsJsoupDocument(
baseUrl + "/biblioteca?page=" + page, ContentProviders.TAIMU);
var mangas = document.select("a.group");
return mangas.stream()
.map(
element -> {
var mangaUrl = element.attr("href");
var title = element.selectFirst("div h3").text();
return new MangaInfoDTO(title.trim(), baseUrl + mangaUrl.trim());
})
.toList();
} catch (NoSuchElementException e) {
log.error("Error parsing mangas from " + ContentProviders.TAIMU, e);
return List.of();
}
}
@Override
public int getTotalPages() {
log.info("Getting total pages for {}", ContentProviders.TAIMU);
try {
var document =
flareService.getContentAsJsoupDocument(baseUrl + "/biblioteca", ContentProviders.TAIMU);
var container = document.selectFirst("div.flex.flex-col.items-center.space-y-3.mt-6");
var pagination = container.selectFirst("div.flex.items-center.gap-2");
var buttonsContainer = pagination.selectFirst("div.flex.items-center.gap-1");
var lastButton = buttonsContainer.select("button").last();
if (isNull(lastButton)) {
throw new UnprocessableException(
"Pagination buttons not found in " + ContentProviders.TAIMU);
}
var buttonText = lastButton.text();
return Integer.parseInt(buttonText);
} catch (Exception e) {
log.error(
"Error parsing total pages from "
+ ContentProviders.TAIMU
+ ": pagination container not found");
return 0;
}
}
}

View File

@ -0,0 +1,22 @@
package com.magamochi.ingestion.service;
import com.magamochi.ingestion.client.ScrollableScrapperClient;
import lombok.RequiredArgsConstructor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.springframework.stereotype.Service;
/** Retrieves pages through {@link ScrollableScrapperClient} and hands them back parsed by Jsoup. */
@Service
@RequiredArgsConstructor
public class ScrollableScrapperService {
  private final ScrollableScrapperClient client;

  /**
   * Requests the given URL from the scrapper client and parses the returned page source.
   *
   * @param url address forwarded to the client in the request body
   * @return the page source of the response, parsed into a Jsoup {@link Document}
   */
  public Document getContentAsJsoupDocument(String url) {
    var request = ScrollableScrapperClient.GetRequest.builder().url(url).build();
    var response = client.get(request);
    var pageSource = response.pageSource();

    return Jsoup.parse(pageSource);
  }
}

View File

@ -22,9 +22,9 @@ spring:
openfeign:
client:
config:
web-scrapper:
connect-timeout: 240000
read-timeout: 240000
scrollable-scrapper:
connect-timeout: 480000
read-timeout: 480000
rabbitmq:
host: ${RABBITMQ_HOST}
port: ${RABBITMQ_PORT}
@ -41,6 +41,9 @@ springdoc:
flare-solverr:
endpoint: ${FLARESOLVERR_ENDPOINT}
scrollable-scrapper:
endpoint: ${SCROLLABLE_SCRAPPER_ENDPOINT}
minio:
endpoint: ${MINIO_ENDPOINT}
accessKey: ${MINIO_USER}

View File

@ -0,0 +1,2 @@
-- Registers the Taimu content provider. The name must stay equal to ContentProviders.TAIMU,
-- which is also the Spring bean name of TaimuProvider; the url is the site root it scrapes.
INSERT INTO content_providers(name, url, active, supports_content_fetch, manual_import)
VALUES ('Taimu', 'https://taimumangas.rzword.xyz', TRUE, TRUE, FALSE);