feat(providers): add taimu provider

This commit is contained in:
Rodrigo Verdiani 2025-12-18 11:30:07 -03:00
parent d2d2b52898
commit 138eea1f0a
2 changed files with 197 additions and 0 deletions

View File

@ -6,4 +6,5 @@ public class ContentProviders {
public static final String MANGA_DEX = "MangaDex";
public static final String PINK_ROSA_SCAN = "Pink Rosa Scan";
public static final String BATO = "Bato";
public static final String TAIMU = "Taimu";
}

View File

@ -0,0 +1,196 @@
package com.magamochi.mangamochi.service.providers.impl;
import com.magamochi.mangamochi.model.dto.ContentProviderMangaChapterResponseDTO;
import com.magamochi.mangamochi.model.dto.ContentProviderMangaInfoResponseDTO;
import com.magamochi.mangamochi.model.entity.MangaProvider;
import com.magamochi.mangamochi.model.enumeration.MangaStatus;
import com.magamochi.mangamochi.service.FlareService;
import com.magamochi.mangamochi.service.providers.ContentProvider;
import com.magamochi.mangamochi.service.providers.ContentProviders;
import com.magamochi.mangamochi.service.providers.PagedContentProvider;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.jsoup.nodes.Document;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import static java.util.Objects.nonNull;
@Log4j2
@Service(ContentProviders.TAIMU)
@RequiredArgsConstructor
public class TaimuProvider implements ContentProvider, PagedContentProvider {
private final String url = "https://taimumangas.rzword.xyz";
private final int CHAPTERS_PER_PAGE = 20;
private static final Pattern CHAPTER_NUMBER_PATTERN = Pattern.compile("(\\d+)");
private final FlareService flareService;
@Override
public List<ContentProviderMangaChapterResponseDTO> getAvailableChapters(MangaProvider provider) {
log.info(
"Getting available chapters from {}, manga {}",
ContentProviders.TAIMU,
provider.getManga().getTitle());
try {
var document = flareService.getContentAsJsoupDocument(provider.getUrl(), ContentProviders.TAIMU);
// 2. Select the h1 element that contains the text "Capítulos"
// The selector ':contains(Capítulos)' finds the h1 element that contains that text.
var chapterHeader = document.select("h1.font-bold:contains(Capítulos)");
if (chapterHeader.isEmpty()) {
throw new NoSuchElementException("Could not find the chapters header.");
}
// 3. Get the text content of the element
var fullText = chapterHeader.first().text();
// 4. Use a regular expression to extract the leading number
var pattern = Pattern.compile("^\\s*(\\d+).*");
var matcher = pattern.matcher(fullText);
if (!matcher.find()) {
throw new NoSuchElementException("Could not find the chapters number in the header.");
}
var chapterNumber = Integer.parseInt(matcher.group(1));
var totalChapterPages = (int) Math.ceil((double) chapterNumber / CHAPTERS_PER_PAGE);
var chapterList = new ArrayList<>(getChaptersFromPage(document));
for (int i = 2; i <= totalChapterPages; i++) {
document = flareService.getContentAsJsoupDocument(provider.getUrl() + "?page=" + i + "order=desc", ContentProviders.TAIMU);
chapterList.addAll(getChaptersFromPage(document));
}
chapterList.sort(Comparator.comparingInt((ContentProviderMangaChapterResponseDTO chapter) -> {
try {
return Integer.parseInt(chapter.chapter());
} catch (Exception e) {
return 0;
}
}).reversed());
return chapterList;
} catch (NoSuchElementException e) {
log.error("Error parsing mangas from {}", ContentProviders.TAIMU, e);
return List.of();
}
}
private List<ContentProviderMangaChapterResponseDTO> getChaptersFromPage(Document document) {
// 1. Select the main container for the chapter list.
// The chapters are anchor (<a>) tags inside a div with class 'grid grid-cols-1'.
var chapterElements = document.select("div.grid.grid-cols-1 a");
// 2. Iterate through each chapter element (the <a> tag)
return chapterElements.stream()
.filter(chapterLink -> chapterLink.attr("href").contains("/reader/")).map(chapterLink -> {
// a. Extract the URL from the 'href' attribute
var chapterUrl = url + chapterLink.attr("href").trim();
// b. Extract the title from the nested <p> tag
// We select the <p> element with specific classes inside the current <a> tag.
var titleElement = chapterLink.selectFirst("p.font-semibold");
var title = nonNull(titleElement) ? titleElement.text() : "Title Not Found";
var matcher = CHAPTER_NUMBER_PATTERN.matcher(title);
var chapterNumber = 0;
if (matcher.find()) {
chapterNumber = Integer.parseInt(matcher.group(1));
}
return new ContentProviderMangaChapterResponseDTO(title, chapterUrl, String.valueOf(chapterNumber), "pt-br");
}).collect(
Collectors.collectingAndThen(
// Collect all DTOs into a Set to automatically eliminate duplicates
Collectors.toCollection(() -> new java.util.TreeSet<>(java.util.Comparator.comparing(ContentProviderMangaChapterResponseDTO::chapterUrl))),
// Convert the resulting Set back into a List
java.util.ArrayList::new
)
);
}
@Override
public Map<Integer, String> getChapterImagesUrls(String chapterUrl) {
log.info("Getting images from {}, url {}", ContentProviders.TAIMU, chapterUrl);
try {
var document =
flareService.getContentAsJsoupDocument(chapterUrl, ContentProviders.TAIMU);
var chapterImagesElements = document.select("div.reading-content img.wp-manga-chapter-img");
var imageUrls =
chapterImagesElements.stream()
.map(
chapterImagesElement -> {
return chapterImagesElement.attr("src");
})
.toList();
return IntStream.range(0, imageUrls.size())
.boxed()
.collect(
Collectors.toMap(
i -> i, imageUrls::get, (existing, replacement) -> existing, LinkedHashMap::new));
} catch (NoSuchElementException e) {
log.error("Error parsing manga images from MangaLivre", e);
return Map.of();
}
}
@Override
public List<ContentProviderMangaInfoResponseDTO> getMangasFromPage(Integer page) {
log.info("Getting mangas from {}, page {}", ContentProviders.TAIMU, page);
try {
var document =
flareService.getContentAsJsoupDocument(
url + "/biblioteca?sort_by=created_at&sort_order=desc&page=" + page, ContentProviders.TAIMU);
var mangaElements = document.select("a.group");
return mangaElements.stream()
.map(
element -> {
var mangaUrl = url + element.attr("href").trim();
var title = element.selectFirst("h3.font-semibold").text();
return new ContentProviderMangaInfoResponseDTO(
title.trim(), mangaUrl, null, MangaStatus.UNKNOWN);
})
.toList();
} catch (NoSuchElementException e) {
log.error("Error parsing mangas from MangaLivre", e);
return List.of();
}
}
@Override
public Integer getTotalPages() {
log.info("Getting total pages for {}", ContentProviders.TAIMU);
try {
var document = flareService.getContentAsJsoupDocument(url + "/biblioteca", ContentProviders.TAIMU);
return document.select("button:not([disabled])")
.stream()
.map(el -> el.text().trim())
.filter(t -> t.matches("\\d+"))
.mapToInt(Integer::parseInt)
.max()
.orElse(1);
} catch (NoSuchElementException e) {
log.error("Error parsing total pages {}", ContentProviders.TAIMU, e);
return null;
}
}
}