feat(providers): add taimu provider
This commit is contained in:
parent
d2d2b52898
commit
138eea1f0a
@ -6,4 +6,5 @@ public class ContentProviders {
|
||||
public static final String MANGA_DEX = "MangaDex";
|
||||
public static final String PINK_ROSA_SCAN = "Pink Rosa Scan";
|
||||
public static final String BATO = "Bato";
|
||||
public static final String TAIMU = "Taimu";
|
||||
}
|
||||
|
||||
@ -0,0 +1,196 @@
|
||||
package com.magamochi.mangamochi.service.providers.impl;
|
||||
|
||||
import com.magamochi.mangamochi.model.dto.ContentProviderMangaChapterResponseDTO;
|
||||
import com.magamochi.mangamochi.model.dto.ContentProviderMangaInfoResponseDTO;
|
||||
import com.magamochi.mangamochi.model.entity.MangaProvider;
|
||||
import com.magamochi.mangamochi.model.enumeration.MangaStatus;
|
||||
import com.magamochi.mangamochi.service.FlareService;
|
||||
import com.magamochi.mangamochi.service.providers.ContentProvider;
|
||||
import com.magamochi.mangamochi.service.providers.ContentProviders;
|
||||
import com.magamochi.mangamochi.service.providers.PagedContentProvider;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.log4j.Log4j2;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import static java.util.Objects.nonNull;
|
||||
|
||||
@Log4j2
|
||||
@Service(ContentProviders.TAIMU)
|
||||
@RequiredArgsConstructor
|
||||
public class TaimuProvider implements ContentProvider, PagedContentProvider {
|
||||
private final String url = "https://taimumangas.rzword.xyz";
|
||||
private final int CHAPTERS_PER_PAGE = 20;
|
||||
private static final Pattern CHAPTER_NUMBER_PATTERN = Pattern.compile("(\\d+)");
|
||||
|
||||
private final FlareService flareService;
|
||||
|
||||
@Override
|
||||
public List<ContentProviderMangaChapterResponseDTO> getAvailableChapters(MangaProvider provider) {
|
||||
log.info(
|
||||
"Getting available chapters from {}, manga {}",
|
||||
ContentProviders.TAIMU,
|
||||
provider.getManga().getTitle());
|
||||
|
||||
try {
|
||||
var document = flareService.getContentAsJsoupDocument(provider.getUrl(), ContentProviders.TAIMU);
|
||||
|
||||
// 2. Select the h1 element that contains the text "Capítulos"
|
||||
// The selector ':contains(Capítulos)' finds the h1 element that contains that text.
|
||||
var chapterHeader = document.select("h1.font-bold:contains(Capítulos)");
|
||||
|
||||
if (chapterHeader.isEmpty()) {
|
||||
throw new NoSuchElementException("Could not find the chapters header.");
|
||||
}
|
||||
|
||||
// 3. Get the text content of the element
|
||||
var fullText = chapterHeader.first().text();
|
||||
|
||||
// 4. Use a regular expression to extract the leading number
|
||||
var pattern = Pattern.compile("^\\s*(\\d+).*");
|
||||
var matcher = pattern.matcher(fullText);
|
||||
|
||||
if (!matcher.find()) {
|
||||
throw new NoSuchElementException("Could not find the chapters number in the header.");
|
||||
}
|
||||
|
||||
var chapterNumber = Integer.parseInt(matcher.group(1));
|
||||
var totalChapterPages = (int) Math.ceil((double) chapterNumber / CHAPTERS_PER_PAGE);
|
||||
|
||||
var chapterList = new ArrayList<>(getChaptersFromPage(document));
|
||||
|
||||
for (int i = 2; i <= totalChapterPages; i++) {
|
||||
document = flareService.getContentAsJsoupDocument(provider.getUrl() + "?page=" + i + "order=desc", ContentProviders.TAIMU);
|
||||
chapterList.addAll(getChaptersFromPage(document));
|
||||
}
|
||||
|
||||
chapterList.sort(Comparator.comparingInt((ContentProviderMangaChapterResponseDTO chapter) -> {
|
||||
try {
|
||||
return Integer.parseInt(chapter.chapter());
|
||||
} catch (Exception e) {
|
||||
return 0;
|
||||
}
|
||||
}).reversed());
|
||||
|
||||
return chapterList;
|
||||
} catch (NoSuchElementException e) {
|
||||
log.error("Error parsing mangas from {}", ContentProviders.TAIMU, e);
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
|
||||
private List<ContentProviderMangaChapterResponseDTO> getChaptersFromPage(Document document) {
|
||||
// 1. Select the main container for the chapter list.
|
||||
// The chapters are anchor (<a>) tags inside a div with class 'grid grid-cols-1'.
|
||||
var chapterElements = document.select("div.grid.grid-cols-1 a");
|
||||
|
||||
// 2. Iterate through each chapter element (the <a> tag)
|
||||
return chapterElements.stream()
|
||||
.filter(chapterLink -> chapterLink.attr("href").contains("/reader/")).map(chapterLink -> {
|
||||
// a. Extract the URL from the 'href' attribute
|
||||
var chapterUrl = url + chapterLink.attr("href").trim();
|
||||
|
||||
// b. Extract the title from the nested <p> tag
|
||||
// We select the <p> element with specific classes inside the current <a> tag.
|
||||
var titleElement = chapterLink.selectFirst("p.font-semibold");
|
||||
var title = nonNull(titleElement) ? titleElement.text() : "Title Not Found";
|
||||
|
||||
var matcher = CHAPTER_NUMBER_PATTERN.matcher(title);
|
||||
var chapterNumber = 0;
|
||||
if (matcher.find()) {
|
||||
chapterNumber = Integer.parseInt(matcher.group(1));
|
||||
}
|
||||
|
||||
return new ContentProviderMangaChapterResponseDTO(title, chapterUrl, String.valueOf(chapterNumber), "pt-br");
|
||||
}).collect(
|
||||
Collectors.collectingAndThen(
|
||||
// Collect all DTOs into a Set to automatically eliminate duplicates
|
||||
Collectors.toCollection(() -> new java.util.TreeSet<>(java.util.Comparator.comparing(ContentProviderMangaChapterResponseDTO::chapterUrl))),
|
||||
// Convert the resulting Set back into a List
|
||||
java.util.ArrayList::new
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<Integer, String> getChapterImagesUrls(String chapterUrl) {
|
||||
log.info("Getting images from {}, url {}", ContentProviders.TAIMU, chapterUrl);
|
||||
|
||||
try {
|
||||
var document =
|
||||
flareService.getContentAsJsoupDocument(chapterUrl, ContentProviders.TAIMU);
|
||||
|
||||
var chapterImagesElements = document.select("div.reading-content img.wp-manga-chapter-img");
|
||||
|
||||
var imageUrls =
|
||||
chapterImagesElements.stream()
|
||||
.map(
|
||||
chapterImagesElement -> {
|
||||
return chapterImagesElement.attr("src");
|
||||
})
|
||||
.toList();
|
||||
|
||||
return IntStream.range(0, imageUrls.size())
|
||||
.boxed()
|
||||
.collect(
|
||||
Collectors.toMap(
|
||||
i -> i, imageUrls::get, (existing, replacement) -> existing, LinkedHashMap::new));
|
||||
} catch (NoSuchElementException e) {
|
||||
log.error("Error parsing manga images from MangaLivre", e);
|
||||
return Map.of();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ContentProviderMangaInfoResponseDTO> getMangasFromPage(Integer page) {
|
||||
log.info("Getting mangas from {}, page {}", ContentProviders.TAIMU, page);
|
||||
|
||||
try {
|
||||
var document =
|
||||
flareService.getContentAsJsoupDocument(
|
||||
url + "/biblioteca?sort_by=created_at&sort_order=desc&page=" + page, ContentProviders.TAIMU);
|
||||
|
||||
var mangaElements = document.select("a.group");
|
||||
|
||||
return mangaElements.stream()
|
||||
.map(
|
||||
element -> {
|
||||
var mangaUrl = url + element.attr("href").trim();
|
||||
|
||||
var title = element.selectFirst("h3.font-semibold").text();
|
||||
|
||||
return new ContentProviderMangaInfoResponseDTO(
|
||||
title.trim(), mangaUrl, null, MangaStatus.UNKNOWN);
|
||||
})
|
||||
.toList();
|
||||
} catch (NoSuchElementException e) {
|
||||
log.error("Error parsing mangas from MangaLivre", e);
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer getTotalPages() {
|
||||
log.info("Getting total pages for {}", ContentProviders.TAIMU);
|
||||
|
||||
try {
|
||||
var document = flareService.getContentAsJsoupDocument(url + "/biblioteca", ContentProviders.TAIMU);
|
||||
|
||||
return document.select("button:not([disabled])")
|
||||
.stream()
|
||||
.map(el -> el.text().trim())
|
||||
.filter(t -> t.matches("\\d+"))
|
||||
.mapToInt(Integer::parseInt)
|
||||
.max()
|
||||
.orElse(1);
|
||||
} catch (NoSuchElementException e) {
|
||||
log.error("Error parsing total pages {}", ContentProviders.TAIMU, e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user