backend/src/main/java/com/magamochi/ingestion/providers/impl/MangaLivreBlogProvider.java

173 lines
6.3 KiB
Java

package com.magamochi.ingestion.providers.impl;
import com.magamochi.ingestion.model.dto.MangaInfoDTO;
import com.magamochi.ingestion.providers.ContentProvider;
import com.magamochi.ingestion.providers.ContentProviders;
import com.magamochi.ingestion.providers.PagedContentProvider;
import com.magamochi.model.dto.ContentProviderMangaChapterResponseDTO;
import com.magamochi.model.entity.MangaContentProvider;
import com.magamochi.model.enumeration.MangaStatus;
import java.io.IOException;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Service;
@Log4j2
@Service(ContentProviders.MANGA_LIVRE_BLOG)
@RequiredArgsConstructor
public class MangaLivreBlogProvider implements ContentProvider, PagedContentProvider {
private static final Pattern NUMERIC_PATTERN = Pattern.compile("-?\\d+");
private final String url = "https://mangalivre.blog/manga/";
@Override
public List<ContentProviderMangaChapterResponseDTO> getAvailableChapters(
MangaContentProvider mangaContentProvider) {
log.info(
"Getting available chapters from {}, manga {}",
ContentProviders.MANGA_LIVRE_BLOG,
mangaContentProvider.getManga().getTitle());
try {
var document = Jsoup.connect(mangaContentProvider.getUrl()).get();
var chapterList = document.getElementsByClass("chapters-list").getFirst();
var chapterItems = chapterList.getElementsByClass("chapter-item");
return chapterItems.stream()
.map(
chapterItemElement -> {
var chapterDetailsContainer =
chapterItemElement.getElementsByClass("chapter-details").getFirst();
var linkElement = chapterDetailsContainer.getElementsByTag("a").getFirst();
var chapterNumberElement =
linkElement.getElementsByClass("chapter-number").getFirst();
return new ContentProviderMangaChapterResponseDTO(
chapterNumberElement.text(), linkElement.attr("href"), null, "pt-BR");
})
.toList();
} catch (IOException | NoSuchElementException e) {
log.error("Error fetching mangas from MangaLivre", e);
return List.of();
}
}
@Override
public Map<Integer, String> getChapterImagesUrls(String chapterUrl) {
log.info("Getting images from {}, url {}", ContentProviders.MANGA_LIVRE_BLOG, chapterUrl);
try {
var document = Jsoup.connect(chapterUrl).get();
var chapterImageContainers = document.getElementsByClass("chapter-image-container");
var imageUrls =
chapterImageContainers.stream()
.map(
chapterImageContainerElement -> {
var imageElement =
chapterImageContainerElement.getElementsByTag("img").getFirst();
var dataLazySrc = imageElement.attr("data-lazy-src");
if (StringUtils.isNoneBlank(dataLazySrc)) {
return dataLazySrc;
}
var dataSrc = imageElement.attr("src");
if (StringUtils.isNoneBlank(dataSrc)) {
return dataSrc;
}
throw new NoSuchElementException("Image URL not found");
})
.toList();
return IntStream.range(0, imageUrls.size())
.boxed()
.collect(
Collectors.toMap(
i -> i, imageUrls::get, (existing, replacement) -> existing, LinkedHashMap::new));
} catch (IOException | NoSuchElementException e) {
log.error("Error fetching mangas from MangaLivre", e);
return Map.of();
}
}
@Override
public List<MangaInfoDTO> getMangasFromPage(int page) {
log.info("Getting mangas from {}, page {}", ContentProviders.MANGA_LIVRE_BLOG, page);
try {
var document = Jsoup.connect(url + "page/" + page).get();
var mangaGrid = document.getElementsByClass("manga-grid").getFirst();
var mangaElements = mangaGrid.getElementsByTag("article");
return mangaElements.stream()
.map(
element -> {
try {
var linkElement = element.getElementsByTag("a").getFirst();
var imageContainer =
linkElement.getElementsByClass("manga-card-image").getFirst();
var contentContainer =
linkElement.getElementsByClass("manga-card-content").getFirst();
var title = contentContainer.getElementsByTag("h3").text();
var url = linkElement.attr("href");
var status =
switch (imageContainer
.getElementsByClass("manga-status")
.text()
.toLowerCase()) {
case "em andamento" -> MangaStatus.ONGOING;
case "completo" -> MangaStatus.COMPLETED;
case "hiato" -> MangaStatus.HIATUS;
default -> MangaStatus.UNKNOWN;
};
return new MangaInfoDTO(title, url, status);
} catch (Exception e) {
return null;
}
})
.filter(Objects::nonNull)
.toList();
} catch (IOException | NoSuchElementException e) {
log.error("Error fetching mangas from MangaLivre", e);
return List.of();
}
}
@Override
public int getTotalPages() {
log.info("Getting total pages for {}", ContentProviders.MANGA_LIVRE_BLOG);
try {
var document = Jsoup.connect(url).get();
var navLinks = document.getElementsByClass("nav-links").getFirst();
var links = navLinks.getElementsByTag("a");
var pageNumbers =
links.stream()
.map(Element::text)
.filter(NUMERIC_PATTERN.asMatchPredicate())
.map(Integer::parseInt)
.toList();
return pageNumbers.stream().max(Integer::compareTo).orElse(null);
} catch (IOException | NoSuchElementException e) {
log.error("Error fetching total pages from MangaLivre", e);
return 0;
}
}
}