173 lines
6.3 KiB
Java
173 lines
6.3 KiB
Java
package com.magamochi.ingestion.providers.impl;
|
|
|
|
import com.magamochi.ingestion.model.dto.MangaInfoDTO;
|
|
import com.magamochi.ingestion.providers.ContentProvider;
|
|
import com.magamochi.ingestion.providers.ContentProviders;
|
|
import com.magamochi.ingestion.providers.PagedContentProvider;
|
|
import com.magamochi.model.dto.ContentProviderMangaChapterResponseDTO;
|
|
import com.magamochi.model.entity.MangaContentProvider;
|
|
import com.magamochi.model.enumeration.MangaStatus;
|
|
import java.io.IOException;
|
|
import java.util.*;
|
|
import java.util.regex.Pattern;
|
|
import java.util.stream.Collectors;
|
|
import java.util.stream.IntStream;
|
|
import lombok.RequiredArgsConstructor;
|
|
import lombok.extern.log4j.Log4j2;
|
|
import org.apache.commons.lang3.StringUtils;
|
|
import org.jsoup.Jsoup;
|
|
import org.jsoup.nodes.Element;
|
|
import org.springframework.stereotype.Service;
|
|
|
|
@Log4j2
|
|
@Service(ContentProviders.MANGA_LIVRE_BLOG)
|
|
@RequiredArgsConstructor
|
|
public class MangaLivreBlogProvider implements ContentProvider, PagedContentProvider {
|
|
private static final Pattern NUMERIC_PATTERN = Pattern.compile("-?\\d+");
|
|
|
|
private final String url = "https://mangalivre.blog/manga/";
|
|
|
|
@Override
|
|
public List<ContentProviderMangaChapterResponseDTO> getAvailableChapters(
|
|
MangaContentProvider mangaContentProvider) {
|
|
log.info(
|
|
"Getting available chapters from {}, manga {}",
|
|
ContentProviders.MANGA_LIVRE_BLOG,
|
|
mangaContentProvider.getManga().getTitle());
|
|
|
|
try {
|
|
var document = Jsoup.connect(mangaContentProvider.getUrl()).get();
|
|
|
|
var chapterList = document.getElementsByClass("chapters-list").getFirst();
|
|
var chapterItems = chapterList.getElementsByClass("chapter-item");
|
|
|
|
return chapterItems.stream()
|
|
.map(
|
|
chapterItemElement -> {
|
|
var chapterDetailsContainer =
|
|
chapterItemElement.getElementsByClass("chapter-details").getFirst();
|
|
var linkElement = chapterDetailsContainer.getElementsByTag("a").getFirst();
|
|
var chapterNumberElement =
|
|
linkElement.getElementsByClass("chapter-number").getFirst();
|
|
|
|
return new ContentProviderMangaChapterResponseDTO(
|
|
chapterNumberElement.text(), linkElement.attr("href"), null, "pt-BR");
|
|
})
|
|
.toList();
|
|
} catch (IOException | NoSuchElementException e) {
|
|
log.error("Error fetching mangas from MangaLivre", e);
|
|
return List.of();
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public Map<Integer, String> getChapterImagesUrls(String chapterUrl) {
|
|
log.info("Getting images from {}, url {}", ContentProviders.MANGA_LIVRE_BLOG, chapterUrl);
|
|
|
|
try {
|
|
var document = Jsoup.connect(chapterUrl).get();
|
|
|
|
var chapterImageContainers = document.getElementsByClass("chapter-image-container");
|
|
var imageUrls =
|
|
chapterImageContainers.stream()
|
|
.map(
|
|
chapterImageContainerElement -> {
|
|
var imageElement =
|
|
chapterImageContainerElement.getElementsByTag("img").getFirst();
|
|
|
|
var dataLazySrc = imageElement.attr("data-lazy-src");
|
|
if (StringUtils.isNoneBlank(dataLazySrc)) {
|
|
return dataLazySrc;
|
|
}
|
|
|
|
var dataSrc = imageElement.attr("src");
|
|
if (StringUtils.isNoneBlank(dataSrc)) {
|
|
return dataSrc;
|
|
}
|
|
|
|
throw new NoSuchElementException("Image URL not found");
|
|
})
|
|
.toList();
|
|
|
|
return IntStream.range(0, imageUrls.size())
|
|
.boxed()
|
|
.collect(
|
|
Collectors.toMap(
|
|
i -> i, imageUrls::get, (existing, replacement) -> existing, LinkedHashMap::new));
|
|
} catch (IOException | NoSuchElementException e) {
|
|
log.error("Error fetching mangas from MangaLivre", e);
|
|
return Map.of();
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public List<MangaInfoDTO> getMangasFromPage(int page) {
|
|
log.info("Getting mangas from {}, page {}", ContentProviders.MANGA_LIVRE_BLOG, page);
|
|
|
|
try {
|
|
var document = Jsoup.connect(url + "page/" + page).get();
|
|
|
|
var mangaGrid = document.getElementsByClass("manga-grid").getFirst();
|
|
var mangaElements = mangaGrid.getElementsByTag("article");
|
|
|
|
return mangaElements.stream()
|
|
.map(
|
|
element -> {
|
|
try {
|
|
var linkElement = element.getElementsByTag("a").getFirst();
|
|
|
|
var imageContainer =
|
|
linkElement.getElementsByClass("manga-card-image").getFirst();
|
|
var contentContainer =
|
|
linkElement.getElementsByClass("manga-card-content").getFirst();
|
|
|
|
var title = contentContainer.getElementsByTag("h3").text();
|
|
var url = linkElement.attr("href");
|
|
var status =
|
|
switch (imageContainer
|
|
.getElementsByClass("manga-status")
|
|
.text()
|
|
.toLowerCase()) {
|
|
case "em andamento" -> MangaStatus.ONGOING;
|
|
case "completo" -> MangaStatus.COMPLETED;
|
|
case "hiato" -> MangaStatus.HIATUS;
|
|
default -> MangaStatus.UNKNOWN;
|
|
};
|
|
|
|
return new MangaInfoDTO(title, url, status);
|
|
} catch (Exception e) {
|
|
return null;
|
|
}
|
|
})
|
|
.filter(Objects::nonNull)
|
|
.toList();
|
|
} catch (IOException | NoSuchElementException e) {
|
|
log.error("Error fetching mangas from MangaLivre", e);
|
|
return List.of();
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public int getTotalPages() {
|
|
log.info("Getting total pages for {}", ContentProviders.MANGA_LIVRE_BLOG);
|
|
|
|
try {
|
|
var document = Jsoup.connect(url).get();
|
|
|
|
var navLinks = document.getElementsByClass("nav-links").getFirst();
|
|
var links = navLinks.getElementsByTag("a");
|
|
|
|
var pageNumbers =
|
|
links.stream()
|
|
.map(Element::text)
|
|
.filter(NUMERIC_PATTERN.asMatchPredicate())
|
|
.map(Integer::parseInt)
|
|
.toList();
|
|
return pageNumbers.stream().max(Integer::compareTo).orElse(null);
|
|
} catch (IOException | NoSuchElementException e) {
|
|
log.error("Error fetching total pages from MangaLivre", e);
|
|
return 0;
|
|
}
|
|
}
|
|
}
|