From f0a151159e2c56b4249d584f154d8737b063e92f Mon Sep 17 00:00:00 2001 From: Rodrigo Verdiani Date: Thu, 26 Mar 2026 10:16:53 -0300 Subject: [PATCH] refactor: implement archive extraction functionality with support for RAR and ZIP formats --- pom.xml | 6 ++ .../content/service/ContentImportService.java | 94 +++++++++---------- .../service/archive/ArchiveExtractor.java | 10 ++ .../archive/ArchiveExtractorFactory.java | 23 +++++ .../content/service/archive/RarExtractor.java | 58 ++++++++++++ .../content/service/archive/ZipExtractor.java | 47 ++++++++++ 6 files changed, 191 insertions(+), 47 deletions(-) create mode 100644 src/main/java/com/magamochi/content/service/archive/ArchiveExtractor.java create mode 100644 src/main/java/com/magamochi/content/service/archive/ArchiveExtractorFactory.java create mode 100644 src/main/java/com/magamochi/content/service/archive/RarExtractor.java create mode 100644 src/main/java/com/magamochi/content/service/archive/ZipExtractor.java diff --git a/pom.xml b/pom.xml index 49ef5d8..8d22cc0 100644 --- a/pom.xml +++ b/pom.xml @@ -130,6 +130,12 @@ 1.1 compile + + com.github.junrar + junrar + 7.5.8 + compile + diff --git a/src/main/java/com/magamochi/content/service/ContentImportService.java b/src/main/java/com/magamochi/content/service/ContentImportService.java index 4d87c90..cce72a5 100644 --- a/src/main/java/com/magamochi/content/service/ContentImportService.java +++ b/src/main/java/com/magamochi/content/service/ContentImportService.java @@ -10,6 +10,7 @@ import com.magamochi.common.exception.UnprocessableException; import com.magamochi.common.model.enumeration.ContentType; import com.magamochi.content.model.dto.PresignedImportRequestDTO; import com.magamochi.content.model.dto.PresignedImportResponseDTO; +import com.magamochi.content.model.entity.MangaContent; import com.magamochi.content.model.entity.MangaContentImage; import com.magamochi.content.model.entity.MangaImportJob; import com.magamochi.content.model.enumeration.ImportJobStatus; @@ -17,19 +18,18 @@ import com.magamochi.content.model.repository.MangaContentImageRepository; import com.magamochi.content.model.repository.MangaImportJobRepository; import com.magamochi.content.queue.command.FileImportCommand; import com.magamochi.content.queue.producer.FileImportProducer; +import com.magamochi.content.service.archive.ArchiveExtractorFactory; import com.magamochi.image.service.ImageFetchService; import com.magamochi.image.service.ImageService; import com.magamochi.image.service.S3Service; import com.magamochi.ingestion.service.ContentProviderService; import jakarta.validation.constraints.NotNull; -import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.security.NoSuchAlgorithmException; import java.util.*; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; import lombok.RequiredArgsConstructor; import lombok.extern.log4j.Log4j2; -import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator; +import org.apache.tika.io.TikaInputStream; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Propagation; import org.springframework.transaction.annotation.Transactional; @@ -45,11 +45,14 @@ public class ContentImportService { private final ContentIngestService contentIngestService; private final ImageFetchService imageFetchService; private final S3Service s3Service; + private final ImageService imageService; - private final FileImportProducer fileImportProducer; private final MangaContentImageRepository mangaContentImageRepository; private final MangaImportJobRepository mangaImportJobRepository; - private final ImageService imageService; + + private final FileImportProducer fileImportProducer; + + private final ArchiveExtractorFactory extractorFactory; public void importFiles(String malId, String aniListId, @NotNull List files) { if (isBlank(malId) && isBlank(aniListId)) { @@ -128,52 +131,18 @@ public class ContentImportService { @Transactional public void importFile(Long mangaContentProviderId, String filename, Long mangaImportJobId) { - var contentName = removeImportPrefix(removeFileExtension(filename)); - - if (nonNull(mangaImportJobId)) { - var jobOpt = mangaImportJobRepository.findById(mangaImportJobId); - if (jobOpt.isPresent()) { - contentName = removeFileExtension(jobOpt.get().getOriginalFilename()); - } - } - + var contentName = determineContentName(filename, mangaImportJobId); var mangaContent = contentIngestService.ingest(mangaContentProviderId, contentName, null, "en-US"); - try (var is = s3Service.getFileStream(filename); - var zis = new ZipInputStream(is)) { + try (var originalStream = s3Service.getFileStream(filename); + var tikaStream = TikaInputStream.get(originalStream)) { + var extractor = extractorFactory.getExtractor(tikaStream); + var entryMap = extractor.extract(tikaStream); - Map entryMap = - new TreeMap<>(CaseInsensitiveSimpleNaturalComparator.getInstance()); - - ZipEntry entry; - while ((entry = zis.getNextEntry()) != null) { - if (entry.isDirectory()) { - continue; - } - - var os = new ByteArrayOutputStream(); - zis.transferTo(os); - entryMap.put(entry.getName(), os.toByteArray()); - zis.closeEntry(); - } - - var position = 0; - for (var sortedEntry : entryMap.entrySet()) { - var bytes = sortedEntry.getValue(); - - var imageId = imageFetchService.uploadImage(bytes, null, ContentType.CONTENT_IMAGE); - var image = imageService.find(imageId); - - mangaContentImageRepository.save( - MangaContentImage.builder() - .image(image) - .mangaContent(mangaContent) - .position(position++) - .build()); - } + saveImages(entryMap, mangaContent); } catch (Exception e) { - throw new RuntimeException("Failed to process zip: " + filename, e); + throw new UnprocessableException("Failed to process archive: " + filename, e); } mangaContent.setDownloaded(true); @@ -202,4 +171,35 @@ public class ContentImportService { return path.replace("temp/import/", ""); } + + private String determineContentName(String filename, Long mangaImportJobId) { + var contentName = removeImportPrefix(removeFileExtension(filename)); + + if (nonNull(mangaImportJobId)) { + var jobOpt = mangaImportJobRepository.findById(mangaImportJobId); + if (jobOpt.isPresent()) { + contentName = removeFileExtension(jobOpt.get().getOriginalFilename()); + } + } + + return contentName; + } + + private void saveImages(Map entryMap, MangaContent mangaContent) + throws NoSuchAlgorithmException { + var position = 0; + for (var sortedEntry : entryMap.entrySet()) { + var bytes = sortedEntry.getValue(); + + var imageId = imageFetchService.uploadImage(bytes, null, ContentType.CONTENT_IMAGE); + var image = imageService.find(imageId); + + mangaContentImageRepository.save( + MangaContentImage.builder() + .image(image) + .mangaContent(mangaContent) + .position(position++) + .build()); + } + } } diff --git a/src/main/java/com/magamochi/content/service/archive/ArchiveExtractor.java b/src/main/java/com/magamochi/content/service/archive/ArchiveExtractor.java new file mode 100644 index 0000000..d890871 --- /dev/null +++ b/src/main/java/com/magamochi/content/service/archive/ArchiveExtractor.java @@ -0,0 +1,10 @@ +package com.magamochi.content.service.archive; + +import java.io.InputStream; +import java.util.Map; + +public interface ArchiveExtractor { + boolean supports(String mimeType); + + Map extract(InputStream stream) throws Exception; +} diff --git a/src/main/java/com/magamochi/content/service/archive/ArchiveExtractorFactory.java b/src/main/java/com/magamochi/content/service/archive/ArchiveExtractorFactory.java new file mode 100644 index 0000000..192c3f9 --- /dev/null +++ b/src/main/java/com/magamochi/content/service/archive/ArchiveExtractorFactory.java @@ -0,0 +1,23 @@ +package com.magamochi.content.service.archive; + +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.apache.tika.Tika; +import org.apache.tika.io.TikaInputStream; +import org.springframework.stereotype.Component; + +@Component +@RequiredArgsConstructor +public class ArchiveExtractorFactory { + private final List extractors; + private final Tika tika = new Tika(); + + public ArchiveExtractor getExtractor(TikaInputStream tikaStream) throws Exception { + var mimeType = tika.detect(tikaStream); + + return extractors.stream() + .filter(extractor -> extractor.supports(mimeType)) + .findFirst() + .orElseThrow(() -> new IllegalArgumentException("Unsupported archive format: " + mimeType)); + } +} diff --git a/src/main/java/com/magamochi/content/service/archive/RarExtractor.java b/src/main/java/com/magamochi/content/service/archive/RarExtractor.java new file mode 100644 index 0000000..72f1da7 --- /dev/null +++ b/src/main/java/com/magamochi/content/service/archive/RarExtractor.java @@ -0,0 +1,58 @@ +package com.magamochi.content.service.archive; + +import static java.util.Objects.isNull; +import static java.util.Objects.nonNull; + +import com.github.junrar.Archive; +import com.github.junrar.rarfile.FileHeader; +import java.io.ByteArrayOutputStream; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.Map; +import java.util.TreeMap; +import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator; +import org.springframework.stereotype.Component; + +@Component +public class RarExtractor implements ArchiveExtractor { + + @Override + public boolean supports(String mimeType) { + if (isNull(mimeType)) { + return false; + } + + return mimeType.startsWith("application/x-rar-compressed") + || mimeType.startsWith("application/rar"); + } + + @Override + public Map extract(InputStream stream) throws Exception { + Map entryMap = + new TreeMap<>(CaseInsensitiveSimpleNaturalComparator.getInstance()); + + var tempFile = Files.createTempFile("manga_import_rar_", ".rar"); + + try { + Files.copy(stream, tempFile, StandardCopyOption.REPLACE_EXISTING); + + try (var archive = new Archive(tempFile.toFile())) { + FileHeader fileHeader; + while (nonNull(fileHeader = archive.nextFileHeader())) { + if (fileHeader.isDirectory()) { + continue; + } + + var os = new ByteArrayOutputStream(); + archive.extractFile(fileHeader, os); + entryMap.put(fileHeader.getFileName(), os.toByteArray()); + } + } + } finally { + Files.deleteIfExists(tempFile); + } + + return entryMap; + } +} diff --git a/src/main/java/com/magamochi/content/service/archive/ZipExtractor.java b/src/main/java/com/magamochi/content/service/archive/ZipExtractor.java new file mode 100644 index 0000000..1725c6f --- /dev/null +++ b/src/main/java/com/magamochi/content/service/archive/ZipExtractor.java @@ -0,0 +1,47 @@ +package com.magamochi.content.service.archive; + +import static java.util.Objects.isNull; +import static java.util.Objects.nonNull; + +import java.io.ByteArrayOutputStream; +import java.io.InputStream; +import java.util.Map; +import java.util.TreeMap; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator; +import org.springframework.stereotype.Component; + +@Component +public class ZipExtractor implements ArchiveExtractor { + @Override + public boolean supports(String mimeType) { + if (isNull(mimeType)) { + return false; + } + + return mimeType.startsWith("application/zip"); + } + + @Override + public Map extract(InputStream stream) throws Exception { + Map entryMap = + new TreeMap<>(CaseInsensitiveSimpleNaturalComparator.getInstance()); + + try (var zis = new ZipInputStream(stream)) { + ZipEntry entry; + while (nonNull(entry = zis.getNextEntry())) { + if (entry.isDirectory()) { + continue; + } + + var os = new ByteArrayOutputStream(); + zis.transferTo(os); + entryMap.put(entry.getName(), os.toByteArray()); + zis.closeEntry(); + } + } + + return entryMap; + } +}