Merge pull request 'refactor: implement archive extraction functionality with support for RAR and ZIP formats' (#35) from refactor-architecture into main

Reviewed-on: #35
This commit is contained in:
rov 2026-03-26 10:17:27 -03:00
commit 9548e07e42
6 changed files with 191 additions and 47 deletions

View File

@ -130,6 +130,12 @@
<version>1.1</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.github.junrar</groupId>
<artifactId>junrar</artifactId>
<version>7.5.8</version>
<scope>compile</scope>
</dependency>
</dependencies>
<build>

View File

@ -10,6 +10,7 @@ import com.magamochi.common.exception.UnprocessableException;
import com.magamochi.common.model.enumeration.ContentType;
import com.magamochi.content.model.dto.PresignedImportRequestDTO;
import com.magamochi.content.model.dto.PresignedImportResponseDTO;
import com.magamochi.content.model.entity.MangaContent;
import com.magamochi.content.model.entity.MangaContentImage;
import com.magamochi.content.model.entity.MangaImportJob;
import com.magamochi.content.model.enumeration.ImportJobStatus;
@ -17,19 +18,18 @@ import com.magamochi.content.model.repository.MangaContentImageRepository;
import com.magamochi.content.model.repository.MangaImportJobRepository;
import com.magamochi.content.queue.command.FileImportCommand;
import com.magamochi.content.queue.producer.FileImportProducer;
import com.magamochi.content.service.archive.ArchiveExtractorFactory;
import com.magamochi.image.service.ImageFetchService;
import com.magamochi.image.service.ImageService;
import com.magamochi.image.service.S3Service;
import com.magamochi.ingestion.service.ContentProviderService;
import jakarta.validation.constraints.NotNull;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator;
import org.apache.tika.io.TikaInputStream;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
@ -45,11 +45,14 @@ public class ContentImportService {
private final ContentIngestService contentIngestService;
private final ImageFetchService imageFetchService;
private final S3Service s3Service;
private final ImageService imageService;
private final FileImportProducer fileImportProducer;
private final MangaContentImageRepository mangaContentImageRepository;
private final MangaImportJobRepository mangaImportJobRepository;
private final ImageService imageService;
private final FileImportProducer fileImportProducer;
private final ArchiveExtractorFactory extractorFactory;
public void importFiles(String malId, String aniListId, @NotNull List<MultipartFile> files) {
if (isBlank(malId) && isBlank(aniListId)) {
@ -128,52 +131,18 @@ public class ContentImportService {
@Transactional
public void importFile(Long mangaContentProviderId, String filename, Long mangaImportJobId) {
var contentName = removeImportPrefix(removeFileExtension(filename));
if (nonNull(mangaImportJobId)) {
var jobOpt = mangaImportJobRepository.findById(mangaImportJobId);
if (jobOpt.isPresent()) {
contentName = removeFileExtension(jobOpt.get().getOriginalFilename());
}
}
var contentName = determineContentName(filename, mangaImportJobId);
var mangaContent =
contentIngestService.ingest(mangaContentProviderId, contentName, null, "en-US");
try (var is = s3Service.getFileStream(filename);
var zis = new ZipInputStream(is)) {
try (var originalStream = s3Service.getFileStream(filename);
var tikaStream = TikaInputStream.get(originalStream)) {
var extractor = extractorFactory.getExtractor(tikaStream);
var entryMap = extractor.extract(tikaStream);
Map<String, byte[]> entryMap =
new TreeMap<>(CaseInsensitiveSimpleNaturalComparator.getInstance());
ZipEntry entry;
while ((entry = zis.getNextEntry()) != null) {
if (entry.isDirectory()) {
continue;
}
var os = new ByteArrayOutputStream();
zis.transferTo(os);
entryMap.put(entry.getName(), os.toByteArray());
zis.closeEntry();
}
var position = 0;
for (var sortedEntry : entryMap.entrySet()) {
var bytes = sortedEntry.getValue();
var imageId = imageFetchService.uploadImage(bytes, null, ContentType.CONTENT_IMAGE);
var image = imageService.find(imageId);
mangaContentImageRepository.save(
MangaContentImage.builder()
.image(image)
.mangaContent(mangaContent)
.position(position++)
.build());
}
saveImages(entryMap, mangaContent);
} catch (Exception e) {
throw new RuntimeException("Failed to process zip: " + filename, e);
throw new UnprocessableException("Failed to process archive: " + filename, e);
}
mangaContent.setDownloaded(true);
@ -202,4 +171,35 @@ public class ContentImportService {
return path.replace("temp/import/", "");
}
/**
 * Resolves the display name to use for the imported content.
 *
 * <p>The fallback is derived from {@code filename} (extension removed, then import prefix
 * removed). When an import job id is supplied and that job exists, the job's original filename
 * (extension removed) is used instead.
 *
 * @param filename storage name of the uploaded archive
 * @param mangaImportJobId id of the import job, or {@code null} when there is none
 * @return the content name derived from the job if found, otherwise from {@code filename}
 */
private String determineContentName(String filename, Long mangaImportJobId) {
  // Computed up front, exactly as before, even when the job later overrides it.
  var nameFromFilename = removeImportPrefix(removeFileExtension(filename));

  if (nonNull(mangaImportJobId)) {
    var importJob = mangaImportJobRepository.findById(mangaImportJobId);
    if (importJob.isPresent()) {
      return removeFileExtension(importJob.get().getOriginalFilename());
    }
  }

  return nameFromFilename;
}
/**
 * Uploads every extracted entry as a content image and links it to the given content.
 *
 * <p>Entries are processed in the iteration order of {@code entryMap}; the n-th entry (starting
 * at zero) is stored with {@code position == n}.
 *
 * @param entryMap extracted archive entries, keyed by entry name
 * @param mangaContent content the stored images are attached to
 * @throws NoSuchAlgorithmException declared for the image upload step
 */
private void saveImages(Map<String, byte[]> entryMap, MangaContent mangaContent)
    throws NoSuchAlgorithmException {
  var nextPosition = 0;

  // Only the bytes are needed here; the keys merely determine the iteration order.
  for (var imageBytes : entryMap.values()) {
    var uploadedImageId =
        imageFetchService.uploadImage(imageBytes, null, ContentType.CONTENT_IMAGE);
    var storedImage = imageService.find(uploadedImageId);

    var contentImage =
        MangaContentImage.builder()
            .image(storedImage)
            .mangaContent(mangaContent)
            .position(nextPosition)
            .build();
    mangaContentImageRepository.save(contentImage);

    nextPosition++;
  }
}
}

View File

@ -0,0 +1,10 @@
package com.magamochi.content.service.archive;
import java.io.InputStream;
import java.util.Map;
/**
 * Contract for unpacking one archive format into memory.
 *
 * <p>NOTE(review): implementations are presumably registered as Spring components and chosen by
 * MIME type through {@link #supports(String)} — confirm against the factory that consumes them.
 */
public interface ArchiveExtractor {
/**
 * Reports whether this extractor can handle archives of the given MIME type.
 *
 * @param mimeType detected MIME type of the archive; NOTE(review): the implementations shipped
 *     with this interface answer {@code false} for {@code null} — new implementations should
 *     probably do the same
 * @return {@code true} if {@link #extract(InputStream)} may be called for this type
 */
boolean supports(String mimeType);
/**
 * Reads the whole archive from {@code stream} and returns its entries.
 *
 * <p>NOTE(review): the implementations shipped with this interface skip directory entries and
 * return a map sorted by entry name, and callers iterate the map in order — confirm before
 * returning an unordered map from a new implementation. Whether {@code stream} is closed is
 * implementation-specific.
 *
 * @param stream archive content, positioned at the start of the archive
 * @return entry name mapped to the entry's full content
 * @throws Exception if the archive cannot be read or extracted
 */
Map<String, byte[]> extract(InputStream stream) throws Exception;
}

View File

@ -0,0 +1,23 @@
package com.magamochi.content.service.archive;
import java.util.List;
import lombok.RequiredArgsConstructor;
import org.apache.tika.Tika;
import org.apache.tika.io.TikaInputStream;
import org.springframework.stereotype.Component;
/**
 * Picks the {@link ArchiveExtractor} matching the MIME type that Tika detects for an archive
 * stream.
 */
@Component
@RequiredArgsConstructor
public class ArchiveExtractorFactory {
  /** Candidate extractors, consulted in list order; the first match wins. */
  private final List<ArchiveExtractor> extractors;

  /** Initialized inline, so Lombok does not add it to the generated constructor. */
  private final Tika tika = new Tika();

  /**
   * Detects the archive type of {@code tikaStream} and returns the first extractor supporting it.
   *
   * @param tikaStream archive content; the same stream is afterwards handed to the extractor by
   *     the caller (NOTE(review): this relies on Tika restoring the stream position after
   *     detection — see the Tika facade documentation)
   * @return the first registered extractor whose {@code supports} accepts the detected type
   * @throws IllegalArgumentException if no registered extractor supports the detected type
   * @throws Exception if type detection fails
   */
  public ArchiveExtractor getExtractor(TikaInputStream tikaStream) throws Exception {
    var detectedType = tika.detect(tikaStream);

    for (var candidate : extractors) {
      if (candidate.supports(detectedType)) {
        return candidate;
      }
    }

    throw new IllegalArgumentException("Unsupported archive format: " + detectedType);
  }
}

View File

@ -0,0 +1,58 @@
package com.magamochi.content.service.archive;
import static java.util.Objects.isNull;
import static java.util.Objects.nonNull;
import com.github.junrar.Archive;
import com.github.junrar.rarfile.FileHeader;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.Map;
import java.util.TreeMap;
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator;
import org.springframework.stereotype.Component;
/** {@link ArchiveExtractor} for RAR archives, implemented with junrar's {@link Archive}. */
@Component
public class RarExtractor implements ArchiveExtractor {

  /**
   * Accepts MIME types starting with {@code application/x-rar-compressed} or {@code
   * application/rar}.
   *
   * @param mimeType detected MIME type, may be {@code null}
   * @return {@code true} for a RAR MIME type, {@code false} otherwise (including {@code null})
   */
  @Override
  public boolean supports(String mimeType) {
    return nonNull(mimeType)
        && (mimeType.startsWith("application/x-rar-compressed")
            || mimeType.startsWith("application/rar"));
  }

  /**
   * Extracts all non-directory entries of the RAR archive into memory.
   *
   * <p>The archive is opened from a file, so the stream is first copied to a temporary file,
   * which is deleted again whether or not extraction succeeds. The stream itself is not closed
   * here.
   *
   * @param stream RAR archive content
   * @return entry name mapped to entry bytes, ordered by {@link
   *     CaseInsensitiveSimpleNaturalComparator}
   * @throws Exception if the stream cannot be copied or the archive cannot be read
   */
  @Override
  public Map<String, byte[]> extract(InputStream stream) throws Exception {
    Map<String, byte[]> pages = new TreeMap<>(CaseInsensitiveSimpleNaturalComparator.getInstance());

    var spoolFile = Files.createTempFile("manga_import_rar_", ".rar");
    try {
      Files.copy(stream, spoolFile, StandardCopyOption.REPLACE_EXISTING);
      try (var rar = new Archive(spoolFile.toFile())) {
        collectEntries(rar, pages);
      }
    } finally {
      Files.deleteIfExists(spoolFile);
    }

    return pages;
  }

  /** Walks the archive headers in order and stores every non-directory entry in {@code sink}. */
  private static void collectEntries(Archive rar, Map<String, byte[]> sink) throws Exception {
    while (true) {
      FileHeader header = rar.nextFileHeader();
      if (isNull(header)) {
        return; // no more headers
      }
      if (!header.isDirectory()) {
        // The whole entry is buffered in memory.
        var buffer = new ByteArrayOutputStream();
        rar.extractFile(header, buffer);
        sink.put(header.getFileName(), buffer.toByteArray());
      }
    }
  }
}

View File

@ -0,0 +1,47 @@
package com.magamochi.content.service.archive;
import static java.util.Objects.isNull;
import static java.util.Objects.nonNull;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.Map;
import java.util.TreeMap;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator;
import org.springframework.stereotype.Component;
/** {@link ArchiveExtractor} for ZIP archives, implemented with {@link ZipInputStream}. */
@Component
public class ZipExtractor implements ArchiveExtractor {

  /**
   * Accepts MIME types starting with {@code application/zip}.
   *
   * @param mimeType detected MIME type, may be {@code null}
   * @return {@code true} for a ZIP MIME type, {@code false} otherwise (including {@code null})
   */
  @Override
  public boolean supports(String mimeType) {
    return !isNull(mimeType) && mimeType.startsWith("application/zip");
  }

  /**
   * Extracts all non-directory entries of the ZIP archive into memory.
   *
   * <p>The wrapping {@link ZipInputStream} is closed on return, which also closes {@code stream}.
   *
   * @param stream ZIP archive content
   * @return entry name mapped to entry bytes, ordered by {@link
   *     CaseInsensitiveSimpleNaturalComparator}
   * @throws Exception if the archive cannot be read
   */
  @Override
  public Map<String, byte[]> extract(InputStream stream) throws Exception {
    Map<String, byte[]> pages = new TreeMap<>(CaseInsensitiveSimpleNaturalComparator.getInstance());

    try (var zip = new ZipInputStream(stream)) {
      for (ZipEntry current = zip.getNextEntry();
          nonNull(current);
          current = zip.getNextEntry()) {
        if (!current.isDirectory()) {
          // transferTo reads up to the end of the current entry; the whole entry is buffered.
          var buffer = new ByteArrayOutputStream();
          zip.transferTo(buffer);
          pages.put(current.getName(), buffer.toByteArray());
          zip.closeEntry();
        }
      }
    }

    return pages;
  }
}