refactor: implement archive extraction functionality with support for RAR and ZIP formats #35
6
pom.xml
6
pom.xml
@ -130,6 +130,12 @@
|
|||||||
<version>1.1</version>
|
<version>1.1</version>
|
||||||
<scope>compile</scope>
|
<scope>compile</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.github.junrar</groupId>
|
||||||
|
<artifactId>junrar</artifactId>
|
||||||
|
<version>7.5.8</version>
|
||||||
|
<scope>compile</scope>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
|||||||
@ -10,6 +10,7 @@ import com.magamochi.common.exception.UnprocessableException;
|
|||||||
import com.magamochi.common.model.enumeration.ContentType;
|
import com.magamochi.common.model.enumeration.ContentType;
|
||||||
import com.magamochi.content.model.dto.PresignedImportRequestDTO;
|
import com.magamochi.content.model.dto.PresignedImportRequestDTO;
|
||||||
import com.magamochi.content.model.dto.PresignedImportResponseDTO;
|
import com.magamochi.content.model.dto.PresignedImportResponseDTO;
|
||||||
|
import com.magamochi.content.model.entity.MangaContent;
|
||||||
import com.magamochi.content.model.entity.MangaContentImage;
|
import com.magamochi.content.model.entity.MangaContentImage;
|
||||||
import com.magamochi.content.model.entity.MangaImportJob;
|
import com.magamochi.content.model.entity.MangaImportJob;
|
||||||
import com.magamochi.content.model.enumeration.ImportJobStatus;
|
import com.magamochi.content.model.enumeration.ImportJobStatus;
|
||||||
@ -17,19 +18,18 @@ import com.magamochi.content.model.repository.MangaContentImageRepository;
|
|||||||
import com.magamochi.content.model.repository.MangaImportJobRepository;
|
import com.magamochi.content.model.repository.MangaImportJobRepository;
|
||||||
import com.magamochi.content.queue.command.FileImportCommand;
|
import com.magamochi.content.queue.command.FileImportCommand;
|
||||||
import com.magamochi.content.queue.producer.FileImportProducer;
|
import com.magamochi.content.queue.producer.FileImportProducer;
|
||||||
|
import com.magamochi.content.service.archive.ArchiveExtractorFactory;
|
||||||
import com.magamochi.image.service.ImageFetchService;
|
import com.magamochi.image.service.ImageFetchService;
|
||||||
import com.magamochi.image.service.ImageService;
|
import com.magamochi.image.service.ImageService;
|
||||||
import com.magamochi.image.service.S3Service;
|
import com.magamochi.image.service.S3Service;
|
||||||
import com.magamochi.ingestion.service.ContentProviderService;
|
import com.magamochi.ingestion.service.ContentProviderService;
|
||||||
import jakarta.validation.constraints.NotNull;
|
import jakarta.validation.constraints.NotNull;
|
||||||
import java.io.ByteArrayOutputStream;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.security.NoSuchAlgorithmException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.zip.ZipEntry;
|
|
||||||
import java.util.zip.ZipInputStream;
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.log4j.Log4j2;
|
import lombok.extern.log4j.Log4j2;
|
||||||
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator;
|
import org.apache.tika.io.TikaInputStream;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.transaction.annotation.Propagation;
|
import org.springframework.transaction.annotation.Propagation;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
@ -45,11 +45,14 @@ public class ContentImportService {
|
|||||||
private final ContentIngestService contentIngestService;
|
private final ContentIngestService contentIngestService;
|
||||||
private final ImageFetchService imageFetchService;
|
private final ImageFetchService imageFetchService;
|
||||||
private final S3Service s3Service;
|
private final S3Service s3Service;
|
||||||
|
private final ImageService imageService;
|
||||||
|
|
||||||
private final FileImportProducer fileImportProducer;
|
|
||||||
private final MangaContentImageRepository mangaContentImageRepository;
|
private final MangaContentImageRepository mangaContentImageRepository;
|
||||||
private final MangaImportJobRepository mangaImportJobRepository;
|
private final MangaImportJobRepository mangaImportJobRepository;
|
||||||
private final ImageService imageService;
|
|
||||||
|
private final FileImportProducer fileImportProducer;
|
||||||
|
|
||||||
|
private final ArchiveExtractorFactory extractorFactory;
|
||||||
|
|
||||||
public void importFiles(String malId, String aniListId, @NotNull List<MultipartFile> files) {
|
public void importFiles(String malId, String aniListId, @NotNull List<MultipartFile> files) {
|
||||||
if (isBlank(malId) && isBlank(aniListId)) {
|
if (isBlank(malId) && isBlank(aniListId)) {
|
||||||
@ -128,52 +131,18 @@ public class ContentImportService {
|
|||||||
|
|
||||||
@Transactional
|
@Transactional
|
||||||
public void importFile(Long mangaContentProviderId, String filename, Long mangaImportJobId) {
|
public void importFile(Long mangaContentProviderId, String filename, Long mangaImportJobId) {
|
||||||
var contentName = removeImportPrefix(removeFileExtension(filename));
|
var contentName = determineContentName(filename, mangaImportJobId);
|
||||||
|
|
||||||
if (nonNull(mangaImportJobId)) {
|
|
||||||
var jobOpt = mangaImportJobRepository.findById(mangaImportJobId);
|
|
||||||
if (jobOpt.isPresent()) {
|
|
||||||
contentName = removeFileExtension(jobOpt.get().getOriginalFilename());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var mangaContent =
|
var mangaContent =
|
||||||
contentIngestService.ingest(mangaContentProviderId, contentName, null, "en-US");
|
contentIngestService.ingest(mangaContentProviderId, contentName, null, "en-US");
|
||||||
|
|
||||||
try (var is = s3Service.getFileStream(filename);
|
try (var originalStream = s3Service.getFileStream(filename);
|
||||||
var zis = new ZipInputStream(is)) {
|
var tikaStream = TikaInputStream.get(originalStream)) {
|
||||||
|
var extractor = extractorFactory.getExtractor(tikaStream);
|
||||||
|
var entryMap = extractor.extract(tikaStream);
|
||||||
|
|
||||||
Map<String, byte[]> entryMap =
|
saveImages(entryMap, mangaContent);
|
||||||
new TreeMap<>(CaseInsensitiveSimpleNaturalComparator.getInstance());
|
|
||||||
|
|
||||||
ZipEntry entry;
|
|
||||||
while ((entry = zis.getNextEntry()) != null) {
|
|
||||||
if (entry.isDirectory()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
var os = new ByteArrayOutputStream();
|
|
||||||
zis.transferTo(os);
|
|
||||||
entryMap.put(entry.getName(), os.toByteArray());
|
|
||||||
zis.closeEntry();
|
|
||||||
}
|
|
||||||
|
|
||||||
var position = 0;
|
|
||||||
for (var sortedEntry : entryMap.entrySet()) {
|
|
||||||
var bytes = sortedEntry.getValue();
|
|
||||||
|
|
||||||
var imageId = imageFetchService.uploadImage(bytes, null, ContentType.CONTENT_IMAGE);
|
|
||||||
var image = imageService.find(imageId);
|
|
||||||
|
|
||||||
mangaContentImageRepository.save(
|
|
||||||
MangaContentImage.builder()
|
|
||||||
.image(image)
|
|
||||||
.mangaContent(mangaContent)
|
|
||||||
.position(position++)
|
|
||||||
.build());
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new RuntimeException("Failed to process zip: " + filename, e);
|
throw new UnprocessableException("Failed to process archive: " + filename, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
mangaContent.setDownloaded(true);
|
mangaContent.setDownloaded(true);
|
||||||
@ -202,4 +171,35 @@ public class ContentImportService {
|
|||||||
|
|
||||||
return path.replace("temp/import/", "");
|
return path.replace("temp/import/", "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String determineContentName(String filename, Long mangaImportJobId) {
|
||||||
|
var contentName = removeImportPrefix(removeFileExtension(filename));
|
||||||
|
|
||||||
|
if (nonNull(mangaImportJobId)) {
|
||||||
|
var jobOpt = mangaImportJobRepository.findById(mangaImportJobId);
|
||||||
|
if (jobOpt.isPresent()) {
|
||||||
|
contentName = removeFileExtension(jobOpt.get().getOriginalFilename());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return contentName;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void saveImages(Map<String, byte[]> entryMap, MangaContent mangaContent)
|
||||||
|
throws NoSuchAlgorithmException {
|
||||||
|
var position = 0;
|
||||||
|
for (var sortedEntry : entryMap.entrySet()) {
|
||||||
|
var bytes = sortedEntry.getValue();
|
||||||
|
|
||||||
|
var imageId = imageFetchService.uploadImage(bytes, null, ContentType.CONTENT_IMAGE);
|
||||||
|
var image = imageService.find(imageId);
|
||||||
|
|
||||||
|
mangaContentImageRepository.save(
|
||||||
|
MangaContentImage.builder()
|
||||||
|
.image(image)
|
||||||
|
.mangaContent(mangaContent)
|
||||||
|
.position(position++)
|
||||||
|
.build());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -0,0 +1,10 @@
|
|||||||
|
package com.magamochi.content.service.archive;
|
||||||
|
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/** Strategy for unpacking one archive format into in-memory entries. */
public interface ArchiveExtractor {

  /**
   * Tells whether this extractor handles the given MIME type.
   *
   * @param mimeType MIME type string to test
   * @return {@code true} if this extractor can process archives of that type
   */
  boolean supports(String mimeType);

  /**
   * Reads the archive from {@code stream} and returns its entries.
   *
   * @param stream archive data to read
   * @return map from entry name to that entry's bytes
   * @throws Exception if the archive cannot be extracted
   */
  Map<String, byte[]> extract(InputStream stream) throws Exception;
}
|
||||||
@ -0,0 +1,23 @@
|
|||||||
|
package com.magamochi.content.service.archive;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.apache.tika.Tika;
|
||||||
|
import org.apache.tika.io.TikaInputStream;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class ArchiveExtractorFactory {
|
||||||
|
private final List<ArchiveExtractor> extractors;
|
||||||
|
private final Tika tika = new Tika();
|
||||||
|
|
||||||
|
public ArchiveExtractor getExtractor(TikaInputStream tikaStream) throws Exception {
|
||||||
|
var mimeType = tika.detect(tikaStream);
|
||||||
|
|
||||||
|
return extractors.stream()
|
||||||
|
.filter(extractor -> extractor.supports(mimeType))
|
||||||
|
.findFirst()
|
||||||
|
.orElseThrow(() -> new IllegalArgumentException("Unsupported archive format: " + mimeType));
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,58 @@
|
|||||||
|
package com.magamochi.content.service.archive;
|
||||||
|
|
||||||
|
import static java.util.Objects.isNull;
|
||||||
|
import static java.util.Objects.nonNull;
|
||||||
|
|
||||||
|
import com.github.junrar.Archive;
|
||||||
|
import com.github.junrar.rarfile.FileHeader;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.StandardCopyOption;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class RarExtractor implements ArchiveExtractor {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean supports(String mimeType) {
|
||||||
|
if (isNull(mimeType)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return mimeType.startsWith("application/x-rar-compressed")
|
||||||
|
|| mimeType.startsWith("application/rar");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<String, byte[]> extract(InputStream stream) throws Exception {
|
||||||
|
Map<String, byte[]> entryMap =
|
||||||
|
new TreeMap<>(CaseInsensitiveSimpleNaturalComparator.getInstance());
|
||||||
|
|
||||||
|
var tempFile = Files.createTempFile("manga_import_rar_", ".rar");
|
||||||
|
|
||||||
|
try {
|
||||||
|
Files.copy(stream, tempFile, StandardCopyOption.REPLACE_EXISTING);
|
||||||
|
|
||||||
|
try (var archive = new Archive(tempFile.toFile())) {
|
||||||
|
FileHeader fileHeader;
|
||||||
|
while (nonNull(fileHeader = archive.nextFileHeader())) {
|
||||||
|
if (fileHeader.isDirectory()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
var os = new ByteArrayOutputStream();
|
||||||
|
archive.extractFile(fileHeader, os);
|
||||||
|
entryMap.put(fileHeader.getFileName(), os.toByteArray());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
Files.deleteIfExists(tempFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
return entryMap;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,47 @@
|
|||||||
|
package com.magamochi.content.service.archive;
|
||||||
|
|
||||||
|
import static java.util.Objects.isNull;
|
||||||
|
import static java.util.Objects.nonNull;
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
import java.util.zip.ZipEntry;
|
||||||
|
import java.util.zip.ZipInputStream;
|
||||||
|
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class ZipExtractor implements ArchiveExtractor {
|
||||||
|
@Override
|
||||||
|
public boolean supports(String mimeType) {
|
||||||
|
if (isNull(mimeType)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return mimeType.startsWith("application/zip");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<String, byte[]> extract(InputStream stream) throws Exception {
|
||||||
|
Map<String, byte[]> entryMap =
|
||||||
|
new TreeMap<>(CaseInsensitiveSimpleNaturalComparator.getInstance());
|
||||||
|
|
||||||
|
try (var zis = new ZipInputStream(stream)) {
|
||||||
|
ZipEntry entry;
|
||||||
|
while (nonNull(entry = zis.getNextEntry())) {
|
||||||
|
if (entry.isDirectory()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
var os = new ByteArrayOutputStream();
|
||||||
|
zis.transferTo(os);
|
||||||
|
entryMap.put(entry.getName(), os.toByteArray());
|
||||||
|
zis.closeEntry();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return entryMap;
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user