feat: optimize image cleanup process with improved S3 key scanning and repository query

This commit is contained in:
Rodrigo Verdiani 2026-04-15 11:18:54 -03:00
parent da3114b85e
commit 40991371b7
4 changed files with 25 additions and 22 deletions

View File

@ -1,10 +1,16 @@
package com.magamochi.image.model.repository;
import com.magamochi.image.model.entity.Image;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
/**
 * Spring Data JPA repository for {@link Image} entities, keyed by {@link UUID}.
 *
 * <p>Besides the CRUD operations inherited from {@link JpaRepository}, it exposes lookups by file
 * hash and by storage object key.
 */
public interface ImageRepository extends JpaRepository<Image, UUID> {
/**
 * Looks up a single image by its file hash.
 *
 * <p>No explicit query is declared here; presumably Spring Data derives it from the method name
 * (a {@code fileHash} property on {@link Image}) — verify against the entity.
 *
 * @param fileHash hash value to match
 * @return the matching image, or an empty {@link Optional} when none is found
 */
Optional<Image> findByFileHash(String fileHash);
/**
 * Returns the subset of the given object keys that are referenced by a stored {@link Image}.
 *
 * <p>Only the {@code objectKey} column is selected, so no full entities are loaded.
 *
 * <p>NOTE(review): how an empty {@code objectKeys} list is rendered in the {@code IN} predicate
 * depends on the JPA provider and database — confirm, or have callers skip the call for empty
 * input.
 *
 * @param objectKeys candidate object keys to check
 * @return the object keys from {@code objectKeys} that exist on an {@link Image} row
 */
@Query("SELECT i.objectKey FROM Image i WHERE i.objectKey IN :objectKeys")
List<String> findExistingObjectKeys(@Param("objectKeys") List<String> objectKeys);
}

View File

@ -4,7 +4,9 @@ import com.magamochi.common.exception.NotFoundException;
import com.magamochi.image.model.entity.Image;
import com.magamochi.image.model.repository.ImageRepository;
import java.io.InputStream;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
@ -49,8 +51,8 @@ public class ImageService {
.orElseThrow(() -> new NotFoundException("Image not found with ID " + id));
}
public List<Image> findAll() {
return imageRepository.findAll();
/**
 * Returns which of the given object keys are referenced by a stored image.
 *
 * <p>The result is a fresh, mutable {@link Set} so callers get constant-time {@code contains}
 * checks when filtering a page of keys.
 *
 * @param objectKeys candidate object keys to check; may be {@code null} or empty
 * @return the keys from {@code objectKeys} known to the repository; empty when {@code objectKeys}
 *     is {@code null} or empty
 */
public Set<String> findExistingObjectKeys(List<String> objectKeys) {
  // An empty page (e.g. an empty listing) has nothing to match. Skipping the query avoids a
  // pointless round trip and an empty IN (...) list, whose handling is provider dependent.
  if (objectKeys == null || objectKeys.isEmpty()) {
    return new HashSet<>();
  }
  return new HashSet<>(imageRepository.findExistingObjectKeys(objectKeys));
}
public InputStream getStream(Image image) {

View File

@ -4,7 +4,6 @@ import static java.util.Objects.nonNull;
import java.io.InputStream;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import lombok.Getter;
@ -42,8 +41,7 @@ public class S3Service {
return filename;
}
public List<String> listAllObjectKeys() {
var keys = new ArrayList<String>();
public void processObjectKeyPages(java.util.function.Consumer<List<String>> pageConsumer) {
String continuationToken = null;
do {
@ -55,13 +53,12 @@ public class S3Service {
var response = s3Client.listObjectsV2(requestBuilder.build());
response.contents().forEach(s3Object -> keys.add(s3Object.key()));
var page = response.contents().stream().map(S3Object::key).toList();
pageConsumer.accept(page);
continuationToken = response.isTruncated() ? response.nextContinuationToken() : null;
} while (nonNull(continuationToken));
return keys;
}
public void deleteObjects(Set<String> objectKeys) {

View File

@ -1,9 +1,7 @@
package com.magamochi.image.task;
import com.magamochi.image.model.entity.Image;
import com.magamochi.image.service.ImageService;
import com.magamochi.image.service.S3Service;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.springframework.beans.factory.annotation.Value;
@ -31,21 +29,21 @@ public class ImageCleanupTask {
}
public void cleanupImages() {
log.info("Getting unused S3 object keys to remove.");
log.info("Scanning S3 pages for orphaned object keys.");
var keysToRemove = new java.util.HashSet<String>();
var imageKeys = s3Service.listAllObjectKeys();
s3Service.processObjectKeyPages(
page -> {
var existing = imageService.findExistingObjectKeys(page);
page.stream().filter(key -> !existing.contains(key)).forEach(keysToRemove::add);
});
var existingImages =
imageService.findAll().parallelStream()
.map(Image::getObjectKey)
.collect(Collectors.toSet());
if (keysToRemove.isEmpty()) {
log.info("No orphaned objects found.");
return;
}
var keysToRemove =
imageKeys.parallelStream()
.filter(imageKey -> !existingImages.contains(imageKey))
.collect(Collectors.toSet());
log.info("Removing {} objects from S3 storage", keysToRemove.size());
log.info("Removing {} orphaned objects from S3 storage", keysToRemove.size());
s3Service.deleteObjects(keysToRemove);