diff --git a/.env.example b/.env.example
index 40aed6a..9623c6d 100644
--- a/.env.example
+++ b/.env.example
@@ -7,7 +7,6 @@ MINIO_USER=
 MINIO_PASS=
 
 FLARESOLVERR_ENDPOINT=localhost:8191
-MANGAMATCHER_ENDPOINT=localhost:8000
 
 RABBITMQ_HOST=localhost
 RABBITMQ_PORT=5672
diff --git a/src/main/java/com/magamochi/mangamochi/client/RapidFuzzClient.java b/src/main/java/com/magamochi/mangamochi/client/RapidFuzzClient.java
deleted file mode 100644
index f2f7741..0000000
--- a/src/main/java/com/magamochi/mangamochi/client/RapidFuzzClient.java
+++ /dev/null
@@ -1,16 +0,0 @@
-package com.magamochi.mangamochi.client;
-
-import java.util.List;
-import org.springframework.cloud.openfeign.FeignClient;
-import org.springframework.web.bind.annotation.PostMapping;
-import org.springframework.web.bind.annotation.RequestBody;
-
-@FeignClient(name = "rapidFuzz", url = "${manga-matcher.endpoint}")
-public interface RapidFuzzClient {
-  @PostMapping
-  Response mangaSearch(@RequestBody Request dto);
-
-  record Request(String title, List<String> options) {}
-
-  record Response(boolean match_found, String best_match, double similarity) {}
-}
diff --git a/src/main/java/com/magamochi/mangamochi/model/dto/TitleMatchRequestDTO.java b/src/main/java/com/magamochi/mangamochi/model/dto/TitleMatchRequestDTO.java
new file mode 100644
index 0000000..a31b88c
--- /dev/null
+++ b/src/main/java/com/magamochi/mangamochi/model/dto/TitleMatchRequestDTO.java
@@ -0,0 +1,19 @@
+package com.magamochi.mangamochi.model.dto;
+
+import java.util.List;
+import lombok.Builder;
+import lombok.Getter;
+
+/** Input for a fuzzy title lookup: the title, the candidate titles and the match threshold. */
+@Getter
+@Builder
+public class TitleMatchRequestDTO {
+  /** Title to look for. */
+  private String title;
+
+  /** Candidate titles the title is compared against. */
+  private List<String> options;
+
+  /** Minimum similarity, on a 0-100 scale, required to report a match. */
+  @Builder.Default private int threshold = 85;
+}
diff --git a/src/main/java/com/magamochi/mangamochi/model/dto/TitleMatchResponseDTO.java b/src/main/java/com/magamochi/mangamochi/model/dto/TitleMatchResponseDTO.java
new file mode 100644
index 0000000..c2a3362
--- /dev/null
+++ b/src/main/java/com/magamochi/mangamochi/model/dto/TitleMatchResponseDTO.java
@@ -0,0 +1,18 @@
+package com.magamochi.mangamochi.model.dto;
+
+import lombok.Builder;
+import lombok.Getter;
+
+/** Outcome of a fuzzy title lookup. */
+@Getter
+@Builder
+public class TitleMatchResponseDTO {
+  /** Whether any candidate reached the requested threshold. */
+  private boolean matchFound;
+
+  /** The best-scoring candidate; {@code null} when no match was found. */
+  private String bestMatch;
+
+  /** Similarity of the best match on a 0-100 scale; {@code null} when no match was found. */
+  private Double similarity;
+}
diff --git a/src/main/java/com/magamochi/mangamochi/service/MangaCreationService.java b/src/main/java/com/magamochi/mangamochi/service/MangaCreationService.java
index da0542a..a703844 100644
--- a/src/main/java/com/magamochi/mangamochi/service/MangaCreationService.java
+++ b/src/main/java/com/magamochi/mangamochi/service/MangaCreationService.java
@@ -2,7 +2,7 @@ package com.magamochi.mangamochi.service;
 
 import com.google.common.util.concurrent.RateLimiter;
 import com.magamochi.mangamochi.client.JikanClient;
-import com.magamochi.mangamochi.client.RapidFuzzClient;
+import com.magamochi.mangamochi.model.dto.TitleMatchRequestDTO;
 import com.magamochi.mangamochi.model.dto.UpdateMangaDataCommand;
 import com.magamochi.mangamochi.model.entity.Manga;
 import com.magamochi.mangamochi.model.entity.MangaImportReview;
@@ -21,8 +21,9 @@ public class MangaCreationService {
   private final MangaRepository mangaRepository;
   private final MangaImportReviewRepository mangaImportReviewRepository;
 
+  private final TitleMatcherService titleMatcherService;
+
+  private final JikanClient jikanClient;
-  private final RapidFuzzClient rapidFuzzClient;
 
   private final RateLimiter jikanRateLimiter;
 
@@ -42,18 +43,20 @@ public class MangaCreationService {
       return null;
     }
 
-    var request =
-        new RapidFuzzClient.Request(
-            title,
-            jikanResults.stream()
-                .flatMap(
-                    results ->
-                        results.titles().stream()
-                            .map(JikanClient.SearchResponse.MangaData.TitleData::title))
-                .toList());
+    var titleMatchResponse =
+        titleMatcherService.findBestMatch(
+            TitleMatchRequestDTO.builder()
+                .title(title)
+                .options(
+                    jikanResults.stream()
+                        .flatMap(
+                            results ->
+                                results.titles().stream()
+                                    .map(JikanClient.SearchResponse.MangaData.TitleData::title))
+                        .toList())
+                .build());
 
-    var fuzzResults = rapidFuzzClient.mangaSearch(request);
-    if (!fuzzResults.match_found()) {
+    if (!titleMatchResponse.isMatchFound()) {
       createMangaImportReview(title, url, provider);
       log.warn("No match found for manga with title {}", title);
       return null;
@@ -66,7 +69,7 @@ public class MangaCreationService {
                     results.titles().stream()
                         .map(JikanClient.SearchResponse.MangaData.TitleData::title)
                         .toList()
-                        .contains(fuzzResults.best_match()))
+                        .contains(titleMatchResponse.getBestMatch()))
             .findFirst();
     if (resultOptional.isEmpty()) {
       createMangaImportReview(title, url, provider);
diff --git a/src/main/java/com/magamochi/mangamochi/service/TitleMatcherService.java b/src/main/java/com/magamochi/mangamochi/service/TitleMatcherService.java
new file mode 100644
index 0000000..de6ab3a
--- /dev/null
+++ b/src/main/java/com/magamochi/mangamochi/service/TitleMatcherService.java
@@ -0,0 +1,89 @@
+package com.magamochi.mangamochi.service;
+
+import static org.apache.commons.lang3.StringUtils.isBlank;
+import static org.springframework.util.CollectionUtils.isEmpty;
+
+import com.magamochi.mangamochi.model.dto.TitleMatchRequestDTO;
+import com.magamochi.mangamochi.model.dto.TitleMatchResponseDTO;
+import lombok.extern.log4j.Log4j2;
+import org.apache.commons.text.similarity.LevenshteinDistance;
+import org.springframework.stereotype.Service;
+
+/** Picks, from a list of candidate titles, the one closest to a given title. */
+@Log4j2
+@Service
+public class TitleMatcherService {
+  private final LevenshteinDistance levenshteinDistance = LevenshteinDistance.getDefaultInstance();
+
+  /**
+   * Scores every option against the requested title and reports the highest-scoring one when it
+   * reaches the request's threshold.
+   *
+   * @param request title, candidate options and threshold (0-100)
+   * @return a response whose best match and similarity are populated only when a match was found
+   * @throws IllegalArgumentException if the request is null, the title is blank or there are no
+   *     options
+   */
+  public TitleMatchResponseDTO findBestMatch(TitleMatchRequestDTO request) {
+    if (request == null || isBlank(request.getTitle()) || isEmpty(request.getOptions())) {
+      throw new IllegalArgumentException("Title and options are required");
+    }
+
+    log.info("Finding best match for {}. Options: {}", request.getTitle(), request.getOptions());
+
+    String bestMatch = null;
+    double bestScore = 0.0;
+
+    for (var option : request.getOptions()) {
+      // LevenshteinDistance rejects null input; skip a null candidate instead of failing the
+      // whole lookup.
+      if (option == null) {
+        continue;
+      }
+
+      var score = calculateSimilarityScore(request.getTitle(), option);
+
+      if (score > bestScore) {
+        bestScore = score;
+        bestMatch = option;
+      }
+    }
+
+    // bestMatch is still null when every option scored 0, so a threshold of 0 or less must not
+    // be reported as a match.
+    if (bestMatch != null && bestScore >= request.getThreshold()) {
+      log.info(
+          "Found best match for {}: {}. Similarity: {}", request.getTitle(), bestMatch, bestScore);
+
+      return TitleMatchResponseDTO.builder()
+          .matchFound(true)
+          .bestMatch(bestMatch)
+          .similarity(bestScore)
+          .build();
+    }
+
+    log.info("No match found for {}. Threshold: {}", request.getTitle(), request.getThreshold());
+
+    return TitleMatchResponseDTO.builder().matchFound(false).build();
+  }
+
+  /**
+   * Converts the Levenshtein distance between two strings into a 0-100 similarity percentage,
+   * rounded to two decimal places.
+   */
+  private double calculateSimilarityScore(String title, String option) {
+    var dist = levenshteinDistance.apply(title, option);
+
+    var maxLength = Math.max(title.length(), option.length());
+    if (maxLength == 0) {
+      return 100.0;
+    }
+
+    // Calculate similarity: 100 * (1 - (distance / max_length))
+    // This scales the distance into a percentage.
+    var similarity = 100.0 * (1.0 - ((double) dist / maxLength));
+
+    // Round to two decimal places for a cleaner result
+    return Math.round(similarity * 100.0) / 100.0;
+  }
+}
diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml
index 895fa4a..d68da5c 100644
--- a/src/main/resources/application.yml
+++ b/src/main/resources/application.yml
@@ -53,9 +53,6 @@ jwt:
   refresh-secret: MIV9ctIwrImmrZBjh9QueNEcDOLLVv9Rephii+0DKbk=
   refresh-expiration: 2629746000
 
-manga-matcher:
-  endpoint: ${MANGAMATCHER_ENDPOINT}
-
 resilience4j:
   retry:
     instances: