feat: implement title matching service and refactor manga creation logic

This commit is contained in:
Rodrigo Verdiani 2025-11-29 19:44:51 -03:00
parent 8b79776b27
commit 4b0a5ab3e5
7 changed files with 111 additions and 34 deletions

View File

@ -7,7 +7,6 @@ MINIO_USER=
MINIO_PASS=
FLARESOLVERR_ENDPOINT=localhost:8191
MANGAMATCHER_ENDPOINT=localhost:8000
RABBITMQ_HOST=localhost
RABBITMQ_PORT=5672

View File

@ -1,16 +0,0 @@
package com.magamochi.mangamochi.client;
import java.util.List;
import org.springframework.cloud.openfeign.FeignClient;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
/**
 * Feign client named {@code rapidFuzz}; its base URL is taken from the
 * {@code manga-matcher.endpoint} property.
 *
 * <p>NOTE(review): presumably fronts an external fuzzy title-matching service — confirm against
 * the deployment that backs {@code MANGAMATCHER_ENDPOINT}.
 */
@FeignClient(name = "rapidFuzz", url = "${manga-matcher.endpoint}")
public interface RapidFuzzClient {
/**
 * Sends {@code dto} as the request body of a POST to the configured URL (no extra path is
 * declared on the mapping) and returns the decoded response.
 *
 * @param dto the title to look up together with the candidate titles
 * @return the response produced by the remote endpoint
 */
@PostMapping
Response mangaSearch(@RequestBody Request dto);
/** Request payload: the title to look up and the candidate titles to compare it with. */
record Request(String title, List<String> options) {}
/**
 * Response payload: a match flag, the best-matching candidate and its similarity.
 *
 * <p>NOTE(review): the snake_case component names presumably mirror the remote JSON field
 * names — confirm before renaming.
 */
record Response(boolean match_found, String best_match, double similarity) {}
}

View File

@ -0,0 +1,15 @@
package com.magamochi.mangamochi.model.dto;
import java.util.List;
import lombok.Builder;
import lombok.Getter;
/**
 * Input for a title-matching lookup: the title to look for, the candidate titles to compare it
 * with, and the minimum similarity a candidate must reach to count as a match.
 */
@Getter
@Builder
public class TitleMatchRequestDTO {
// Title to look up among the candidates.
private String title;
// Candidate titles the lookup title is compared against.
private List<String> options;
// Minimum similarity score required for a match; 85 unless overridden on the builder.
@Builder.Default private int threshold = 85;
}

View File

@ -0,0 +1,12 @@
package com.magamochi.mangamochi.model.dto;
import lombok.Builder;
import lombok.Getter;
/**
 * Result of a title-matching lookup.
 *
 * <p>Instances are created through the Lombok builder and read through the generated getters
 * ({@code isMatchFound()}, {@code getBestMatch()}, {@code getSimilarity()}).
 */
@Getter
@Builder
public class TitleMatchResponseDTO {
  /** Whether a candidate was accepted as a match. */
  private boolean matchFound;

  /** The accepted candidate; {@code null} when the builder was not given one. */
  private String bestMatch;

  /** Similarity score of the accepted candidate; {@code null} when the builder was not given one. */
  private Double similarity;
}

View File

@ -2,7 +2,7 @@ package com.magamochi.mangamochi.service;
import com.google.common.util.concurrent.RateLimiter;
import com.magamochi.mangamochi.client.JikanClient;
import com.magamochi.mangamochi.client.RapidFuzzClient;
import com.magamochi.mangamochi.model.dto.TitleMatchRequestDTO;
import com.magamochi.mangamochi.model.dto.UpdateMangaDataCommand;
import com.magamochi.mangamochi.model.entity.Manga;
import com.magamochi.mangamochi.model.entity.MangaImportReview;
@ -21,8 +21,9 @@ public class MangaCreationService {
private final MangaRepository mangaRepository;
private final MangaImportReviewRepository mangaImportReviewRepository;
private final TitleMatcherService titleMatcherService;
private final JikanClient jikanClient;
private final RapidFuzzClient rapidFuzzClient;
private final RateLimiter jikanRateLimiter;
@ -42,18 +43,20 @@ public class MangaCreationService {
return null;
}
var request =
new RapidFuzzClient.Request(
title,
jikanResults.stream()
.flatMap(
results ->
results.titles().stream()
.map(JikanClient.SearchResponse.MangaData.TitleData::title))
.toList());
var titleMatchResponse =
titleMatcherService.findBestMatch(
TitleMatchRequestDTO.builder()
.title(title)
.options(
jikanResults.stream()
.flatMap(
results ->
results.titles().stream()
.map(JikanClient.SearchResponse.MangaData.TitleData::title))
.toList())
.build());
var fuzzResults = rapidFuzzClient.mangaSearch(request);
if (!fuzzResults.match_found()) {
if (!titleMatchResponse.isMatchFound()) {
createMangaImportReview(title, url, provider);
log.warn("No match found for manga with title {}", title);
return null;
@ -66,7 +69,7 @@ public class MangaCreationService {
results.titles().stream()
.map(JikanClient.SearchResponse.MangaData.TitleData::title)
.toList()
.contains(fuzzResults.best_match()))
.contains(titleMatchResponse.getBestMatch()))
.findFirst();
if (resultOptional.isEmpty()) {
createMangaImportReview(title, url, provider);

View File

@ -0,0 +1,67 @@
package com.magamochi.mangamochi.service;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.springframework.util.CollectionUtils.isEmpty;
import com.magamochi.mangamochi.model.dto.TitleMatchRequestDTO;
import com.magamochi.mangamochi.model.dto.TitleMatchResponseDTO;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.text.similarity.LevenshteinDistance;
import org.springframework.stereotype.Service;
/**
 * Selects, from a list of candidate titles, the one most similar to a given title.
 *
 * <p>Similarity is derived from the Levenshtein edit distance and expressed as a percentage
 * between 0 and 100, rounded to two decimal places.
 */
@Log4j2
@Service
public class TitleMatcherService {
  private final LevenshteinDistance levenshteinDistance = LevenshteinDistance.getDefaultInstance();

  /**
   * Finds the candidate with the highest similarity to the requested title and reports it as a
   * match when its score reaches the request's threshold.
   *
   * <p>When several candidates share the highest score, the first one in the list wins.
   * {@code null} candidates are ignored.
   *
   * @param request the title, the candidate titles and the minimum similarity required
   * @return a response with {@code matchFound=true}, the best candidate and its score when the
   *     threshold is reached; otherwise a response with {@code matchFound=false} and no
   *     candidate or score set
   * @throws IllegalArgumentException if the title is blank or the candidate list is null or empty
   */
  public TitleMatchResponseDTO findBestMatch(TitleMatchRequestDTO request) {
    if (isBlank(request.getTitle()) || isEmpty(request.getOptions())) {
      throw new IllegalArgumentException("Title and options are required");
    }

    log.info("Finding best match for {}. Options: {}", request.getTitle(), request.getOptions());

    String bestMatch = null;
    double bestScore = 0.0;

    for (var option : request.getOptions()) {
      // LevenshteinDistance rejects null input; one missing candidate must not abort the lookup.
      if (option == null) {
        continue;
      }

      var score = calculateSimilarityScore(request.getTitle(), option);
      // The first usable candidate always seeds the result so that a reported score is never
      // paired with a null match; afterwards only a strictly better score replaces it.
      if (bestMatch == null || score > bestScore) {
        bestScore = score;
        bestMatch = option;
      }
    }

    // bestMatch stays null only when every candidate was null; that is never a match.
    if (bestMatch != null && bestScore >= request.getThreshold()) {
      log.info(
          "Found best match for {}: {}. Similarity: {}", request.getTitle(), bestMatch, bestScore);

      return TitleMatchResponseDTO.builder()
          .matchFound(true)
          .bestMatch(bestMatch)
          .similarity(bestScore)
          .build();
    }

    log.info("No match found for {}. Threshold: {}", request.getTitle(), request.getThreshold());

    return TitleMatchResponseDTO.builder().matchFound(false).build();
  }

  /**
   * Converts the Levenshtein distance between two strings into a percentage.
   *
   * @param title the title being looked up; must not be null
   * @param option the candidate title; must not be null
   * @return {@code 100 * (1 - distance / maxLength)} rounded to two decimals, or {@code 100.0}
   *     when both strings are empty
   */
  private double calculateSimilarityScore(String title, String option) {
    int distance = levenshteinDistance.apply(title, option);
    int maxLength = Math.max(title.length(), option.length());

    // Two empty strings are identical; also avoids dividing by zero below.
    if (maxLength == 0) {
      return 100.0;
    }

    // Scale the distance into a percentage: 100 * (1 - (distance / max_length)).
    double similarity = 100.0 * (1.0 - ((double) distance / maxLength));

    // Keep two decimal places for a cleaner result.
    return Math.round(similarity * 100.0) / 100.0;
  }
}

View File

@ -53,9 +53,6 @@ jwt:
refresh-secret: MIV9ctIwrImmrZBjh9QueNEcDOLLVv9Rephii+0DKbk=
refresh-expiration: 2629746000
manga-matcher:
endpoint: ${MANGAMATCHER_ENDPOINT}
resilience4j:
retry:
instances: