feat: implement title matching service and refactor manga creation logic
This commit is contained in:
parent
8b79776b27
commit
4b0a5ab3e5
@ -7,7 +7,6 @@ MINIO_USER=
|
|||||||
MINIO_PASS=
|
MINIO_PASS=
|
||||||
|
|
||||||
FLARESOLVERR_ENDPOINT=localhost:8191
|
FLARESOLVERR_ENDPOINT=localhost:8191
|
||||||
MANGAMATCHER_ENDPOINT=localhost:8000
|
|
||||||
|
|
||||||
RABBITMQ_HOST=localhost
|
RABBITMQ_HOST=localhost
|
||||||
RABBITMQ_PORT=5672
|
RABBITMQ_PORT=5672
|
||||||
|
|||||||
@ -1,16 +0,0 @@
|
|||||||
package com.magamochi.mangamochi.client;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import org.springframework.cloud.openfeign.FeignClient;
|
|
||||||
import org.springframework.web.bind.annotation.PostMapping;
|
|
||||||
import org.springframework.web.bind.annotation.RequestBody;
|
|
||||||
|
|
||||||
@FeignClient(name = "rapidFuzz", url = "${manga-matcher.endpoint}")
|
|
||||||
public interface RapidFuzzClient {
|
|
||||||
@PostMapping
|
|
||||||
Response mangaSearch(@RequestBody Request dto);
|
|
||||||
|
|
||||||
record Request(String title, List<String> options) {}
|
|
||||||
|
|
||||||
record Response(boolean match_found, String best_match, double similarity) {}
|
|
||||||
}
|
|
||||||
@ -0,0 +1,15 @@
|
|||||||
|
package com.magamochi.mangamochi.model.dto;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Builder
|
||||||
|
public class TitleMatchRequestDTO {
|
||||||
|
private String title;
|
||||||
|
|
||||||
|
private List<String> options;
|
||||||
|
|
||||||
|
@Builder.Default private int threshold = 85;
|
||||||
|
}
|
||||||
@ -0,0 +1,12 @@
|
|||||||
|
package com.magamochi.mangamochi.model.dto;
|
||||||
|
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Builder
|
||||||
|
public class TitleMatchResponseDTO {
|
||||||
|
boolean matchFound;
|
||||||
|
String bestMatch;
|
||||||
|
Double similarity;
|
||||||
|
}
|
||||||
@ -2,7 +2,7 @@ package com.magamochi.mangamochi.service;
|
|||||||
|
|
||||||
import com.google.common.util.concurrent.RateLimiter;
|
import com.google.common.util.concurrent.RateLimiter;
|
||||||
import com.magamochi.mangamochi.client.JikanClient;
|
import com.magamochi.mangamochi.client.JikanClient;
|
||||||
import com.magamochi.mangamochi.client.RapidFuzzClient;
|
import com.magamochi.mangamochi.model.dto.TitleMatchRequestDTO;
|
||||||
import com.magamochi.mangamochi.model.dto.UpdateMangaDataCommand;
|
import com.magamochi.mangamochi.model.dto.UpdateMangaDataCommand;
|
||||||
import com.magamochi.mangamochi.model.entity.Manga;
|
import com.magamochi.mangamochi.model.entity.Manga;
|
||||||
import com.magamochi.mangamochi.model.entity.MangaImportReview;
|
import com.magamochi.mangamochi.model.entity.MangaImportReview;
|
||||||
@ -21,8 +21,9 @@ public class MangaCreationService {
|
|||||||
private final MangaRepository mangaRepository;
|
private final MangaRepository mangaRepository;
|
||||||
private final MangaImportReviewRepository mangaImportReviewRepository;
|
private final MangaImportReviewRepository mangaImportReviewRepository;
|
||||||
|
|
||||||
|
private final TitleMatcherService titleMatcherService;
|
||||||
|
|
||||||
private final JikanClient jikanClient;
|
private final JikanClient jikanClient;
|
||||||
private final RapidFuzzClient rapidFuzzClient;
|
|
||||||
|
|
||||||
private final RateLimiter jikanRateLimiter;
|
private final RateLimiter jikanRateLimiter;
|
||||||
|
|
||||||
@ -42,18 +43,20 @@ public class MangaCreationService {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
var request =
|
var titleMatchResponse =
|
||||||
new RapidFuzzClient.Request(
|
titleMatcherService.findBestMatch(
|
||||||
title,
|
TitleMatchRequestDTO.builder()
|
||||||
jikanResults.stream()
|
.title(title)
|
||||||
.flatMap(
|
.options(
|
||||||
results ->
|
jikanResults.stream()
|
||||||
results.titles().stream()
|
.flatMap(
|
||||||
.map(JikanClient.SearchResponse.MangaData.TitleData::title))
|
results ->
|
||||||
.toList());
|
results.titles().stream()
|
||||||
|
.map(JikanClient.SearchResponse.MangaData.TitleData::title))
|
||||||
|
.toList())
|
||||||
|
.build());
|
||||||
|
|
||||||
var fuzzResults = rapidFuzzClient.mangaSearch(request);
|
if (!titleMatchResponse.isMatchFound()) {
|
||||||
if (!fuzzResults.match_found()) {
|
|
||||||
createMangaImportReview(title, url, provider);
|
createMangaImportReview(title, url, provider);
|
||||||
log.warn("No match found for manga with title {}", title);
|
log.warn("No match found for manga with title {}", title);
|
||||||
return null;
|
return null;
|
||||||
@ -66,7 +69,7 @@ public class MangaCreationService {
|
|||||||
results.titles().stream()
|
results.titles().stream()
|
||||||
.map(JikanClient.SearchResponse.MangaData.TitleData::title)
|
.map(JikanClient.SearchResponse.MangaData.TitleData::title)
|
||||||
.toList()
|
.toList()
|
||||||
.contains(fuzzResults.best_match()))
|
.contains(titleMatchResponse.getBestMatch()))
|
||||||
.findFirst();
|
.findFirst();
|
||||||
if (resultOptional.isEmpty()) {
|
if (resultOptional.isEmpty()) {
|
||||||
createMangaImportReview(title, url, provider);
|
createMangaImportReview(title, url, provider);
|
||||||
|
|||||||
@ -0,0 +1,67 @@
|
|||||||
|
package com.magamochi.mangamochi.service;
|
||||||
|
|
||||||
|
import static org.apache.commons.lang3.StringUtils.isBlank;
|
||||||
|
import static org.springframework.util.CollectionUtils.isEmpty;
|
||||||
|
|
||||||
|
import com.magamochi.mangamochi.model.dto.TitleMatchRequestDTO;
|
||||||
|
import com.magamochi.mangamochi.model.dto.TitleMatchResponseDTO;
|
||||||
|
import lombok.extern.log4j.Log4j2;
|
||||||
|
import org.apache.commons.text.similarity.LevenshteinDistance;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
@Log4j2
|
||||||
|
@Service
|
||||||
|
public class TitleMatcherService {
|
||||||
|
private final LevenshteinDistance levenshteinDistance = LevenshteinDistance.getDefaultInstance();
|
||||||
|
|
||||||
|
public TitleMatchResponseDTO findBestMatch(TitleMatchRequestDTO request) {
|
||||||
|
if (isBlank(request.getTitle()) || isEmpty(request.getOptions())) {
|
||||||
|
throw new IllegalArgumentException("Title and options are required");
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("Finding best match for {}. Options: {}", request.getTitle(), request.getOptions());
|
||||||
|
|
||||||
|
String bestMatch = null;
|
||||||
|
double bestScore = 0.0;
|
||||||
|
|
||||||
|
for (var option : request.getOptions()) {
|
||||||
|
var score = calculateSimilarityScore(request.getTitle(), option);
|
||||||
|
|
||||||
|
if (score > bestScore) {
|
||||||
|
bestScore = score;
|
||||||
|
bestMatch = option;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bestScore >= request.getThreshold()) {
|
||||||
|
log.info(
|
||||||
|
"Found best match for {}: {}. Similarity: {}", request.getTitle(), bestMatch, bestScore);
|
||||||
|
|
||||||
|
return TitleMatchResponseDTO.builder()
|
||||||
|
.matchFound(true)
|
||||||
|
.bestMatch(bestMatch)
|
||||||
|
.similarity(bestScore)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("No match found for {}. Threshold: {}", request.getTitle(), request.getThreshold());
|
||||||
|
|
||||||
|
return TitleMatchResponseDTO.builder().matchFound(false).build();
|
||||||
|
}
|
||||||
|
|
||||||
|
private double calculateSimilarityScore(String title, String option) {
|
||||||
|
var dist = levenshteinDistance.apply(title, option);
|
||||||
|
|
||||||
|
var maxLength = Math.max(title.length(), option.length());
|
||||||
|
if (maxLength == 0) {
|
||||||
|
return 100.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate similarity: 100 * (1 - (distance / max_length))
|
||||||
|
// This scales the distance into a percentage.
|
||||||
|
var similarity = 100.0 * (1.0 - ((double) dist / maxLength));
|
||||||
|
|
||||||
|
// Format to two decimal places for a cleaner result
|
||||||
|
return Math.round(similarity * 100.0) / 100.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -53,9 +53,6 @@ jwt:
|
|||||||
refresh-secret: MIV9ctIwrImmrZBjh9QueNEcDOLLVv9Rephii+0DKbk=
|
refresh-secret: MIV9ctIwrImmrZBjh9QueNEcDOLLVv9Rephii+0DKbk=
|
||||||
refresh-expiration: 2629746000
|
refresh-expiration: 2629746000
|
||||||
|
|
||||||
manga-matcher:
|
|
||||||
endpoint: ${MANGAMATCHER_ENDPOINT}
|
|
||||||
|
|
||||||
resilience4j:
|
resilience4j:
|
||||||
retry:
|
retry:
|
||||||
instances:
|
instances:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user