From 476b9858cfd2366d7d8507d3c1e8b77ed87b9d63 Mon Sep 17 00:00:00 2001 From: shinae1023 Date: Thu, 18 Jun 2026 13:42:03 +0900 Subject: [PATCH] =?UTF-8?q?[Fix]=20cohere=20=ED=8C=8C=EC=8B=B1=20=EB=B0=A9?= =?UTF-8?q?=EC=8B=9D=20=EB=B3=80=EA=B2=BD=20(#38)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/import_corpus.py | 2 +- .../service/CohereCorpusEmbeddingClient.java | 53 +++++++++++++------ 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/scripts/import_corpus.py b/scripts/import_corpus.py index 70cff29..1a4c9c3 100644 --- a/scripts/import_corpus.py +++ b/scripts/import_corpus.py @@ -208,7 +208,7 @@ def resolve_detail_classification_id(cur, cache: dict[tuple[str | None, str | No row = fetch_one( cur, """ - select dcm.detail_classification_id as id + select ccm.detail_classification_id as id from corpus_classification_mappings ccm join detail_classifications dcm on dcm.id = ccm.detail_classification_id where ccm.source_job_group_l1 = %s diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CohereCorpusEmbeddingClient.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CohereCorpusEmbeddingClient.java index 7d4ffb4..85d151d 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CohereCorpusEmbeddingClient.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CohereCorpusEmbeddingClient.java @@ -1,5 +1,7 @@ package com.jobdri.jobdri_api.domain.corpus.service; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import lombok.RequiredArgsConstructor; import org.springframework.beans.factory.annotation.Value; import org.springframework.http.HttpHeaders; @@ -17,6 +19,7 @@ public class CohereCorpusEmbeddingClient implements CorpusEmbeddingClient { private final RestClient.Builder restClientBuilder; + private final ObjectMapper objectMapper; @Value("${cohere.api.key:}") private String cohereApiKey; @@ -47,7 +50,7 @@ public List embed(List texts, InputType inputType) { .defaultHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE) .build(); - EmbedResponse response = client.post() + String responseBody = client.post() .uri("/v2/embed") .body(new EmbedRequest( texts, @@ -57,15 +60,9 @@ public List embed(List texts, InputType inputType) { List.of("float") )) .retrieve() - .body(EmbedResponse.class); + .body(String.class); - if (response == null || response.embeddings() == null || response.embeddings().floatEmbeddings() == null) { - throw new IllegalStateException("Cohere 임베딩 응답이 비어 있습니다."); - } - - return response.embeddings().floatEmbeddings().stream() - .map(this::toFloatArray) - .toList(); + return parseEmbeddings(responseBody); } private float[] toFloatArray(List values) { @@ -76,6 +73,36 @@ private float[] toFloatArray(List values) { return array; } + private List parseEmbeddings(String responseBody) { + if (!StringUtils.hasText(responseBody)) { + throw new IllegalStateException("Cohere 임베딩 응답이 비어 있습니다."); + } + + try { + JsonNode root = objectMapper.readTree(responseBody); + JsonNode floatEmbeddings = root.path("embeddings").path("float"); + if (!floatEmbeddings.isArray()) { + throw new IllegalStateException("Cohere 임베딩 응답 형식이 예상과 다릅니다."); + } + + List result = new java.util.ArrayList<>(); + for (JsonNode embeddingNode : floatEmbeddings) { + if (!embeddingNode.isArray()) { + throw new IllegalStateException("Cohere 임베딩 벡터 형식이 예상과 다릅니다."); + } + + float[] vector = new float[embeddingNode.size()]; + for (int i = 0; i < embeddingNode.size(); i++) { + vector[i] = embeddingNode.get(i).floatValue(); + } + result.add(vector); + } + return result; + } catch (Exception e) { + throw new IllegalStateException("Cohere 임베딩 응답 파싱에 실패했습니다.", e); + } + } + private record EmbedRequest( List texts, String model, @@ -85,12 +112,4 @@ private record EmbedRequest( ) { } - private record EmbedResponse(Embeddings embeddings) { - } - - private record Embeddings( - @com.fasterxml.jackson.annotation.JsonProperty("float") - List> floatEmbeddings - ) { - } }