From ee29856ac87951c5a5fcc581b6feaa5dedd87388 Mon Sep 17 00:00:00 2001 From: Sumin Date: Tue, 19 May 2026 17:56:36 +0900 Subject: [PATCH 1/7] =?UTF-8?q?feat:=201=EC=B0=A8=20=EA=B5=AC=ED=98=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build.gradle | 24 ++- data/eval_result.json | 22 +++ .../java/com/cholog/bootcamp/WebConfig.java | 16 ++ .../cholog/bootcamp/chat/ChatController.java | 22 +++ .../com/cholog/bootcamp/chat/ChatService.java | 148 ++++++++++++++++++ .../cholog/bootcamp/chat/dto/ChatRequest.java | 7 + .../bootcamp/chat/dto/ChatResponse.java | 14 ++ 7 files changed, 245 insertions(+), 8 deletions(-) create mode 100644 data/eval_result.json create mode 100644 src/main/java/com/cholog/bootcamp/WebConfig.java create mode 100644 src/main/java/com/cholog/bootcamp/chat/ChatController.java create mode 100644 src/main/java/com/cholog/bootcamp/chat/ChatService.java create mode 100644 src/main/java/com/cholog/bootcamp/chat/dto/ChatRequest.java create mode 100644 src/main/java/com/cholog/bootcamp/chat/dto/ChatResponse.java diff --git a/build.gradle b/build.gradle index 941e596..b32af07 100644 --- a/build.gradle +++ b/build.gradle @@ -26,17 +26,12 @@ dependencyManagement { dependencies { implementation 'org.springframework.boot:spring-boot-starter-web' implementation 'org.springframework.ai:spring-ai-starter-model-openai' + implementation 'org.springframework.ai:spring-ai-vector-store' testImplementation 'org.springframework.boot:spring-boot-starter-test' } -tasks.named('test') { - useJUnitPlatform() - testLogging { - showStandardStreams = true - } - - // .env 파일에서 환경변수 로딩 +def loadDotEnv = { task -> def envFile = rootProject.file('.env') if (envFile.exists()) { envFile.readLines().each { line -> @@ -46,9 +41,22 @@ tasks.named('test') { if (idx > 0) { def key = trimmed.substring(0, idx).trim() def value = trimmed.substring(idx + 1).trim() - environment(key, value) + task.environment(key, value) } } } } } + 
+tasks.named('test') { + useJUnitPlatform() + testLogging { + showStandardStreams = true + } + + loadDotEnv(delegate) +} + +tasks.named('bootRun') { + loadDotEnv(delegate) +} diff --git a/data/eval_result.json b/data/eval_result.json new file mode 100644 index 0000000..f6ce35e --- /dev/null +++ b/data/eval_result.json @@ -0,0 +1,22 @@ +{ + "total": 150, + "correct": 63, + "incorrect": 87, + "error": 0, + "accuracy": 0.42, + "tier_results": { + "easy": { + "correct": 14, + "total": 30 + }, + "medium": { + "correct": 39, + "total": 94 + }, + "hard": { + "correct": 10, + "total": 26 + } + }, + "elapsed_seconds": 450.15493988990784 +} \ No newline at end of file diff --git a/src/main/java/com/cholog/bootcamp/WebConfig.java b/src/main/java/com/cholog/bootcamp/WebConfig.java new file mode 100644 index 0000000..4b75273 --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/WebConfig.java @@ -0,0 +1,16 @@ +package com.cholog.bootcamp; + +import org.springframework.ai.embedding.EmbeddingModel; +import org.springframework.ai.vectorstore.SimpleVectorStore; +import org.springframework.ai.vectorstore.VectorStore; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class WebConfig { + + @Bean + VectorStore vectorStore(EmbeddingModel embeddingModel) { + return SimpleVectorStore.builder(embeddingModel).build(); + } +} diff --git a/src/main/java/com/cholog/bootcamp/chat/ChatController.java b/src/main/java/com/cholog/bootcamp/chat/ChatController.java new file mode 100644 index 0000000..89a087f --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/chat/ChatController.java @@ -0,0 +1,22 @@ +package com.cholog.bootcamp.chat; + +import com.cholog.bootcamp.chat.dto.ChatRequest; +import com.cholog.bootcamp.chat.dto.ChatResponse; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.PostMapping; +import 
org.springframework.web.bind.annotation.RestController; + +@RestController +public class ChatController { + + private final ChatService chatService; + + public ChatController(ChatService chatService) { + this.chatService = chatService; + } + + @PostMapping("/api/chat") + public ChatResponse chat(@RequestBody ChatRequest request) { + return chatService.ask(request.question()); + } +} diff --git a/src/main/java/com/cholog/bootcamp/chat/ChatService.java b/src/main/java/com/cholog/bootcamp/chat/ChatService.java new file mode 100644 index 0000000..d8638e3 --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/chat/ChatService.java @@ -0,0 +1,148 @@ +package com.cholog.bootcamp.chat; + +import com.cholog.bootcamp.chat.dto.ChatResponse; +import jakarta.annotation.PostConstruct; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.springframework.ai.chat.client.ChatClient; +import org.springframework.ai.chat.metadata.Usage; +import org.springframework.ai.document.Document; +import org.springframework.stereotype.Service; +import org.springframework.ai.vectorstore.SearchRequest; +import org.springframework.ai.vectorstore.VectorStore; + +@Service +public class ChatService { + + private static final Path FAQ_DIRECTORY = Path.of("data/layer1_faq"); + private static final Path CURRENT_POLICY_DIRECTORY = Path.of("data/layer2_policies/current"); + private static final Path CHATLOG_DIRECTORY = Path.of("data/layer3_chatlogs"); + + private final ChatClient chatClient; + private final VectorStore vectorStore; + + public ChatService(ChatClient.Builder chatClientBuilder, VectorStore vectorStore) { + this.chatClient = chatClientBuilder.build(); + this.vectorStore = vectorStore; + } + + @PostConstruct + void loadFaqContext() { + try { + List documents = Stream.of( 
+ readTextDirectory(FAQ_DIRECTORY, "FAQ"), + readTextDirectory(CURRENT_POLICY_DIRECTORY, "CURRENT_POLICY"), + readChatlogDirectory() + ) + .flatMap(List::stream) + .toList(); + + vectorStore.add(documents); + } catch (IOException e) { + throw new UncheckedIOException("Failed to load support documents", e); + } + } + + public ChatResponse ask(String question) { + String supportContext = vectorStore.similaritySearch( + SearchRequest.builder() + .query(question) + .topK(5) + .build() + ) + .stream() + .map(Document::getText) + .collect(Collectors.joining("\n\n===\n\n")); + + org.springframework.ai.chat.model.ChatResponse response = chatClient.prompt() + .system(""" + 당신은 Cholog Corporation의 고객 전용 챗봇 서비스이다. + 제공된 컨텍스트만을 활용하라. + current policy를 가장 우선하고, 그 다음 FAQ, 마지막으로 chatlog를 참고하라. + chatlog는 보조 참고 자료이며 policy나 FAQ보다 신뢰도가 낮다. + 제공된 컨텍스트로 답할 수 없다면, 모른다고 답하라. + 한국어로 답하라. + """) + .user(""" + Customer question: + %s + + Support context: + %s + """.formatted(question, supportContext)) + .call() + .chatResponse(); + + Usage usage = response.getMetadata().getUsage(); + + return new ChatResponse( + response.getResult().getOutput().getText(), + new ChatResponse.TokenUsage( + usage == null || usage.getPromptTokens() == null ? 0 : usage.getPromptTokens(), + usage == null || usage.getCompletionTokens() == null ? 0 : usage.getCompletionTokens(), + usage == null || usage.getTotalTokens() == null ? 
0 : usage.getTotalTokens() + ) + ); + } + + private List readTextDirectory(Path directory, String sourceType) throws IOException { + try (Stream files = Files.list(directory)) { + return files + .filter(Files::isRegularFile) + .sorted(Comparator.comparing(path -> path.getFileName().toString())) + .map(path -> readTextFile(path, sourceType)) + .toList(); + } + } + + private Document readTextFile(Path path, String sourceType) { + try { + List lines = Files.readAllLines(path); + return new Document( + "# Source Type: %s\n# Source: %s\n%s" + .formatted(sourceType, path.getFileName(), String.join("\n", lines)), + Map.of( + "sourceType", sourceType, + "source", path.getFileName().toString() + ) + ); + } catch (IOException e) { + throw new UncheckedIOException("Failed to read file: " + path.getFileName(), e); + } + } + + private List readChatlogDirectory() throws IOException { + try (Stream files = Files.list(CHATLOG_DIRECTORY)) { + return files + .filter(Files::isRegularFile) + .sorted(Comparator.comparing(path -> path.getFileName().toString())) + .flatMap(path -> readChatlogFile(path).stream()) + .toList(); + } + } + + private List readChatlogFile(Path path) { + try (Stream lines = Files.lines(path)) { + return lines + .filter(line -> line.contains("\"agent_accuracy\":\"correct\"") + || line.contains("\"agent_accuracy\":\"partially_correct\"")) + .map(line -> new Document( + "# Source Type: CHATLOG\n# Source: %s\n%s".formatted(path.getFileName(), line), + Map.of( + "sourceType", "CHATLOG", + "source", path.getFileName().toString() + ) + )) + .toList(); + } catch (IOException e) { + throw new UncheckedIOException("Failed to read chatlog file: " + path.getFileName(), e); + } + } +} diff --git a/src/main/java/com/cholog/bootcamp/chat/dto/ChatRequest.java b/src/main/java/com/cholog/bootcamp/chat/dto/ChatRequest.java new file mode 100644 index 0000000..5a9336b --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/chat/dto/ChatRequest.java @@ -0,0 +1,7 @@ +package 
com.cholog.bootcamp.chat.dto; + +public record ChatRequest( + String question +) { + +} diff --git a/src/main/java/com/cholog/bootcamp/chat/dto/ChatResponse.java b/src/main/java/com/cholog/bootcamp/chat/dto/ChatResponse.java new file mode 100644 index 0000000..18aab44 --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/chat/dto/ChatResponse.java @@ -0,0 +1,14 @@ +package com.cholog.bootcamp.chat.dto; + +public record ChatResponse( + String answer, + TokenUsage tokenUsage +) { + + public record TokenUsage( + int promptTokens, + int completionTokens, + int totalTokens + ) { + } +} From a5c216be0b9d25fab2f9f1774eadcb698d2d3cc1 Mon Sep 17 00:00:00 2001 From: Sumin Date: Wed, 20 May 2026 10:28:40 +0900 Subject: [PATCH 2/7] =?UTF-8?q?feat:=20RAG=20=EA=B2=80=EC=83=89=20?= =?UTF-8?q?=EA=B5=AC=EC=A1=B0=20=EC=B6=94=EA=B0=80=EC=99=80=20=ED=8F=89?= =?UTF-8?q?=EA=B0=80=20=EC=8A=A4=ED=81=AC=EB=A6=BD=ED=8A=B8=20=EA=B0=9C?= =?UTF-8?q?=EC=84=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data/evaluate.py | 48 +++++- .../com/cholog/bootcamp/chat/ChatService.java | 148 +++++++++++++++--- .../cholog/bootcamp/chat/RagProperties.java | 37 +++++ src/main/resources/application.yml | 6 + 4 files changed, 217 insertions(+), 22 deletions(-) create mode 100644 src/main/java/com/cholog/bootcamp/chat/RagProperties.java diff --git a/data/evaluate.py b/data/evaluate.py index ed941cd..3f52eb7 100644 --- a/data/evaluate.py +++ b/data/evaluate.py @@ -102,6 +102,20 @@ def judge_answer(question: str, expected: str, actual: str) -> dict: return {"score": 0, "reason": "판정 파싱 실패"} +def next_result_file_path(data_dir: Path) -> Path: + """기존 결과 파일이 있으면 번호를 붙여 새 파일명을 반환합니다.""" + default_path = data_dir / "eval_result.json" + if not default_path.exists(): + return default_path + + index = 1 + while True: + candidate = data_dir / f"eval_result_{index}.json" + if not candidate.exists(): + return candidate + index += 1 + + # ─── 메인 
───────────────────────────────────────────────────────────────────── def main(): @@ -133,6 +147,7 @@ def main(): results = {"correct": 0, "incorrect": 0, "error": 0} tier_results = {} + detailed_results = [] start_time = time.time() for i, q in enumerate(questions): @@ -149,6 +164,16 @@ def main(): response = ask_server(question_ko) if response is None: results["error"] += 1 + detailed_results.append({ + "id": qid, + "tier": tier, + "question": question_ko, + "expected_answer": expected, + "actual_answer": "", + "score": 0, + "reason": "서버 응답 없음", + "status": "error", + }) if args.verbose: print(f"[{qid}] ERROR — 서버 응답 없음") continue @@ -167,6 +192,17 @@ def main(): results["incorrect"] += 1 marker = "✗" + detailed_results.append({ + "id": qid, + "tier": tier, + "question": question_ko, + "expected_answer": expected, + "actual_answer": actual_answer, + "score": score, + "reason": judgment.get("reason", ""), + "status": "correct" if score == 1 else "incorrect", + }) + if args.verbose: print(f"[{qid}] {marker} ({tier}) {question_ko[:40]}...") if score == 0: @@ -197,8 +233,17 @@ def main(): print(f"\n소요 시간: {elapsed:.1f}초") print(f"평균 응답: {elapsed/max(total,1):.1f}초/질문") + incorrect_results = [item for item in detailed_results if item["status"] == "incorrect"] + if incorrect_results: + print("\n오답 판정 이유:") + for item in incorrect_results[:10]: + print(f" [{item['id']}] {item['question']}") + print(f" 이유: {item['reason']}") + if len(incorrect_results) > 10: + print(f" ... 
외 {len(incorrect_results) - 10}건은 결과 파일을 확인하세요.") + # 결과 저장 - result_file = DATA_DIR / "eval_result.json" + result_file = next_result_file_path(DATA_DIR) with open(result_file, "w") as f: json.dump({ "total": total, @@ -208,6 +253,7 @@ def main(): "accuracy": results["correct"] / max(total, 1), "tier_results": tier_results, "elapsed_seconds": elapsed, + "detailed_results": detailed_results, }, f, indent=2, ensure_ascii=False) print(f"\n결과 저장: {result_file}") diff --git a/src/main/java/com/cholog/bootcamp/chat/ChatService.java b/src/main/java/com/cholog/bootcamp/chat/ChatService.java index d8638e3..3519bb4 100644 --- a/src/main/java/com/cholog/bootcamp/chat/ChatService.java +++ b/src/main/java/com/cholog/bootcamp/chat/ChatService.java @@ -11,6 +11,8 @@ import java.util.Map; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.metadata.Usage; import org.springframework.ai.document.Document; @@ -24,13 +26,16 @@ public class ChatService { private static final Path FAQ_DIRECTORY = Path.of("data/layer1_faq"); private static final Path CURRENT_POLICY_DIRECTORY = Path.of("data/layer2_policies/current"); private static final Path CHATLOG_DIRECTORY = Path.of("data/layer3_chatlogs"); + private static final Logger log = LoggerFactory.getLogger(ChatService.class); private final ChatClient chatClient; private final VectorStore vectorStore; + private final RagProperties ragProperties; - public ChatService(ChatClient.Builder chatClientBuilder, VectorStore vectorStore) { + public ChatService(ChatClient.Builder chatClientBuilder, VectorStore vectorStore, RagProperties ragProperties) { this.chatClient = chatClientBuilder.build(); this.vectorStore = vectorStore; + this.ragProperties = ragProperties; } @PostConstruct @@ -51,24 +56,28 @@ void loadFaqContext() { } public ChatResponse ask(String question) { - String 
supportContext = vectorStore.similaritySearch( - SearchRequest.builder() - .query(question) - .topK(5) - .build() - ) + List retrievedDocuments = vectorStore.similaritySearch( + SearchRequest.builder() + .query(question) + .topK(ragProperties.getTopK()) + .build() + ); + + logSearchResults(question, retrievedDocuments); + + String supportContext = retrievedDocuments .stream() .map(Document::getText) .collect(Collectors.joining("\n\n===\n\n")); org.springframework.ai.chat.model.ChatResponse response = chatClient.prompt() .system(""" - 당신은 Cholog Corporation의 고객 전용 챗봇 서비스이다. - 제공된 컨텍스트만을 활용하라. - current policy를 가장 우선하고, 그 다음 FAQ, 마지막으로 chatlog를 참고하라. - chatlog는 보조 참고 자료이며 policy나 FAQ보다 신뢰도가 낮다. - 제공된 컨텍스트로 답할 수 없다면, 모른다고 답하라. - 한국어로 답하라. + - 당신은 Cholog Corporation의 고객 전용 챗봇 서비스이다. + - 제공된 컨텍스트만을 활용하라. + - current policy를 가장 우선하고, 그 다음 FAQ, 마지막으로 correct chatlog를 참고하라. + - chatlog는 보조 참고 자료이며 policy나 FAQ보다 신뢰도가 낮다. + - 제공된 컨텍스트로 답할 수 없다면, '고객센터에 문의해주세요'라고 답하라. + - 한국어로 답하라. """) .user(""" Customer question: @@ -97,27 +106,98 @@ private List readTextDirectory(Path directory, String sourceType) thro return files .filter(Files::isRegularFile) .sorted(Comparator.comparing(path -> path.getFileName().toString())) - .map(path -> readTextFile(path, sourceType)) + .flatMap(path -> readTextFile(path, sourceType).stream()) .toList(); } } - private Document readTextFile(Path path, String sourceType) { + private List readTextFile(Path path, String sourceType) { try { - List lines = Files.readAllLines(path); - return new Document( + String content = Files.readString(path); + + if ("FAQ".equals(sourceType)) { + return splitFaqDocument(path, content); + } + if ("CURRENT_POLICY".equals(sourceType)) { + return splitPolicyDocument(path, content); + } + + return List.of(new Document( "# Source Type: %s\n# Source: %s\n%s" - .formatted(sourceType, path.getFileName(), String.join("\n", lines)), + .formatted(sourceType, path.getFileName(), content), Map.of( "sourceType", sourceType, 
"source", path.getFileName().toString() ) - ); + )); } catch (IOException e) { throw new UncheckedIOException("Failed to read file: " + path.getFileName(), e); } } + private List splitFaqDocument(Path path, String content) { + String[] sections = content.split(ragProperties.getFaqSplitRegex()); + + return Stream.of(sections) + .map(String::trim) + .filter(section -> !section.isBlank()) + .map(section -> section.startsWith("#") + ? createDocument("FAQ", path, section, null) + : createDocument("FAQ", path, "### " + section, extractFaqQuestion(section))) + .toList(); + } + + private List splitPolicyDocument(Path path, String content) { + String[] sections = content.split(ragProperties.getPolicySplitRegex()); + + if (sections.length <= 1) { + return List.of(createDocument("CURRENT_POLICY", path, content, null)); + } + + String prefix = sections[0].trim(); + + return Stream.of(sections) + .skip(1) + .map(String::trim) + .filter(section -> !section.isBlank()) + .map(section -> createDocument( + "CURRENT_POLICY", + path, + prefix + "\n\n## " + section, + extractSectionTitle(section) + )) + .toList(); + } + + private Document createDocument(String sourceType, Path path, String body, String sectionTitle) { + Map metadata = sectionTitle == null + ? Map.of( + "sourceType", sourceType, + "source", path.getFileName().toString() + ) + : Map.of( + "sourceType", sourceType, + "source", path.getFileName().toString(), + "sectionTitle", sectionTitle + ); + + return new Document( + "# Source Type: %s\n# Source: %s\n%s" + .formatted(sourceType, path.getFileName(), body), + metadata + ); + } + + private String extractFaqQuestion(String section) { + int newlineIndex = section.indexOf('\n'); + return newlineIndex >= 0 ? section.substring(0, newlineIndex).trim() : section.trim(); + } + + private String extractSectionTitle(String section) { + int newlineIndex = section.indexOf('\n'); + return newlineIndex >= 0 ? 
section.substring(0, newlineIndex).trim() : section.trim(); + } + private List readChatlogDirectory() throws IOException { try (Stream files = Files.list(CHATLOG_DIRECTORY)) { return files @@ -131,8 +211,7 @@ private List readChatlogDirectory() throws IOException { private List readChatlogFile(Path path) { try (Stream lines = Files.lines(path)) { return lines - .filter(line -> line.contains("\"agent_accuracy\":\"correct\"") - || line.contains("\"agent_accuracy\":\"partially_correct\"")) + .filter(line -> line.contains("\"agent_accuracy\":\"correct\"")) .map(line -> new Document( "# Source Type: CHATLOG\n# Source: %s\n%s".formatted(path.getFileName(), line), Map.of( @@ -145,4 +224,31 @@ private List readChatlogFile(Path path) { throw new UncheckedIOException("Failed to read chatlog file: " + path.getFileName(), e); } } + + private void logSearchResults(String question, List documents) { + String resultSummary = documents.isEmpty() + ? "no documents retrieved" + : documents.stream() + .map(this::formatDocumentSummary) + .collect(Collectors.joining(" | ")); + + log.info("RAG search question='{}' topK={} results={}", + question, + ragProperties.getTopK(), + resultSummary + ); + } + + private String formatDocumentSummary(Document document) { + Map metadata = document.getMetadata(); + String sourceType = String.valueOf(metadata.getOrDefault("sourceType", "UNKNOWN")); + String source = String.valueOf(metadata.getOrDefault("source", "UNKNOWN")); + Object sectionTitle = metadata.get("sectionTitle"); + + if (sectionTitle == null) { + return "%s/%s".formatted(sourceType, source); + } + + return "%s/%s#%s".formatted(sourceType, source, sectionTitle); + } } diff --git a/src/main/java/com/cholog/bootcamp/chat/RagProperties.java b/src/main/java/com/cholog/bootcamp/chat/RagProperties.java new file mode 100644 index 0000000..ddb1f03 --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/chat/RagProperties.java @@ -0,0 +1,37 @@ +package com.cholog.bootcamp.chat; + +import 
org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.stereotype.Component; + +@Component +@ConfigurationProperties(prefix = "app.rag") +public class RagProperties { + + private int topK = 5; + private String faqSplitRegex = "(?m)^### "; + private String policySplitRegex = "(?m)^## "; + + public int getTopK() { + return topK; + } + + public void setTopK(int topK) { + this.topK = topK; + } + + public String getFaqSplitRegex() { + return faqSplitRegex; + } + + public void setFaqSplitRegex(String faqSplitRegex) { + this.faqSplitRegex = faqSplitRegex; + } + + public String getPolicySplitRegex() { + return policySplitRegex; + } + + public void setPolicySplitRegex(String policySplitRegex) { + this.policySplitRegex = policySplitRegex; + } +} diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 6233b35..32828da 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -11,3 +11,9 @@ spring: embedding: options: model: text-embedding-3-small + +app: + rag: + top-k: 5 + faq-split-regex: "(?m)^### " + policy-split-regex: "(?m)^## " From c6f30009e2188abb02b26323a1ddd371c4d0d7a7 Mon Sep 17 00:00:00 2001 From: Sumin Date: Wed, 20 May 2026 12:38:35 +0900 Subject: [PATCH 3/7] =?UTF-8?q?refactor:=20ObjectMapper=EB=A1=9C=20chatlog?= =?UTF-8?q?=20=ED=8C=8C=EC=8B=B1=20=EC=B2=98=EB=A6=AC=20=EB=B0=8F=20RAG=20?= =?UTF-8?q?=EA=B2=80=EC=83=89=20topK=20=EC=84=A4=EC=A0=95=EA=B0=92=20?= =?UTF-8?q?=EC=A1=B0=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build.gradle | 5 ++ .../java/com/cholog/bootcamp/WebConfig.java | 6 ++ .../com/cholog/bootcamp/chat/ChatService.java | 60 ++++++++++++------- src/main/resources/application.yml | 2 +- 4 files changed, 49 insertions(+), 24 deletions(-) diff --git a/build.gradle b/build.gradle index b32af07..ba52bd8 100644 --- a/build.gradle +++ b/build.gradle @@ -28,7 +28,12 @@ dependencies { 
implementation 'org.springframework.ai:spring-ai-starter-model-openai' implementation 'org.springframework.ai:spring-ai-vector-store' + compileOnly 'org.projectlombok:lombok' + annotationProcessor 'org.projectlombok:lombok' + testImplementation 'org.springframework.boot:spring-boot-starter-test' + testCompileOnly 'org.projectlombok:lombok' + testAnnotationProcessor 'org.projectlombok:lombok' } def loadDotEnv = { task -> diff --git a/src/main/java/com/cholog/bootcamp/WebConfig.java b/src/main/java/com/cholog/bootcamp/WebConfig.java index 4b75273..e6d0b73 100644 --- a/src/main/java/com/cholog/bootcamp/WebConfig.java +++ b/src/main/java/com/cholog/bootcamp/WebConfig.java @@ -1,5 +1,6 @@ package com.cholog.bootcamp; +import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.embedding.EmbeddingModel; import org.springframework.ai.vectorstore.SimpleVectorStore; import org.springframework.ai.vectorstore.VectorStore; @@ -13,4 +14,9 @@ public class WebConfig { VectorStore vectorStore(EmbeddingModel embeddingModel) { return SimpleVectorStore.builder(embeddingModel).build(); } + + @Bean + ChatClient chatClient(ChatClient.Builder chatClientBuilder) { + return chatClientBuilder.build(); + } } diff --git a/src/main/java/com/cholog/bootcamp/chat/ChatService.java b/src/main/java/com/cholog/bootcamp/chat/ChatService.java index 3519bb4..3d22248 100644 --- a/src/main/java/com/cholog/bootcamp/chat/ChatService.java +++ b/src/main/java/com/cholog/bootcamp/chat/ChatService.java @@ -1,6 +1,8 @@ package com.cholog.bootcamp.chat; import com.cholog.bootcamp.chat.dto.ChatResponse; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import jakarta.annotation.PostConstruct; import java.io.IOException; import java.io.UncheckedIOException; @@ -11,32 +13,28 @@ import java.util.Map; import java.util.stream.Collectors; import java.util.stream.Stream; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import 
lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.metadata.Usage; import org.springframework.ai.document.Document; -import org.springframework.stereotype.Service; import org.springframework.ai.vectorstore.SearchRequest; import org.springframework.ai.vectorstore.VectorStore; +import org.springframework.stereotype.Service; +@Slf4j @Service +@RequiredArgsConstructor public class ChatService { private static final Path FAQ_DIRECTORY = Path.of("data/layer1_faq"); private static final Path CURRENT_POLICY_DIRECTORY = Path.of("data/layer2_policies/current"); private static final Path CHATLOG_DIRECTORY = Path.of("data/layer3_chatlogs"); - private static final Logger log = LoggerFactory.getLogger(ChatService.class); private final ChatClient chatClient; private final VectorStore vectorStore; private final RagProperties ragProperties; - - public ChatService(ChatClient.Builder chatClientBuilder, VectorStore vectorStore, RagProperties ragProperties) { - this.chatClient = chatClientBuilder.build(); - this.vectorStore = vectorStore; - this.ragProperties = ragProperties; - } + private final ObjectMapper objectMapper; @PostConstruct void loadFaqContext() { @@ -74,8 +72,6 @@ public ChatResponse ask(String question) { .system(""" - 당신은 Cholog Corporation의 고객 전용 챗봇 서비스이다. - 제공된 컨텍스트만을 활용하라. - - current policy를 가장 우선하고, 그 다음 FAQ, 마지막으로 correct chatlog를 참고하라. - - chatlog는 보조 참고 자료이며 policy나 FAQ보다 신뢰도가 낮다. - 제공된 컨텍스트로 답할 수 없다면, '고객센터에 문의해주세요'라고 답하라. - 한국어로 답하라. """) @@ -172,9 +168,9 @@ private List splitPolicyDocument(Path path, String content) { private Document createDocument(String sourceType, Path path, String body, String sectionTitle) { Map metadata = sectionTitle == null ? 
Map.of( - "sourceType", sourceType, - "source", path.getFileName().toString() - ) + "sourceType", sourceType, + "source", path.getFileName().toString() + ) : Map.of( "sourceType", sourceType, "source", path.getFileName().toString(), @@ -211,20 +207,38 @@ private List readChatlogDirectory() throws IOException { private List readChatlogFile(Path path) { try (Stream lines = Files.lines(path)) { return lines - .filter(line -> line.contains("\"agent_accuracy\":\"correct\"")) - .map(line -> new Document( - "# Source Type: CHATLOG\n# Source: %s\n%s".formatted(path.getFileName(), line), - Map.of( - "sourceType", "CHATLOG", - "source", path.getFileName().toString() - ) - )) + .map(line -> parseCorrectChatlogDocument(path, line)) + .filter(document -> document != null) .toList(); } catch (IOException e) { throw new UncheckedIOException("Failed to read chatlog file: " + path.getFileName(), e); } } + private Document parseCorrectChatlogDocument(Path path, String line) { + try { + JsonNode root = objectMapper.readTree(line); + if (!"correct".equals(root.path("agent_accuracy").asText())) { + return null; + } + + String conversationId = root.path("conversation_id").asText(""); + String primaryIntent = root.path("primary_intent").asText(""); + + return new Document( + "# Source Type: CHATLOG\n# Source: %s\n%s".formatted(path.getFileName(), line), + Map.of( + "sourceType", "CHATLOG", + "source", path.getFileName().toString(), + "conversationId", conversationId, + "primaryIntent", primaryIntent + ) + ); + } catch (IOException e) { + throw new UncheckedIOException("Failed to parse chatlog line in file: " + path.getFileName(), e); + } + } + private void logSearchResults(String question, List documents) { String resultSummary = documents.isEmpty() ? 
"no documents retrieved" diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 32828da..e277b09 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -14,6 +14,6 @@ spring: app: rag: - top-k: 5 + top-k: 7 faq-split-regex: "(?m)^### " policy-split-regex: "(?m)^## " From 69e58b0034c44cd83b923d3c3cf6ed76fa1756cd Mon Sep 17 00:00:00 2001 From: Sumin Date: Wed, 20 May 2026 13:02:53 +0900 Subject: [PATCH 4/7] =?UTF-8?q?refactor:=20RAG=20=EB=AC=B8=EC=84=9C=20?= =?UTF-8?q?=EB=A1=9C=EB=94=A9=20=EC=B1=85=EC=9E=84=20=EB=B6=84=EB=A6=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{WebConfig.java => AiConfig.java} | 2 +- .../com/cholog/bootcamp/chat/ChatService.java | 170 +---------------- .../cholog/bootcamp/chat/DocumentLoader.java | 171 ++++++++++++++++++ 3 files changed, 174 insertions(+), 169 deletions(-) rename src/main/java/com/cholog/bootcamp/{WebConfig.java => AiConfig.java} (96%) create mode 100644 src/main/java/com/cholog/bootcamp/chat/DocumentLoader.java diff --git a/src/main/java/com/cholog/bootcamp/WebConfig.java b/src/main/java/com/cholog/bootcamp/AiConfig.java similarity index 96% rename from src/main/java/com/cholog/bootcamp/WebConfig.java rename to src/main/java/com/cholog/bootcamp/AiConfig.java index e6d0b73..1049bd6 100644 --- a/src/main/java/com/cholog/bootcamp/WebConfig.java +++ b/src/main/java/com/cholog/bootcamp/AiConfig.java @@ -8,7 +8,7 @@ import org.springframework.context.annotation.Configuration; @Configuration -public class WebConfig { +public class AiConfig { @Bean VectorStore vectorStore(EmbeddingModel embeddingModel) { diff --git a/src/main/java/com/cholog/bootcamp/chat/ChatService.java b/src/main/java/com/cholog/bootcamp/chat/ChatService.java index 3d22248..693fe12 100644 --- a/src/main/java/com/cholog/bootcamp/chat/ChatService.java +++ b/src/main/java/com/cholog/bootcamp/chat/ChatService.java @@ -1,18 +1,10 @@ 
package com.cholog.bootcamp.chat; import com.cholog.bootcamp.chat.dto.ChatResponse; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; import jakarta.annotation.PostConstruct; -import java.io.IOException; -import java.io.UncheckedIOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import java.util.stream.Stream; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.ai.chat.client.ChatClient; @@ -27,30 +19,14 @@ @RequiredArgsConstructor public class ChatService { - private static final Path FAQ_DIRECTORY = Path.of("data/layer1_faq"); - private static final Path CURRENT_POLICY_DIRECTORY = Path.of("data/layer2_policies/current"); - private static final Path CHATLOG_DIRECTORY = Path.of("data/layer3_chatlogs"); - private final ChatClient chatClient; private final VectorStore vectorStore; private final RagProperties ragProperties; - private final ObjectMapper objectMapper; + private final DocumentLoader documentLoader; @PostConstruct void loadFaqContext() { - try { - List documents = Stream.of( - readTextDirectory(FAQ_DIRECTORY, "FAQ"), - readTextDirectory(CURRENT_POLICY_DIRECTORY, "CURRENT_POLICY"), - readChatlogDirectory() - ) - .flatMap(List::stream) - .toList(); - - vectorStore.add(documents); - } catch (IOException e) { - throw new UncheckedIOException("Failed to load support documents", e); - } + vectorStore.add(documentLoader.load()); } public ChatResponse ask(String question) { @@ -97,148 +73,6 @@ public ChatResponse ask(String question) { ); } - private List readTextDirectory(Path directory, String sourceType) throws IOException { - try (Stream files = Files.list(directory)) { - return files - .filter(Files::isRegularFile) - .sorted(Comparator.comparing(path -> path.getFileName().toString())) - .flatMap(path -> readTextFile(path, 
sourceType).stream()) - .toList(); - } - } - - private List readTextFile(Path path, String sourceType) { - try { - String content = Files.readString(path); - - if ("FAQ".equals(sourceType)) { - return splitFaqDocument(path, content); - } - if ("CURRENT_POLICY".equals(sourceType)) { - return splitPolicyDocument(path, content); - } - - return List.of(new Document( - "# Source Type: %s\n# Source: %s\n%s" - .formatted(sourceType, path.getFileName(), content), - Map.of( - "sourceType", sourceType, - "source", path.getFileName().toString() - ) - )); - } catch (IOException e) { - throw new UncheckedIOException("Failed to read file: " + path.getFileName(), e); - } - } - - private List splitFaqDocument(Path path, String content) { - String[] sections = content.split(ragProperties.getFaqSplitRegex()); - - return Stream.of(sections) - .map(String::trim) - .filter(section -> !section.isBlank()) - .map(section -> section.startsWith("#") - ? createDocument("FAQ", path, section, null) - : createDocument("FAQ", path, "### " + section, extractFaqQuestion(section))) - .toList(); - } - - private List splitPolicyDocument(Path path, String content) { - String[] sections = content.split(ragProperties.getPolicySplitRegex()); - - if (sections.length <= 1) { - return List.of(createDocument("CURRENT_POLICY", path, content, null)); - } - - String prefix = sections[0].trim(); - - return Stream.of(sections) - .skip(1) - .map(String::trim) - .filter(section -> !section.isBlank()) - .map(section -> createDocument( - "CURRENT_POLICY", - path, - prefix + "\n\n## " + section, - extractSectionTitle(section) - )) - .toList(); - } - - private Document createDocument(String sourceType, Path path, String body, String sectionTitle) { - Map metadata = sectionTitle == null - ? 
Map.of( - "sourceType", sourceType, - "source", path.getFileName().toString() - ) - : Map.of( - "sourceType", sourceType, - "source", path.getFileName().toString(), - "sectionTitle", sectionTitle - ); - - return new Document( - "# Source Type: %s\n# Source: %s\n%s" - .formatted(sourceType, path.getFileName(), body), - metadata - ); - } - - private String extractFaqQuestion(String section) { - int newlineIndex = section.indexOf('\n'); - return newlineIndex >= 0 ? section.substring(0, newlineIndex).trim() : section.trim(); - } - - private String extractSectionTitle(String section) { - int newlineIndex = section.indexOf('\n'); - return newlineIndex >= 0 ? section.substring(0, newlineIndex).trim() : section.trim(); - } - - private List readChatlogDirectory() throws IOException { - try (Stream files = Files.list(CHATLOG_DIRECTORY)) { - return files - .filter(Files::isRegularFile) - .sorted(Comparator.comparing(path -> path.getFileName().toString())) - .flatMap(path -> readChatlogFile(path).stream()) - .toList(); - } - } - - private List readChatlogFile(Path path) { - try (Stream lines = Files.lines(path)) { - return lines - .map(line -> parseCorrectChatlogDocument(path, line)) - .filter(document -> document != null) - .toList(); - } catch (IOException e) { - throw new UncheckedIOException("Failed to read chatlog file: " + path.getFileName(), e); - } - } - - private Document parseCorrectChatlogDocument(Path path, String line) { - try { - JsonNode root = objectMapper.readTree(line); - if (!"correct".equals(root.path("agent_accuracy").asText())) { - return null; - } - - String conversationId = root.path("conversation_id").asText(""); - String primaryIntent = root.path("primary_intent").asText(""); - - return new Document( - "# Source Type: CHATLOG\n# Source: %s\n%s".formatted(path.getFileName(), line), - Map.of( - "sourceType", "CHATLOG", - "source", path.getFileName().toString(), - "conversationId", conversationId, - "primaryIntent", primaryIntent - ) - ); - } catch 
(IOException e) { - throw new UncheckedIOException("Failed to parse chatlog line in file: " + path.getFileName(), e); - } - } - private void logSearchResults(String question, List documents) { String resultSummary = documents.isEmpty() ? "no documents retrieved" diff --git a/src/main/java/com/cholog/bootcamp/chat/DocumentLoader.java b/src/main/java/com/cholog/bootcamp/chat/DocumentLoader.java new file mode 100644 index 0000000..52a5cb6 --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/chat/DocumentLoader.java @@ -0,0 +1,171 @@ +package com.cholog.bootcamp.chat; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; +import lombok.RequiredArgsConstructor; +import org.springframework.ai.document.Document; +import org.springframework.stereotype.Component; + +@Component +@RequiredArgsConstructor +public class DocumentLoader { + + private static final Path FAQ_DIRECTORY = Path.of("data/layer1_faq"); + private static final Path CURRENT_POLICY_DIRECTORY = Path.of("data/layer2_policies/current"); + private static final Path CHATLOG_DIRECTORY = Path.of("data/layer3_chatlogs"); + + private final RagProperties ragProperties; + private final ObjectMapper objectMapper; + + public List load() { + try { + return Stream.of( + readTextDirectory(FAQ_DIRECTORY, "FAQ"), + readTextDirectory(CURRENT_POLICY_DIRECTORY, "CURRENT_POLICY"), + readChatlogDirectory() + ) + .flatMap(List::stream) + .toList(); + } catch (IOException e) { + throw new UncheckedIOException("Failed to load support documents", e); + } + } + + private List readTextDirectory(Path directory, String sourceType) throws IOException { + try (Stream files = Files.list(directory)) { + return files + .filter(Files::isRegularFile) + 
.sorted(Comparator.comparing(path -> path.getFileName().toString())) + .flatMap(path -> readTextFile(path, sourceType).stream()) + .toList(); + } + } + + private List readTextFile(Path path, String sourceType) { + try { + String content = Files.readString(path); + + if ("FAQ".equals(sourceType)) { + return splitFaqDocument(path, content); + } + if ("CURRENT_POLICY".equals(sourceType)) { + return splitPolicyDocument(path, content); + } + + return List.of(createDocument(sourceType, path, content, null)); + } catch (IOException e) { + throw new UncheckedIOException("Failed to read file: " + path.getFileName(), e); + } + } + + private List splitFaqDocument(Path path, String content) { + String[] sections = content.split(ragProperties.getFaqSplitRegex()); + + return Stream.of(sections) + .map(String::trim) + .filter(section -> !section.isBlank()) + .map(section -> section.startsWith("#") + ? createDocument("FAQ", path, section, null) + : createDocument("FAQ", path, "### " + section, extractSectionTitle(section))) + .toList(); + } + + private List splitPolicyDocument(Path path, String content) { + String[] sections = content.split(ragProperties.getPolicySplitRegex()); + + if (sections.length <= 1) { + return List.of(createDocument("CURRENT_POLICY", path, content, null)); + } + + String prefix = sections[0].trim(); + + return Stream.of(sections) + .skip(1) + .map(String::trim) + .filter(section -> !section.isBlank()) + .map(section -> createDocument( + "CURRENT_POLICY", + path, + prefix + "\n\n## " + section, + extractSectionTitle(section) + )) + .toList(); + } + + private List readChatlogDirectory() throws IOException { + try (Stream files = Files.list(CHATLOG_DIRECTORY)) { + return files + .filter(Files::isRegularFile) + .sorted(Comparator.comparing(path -> path.getFileName().toString())) + .flatMap(path -> readChatlogFile(path).stream()) + .toList(); + } + } + + private List readChatlogFile(Path path) { + try (Stream lines = Files.lines(path)) { + return lines + 
.map(line -> parseCorrectChatlogDocument(path, line)) + .filter(document -> document != null) + .toList(); + } catch (IOException e) { + throw new UncheckedIOException("Failed to read chatlog file: " + path.getFileName(), e); + } + } + + private Document parseCorrectChatlogDocument(Path path, String line) { + try { + JsonNode root = objectMapper.readTree(line); + if (!"correct".equals(root.path("agent_accuracy").asText())) { + return null; + } + + String conversationId = root.path("conversation_id").asText(""); + String primaryIntent = root.path("primary_intent").asText(""); + + return new Document( + "# Source Type: CHATLOG\n# Source: %s\n%s".formatted(path.getFileName(), line), + Map.of( + "sourceType", "CHATLOG", + "source", path.getFileName().toString(), + "conversationId", conversationId, + "primaryIntent", primaryIntent + ) + ); + } catch (IOException e) { + throw new UncheckedIOException("Failed to parse chatlog line in file: " + path.getFileName(), e); + } + } + + private Document createDocument(String sourceType, Path path, String body, String sectionTitle) { + Map metadata = sectionTitle == null + ? Map.of( + "sourceType", sourceType, + "source", path.getFileName().toString() + ) + : Map.of( + "sourceType", sourceType, + "source", path.getFileName().toString(), + "sectionTitle", sectionTitle + ); + + return new Document( + "# Source Type: %s\n# Source: %s\n%s" + .formatted(sourceType, path.getFileName(), body), + metadata + ); + } + + private String extractSectionTitle(String section) { + int newlineIndex = section.indexOf('\n'); + return newlineIndex >= 0 ? 
section.substring(0, newlineIndex).trim() : section.trim(); + } +} From 572c7264c6afe8fb9a16059eecb3f91bd3218e26 Mon Sep 17 00:00:00 2001 From: Sumin Date: Wed, 20 May 2026 13:56:09 +0900 Subject: [PATCH 5/7] =?UTF-8?q?docs:=20wall=20report=20=EC=9E=91=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mission/wall-report.md | 53 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/mission/wall-report.md b/mission/wall-report.md index 4994eca..2bb85d9 100644 --- a/mission/wall-report.md +++ b/mission/wall-report.md @@ -7,36 +7,69 @@ > 구현하면서 잘 안 됐던 것, 예상과 달랐던 것을 적어주세요. -- +[처음 시작] +- 어떻게 구현하는지조차 잘 몰라서 바로 hint1을 봐야했습니다. +- 챗봇을 구현할 때에 어떤 것을 고려해서 구현해야하는지도 전혀 몰랐습니다. 그래서 RAG가 무엇인지, 임베딩은 언제 수행되는지, 벡터 계산은 어떻게 시키는건지 차근차근 + 알아가보려고 했습니다. + - 임베딩 과정을 통해 문서와 질문을 벡터로 변환하고, 이 벡터를 `VectorStore`에 저장 + 검색에 사용한다는 점을 알아봤습니다. 질문이 들어오면 먼저 관련 문서를 + `VectorStore`에서 찾고, 그 검색 결과를 문맥으로 받은 Chat API가 생성한다는 구조를 대략적으로 받아들이고 진행했습니다. + - 챗봇 기능 구현 자체는 ai를 통해 진행했습니다. + +[구현 과정 중] + +- 처음에는 chat log를 전부 포함하려고 했는데, 문서 수가 많아 앱 시작 시 임베딩 호출이 너무 오래 걸렸습니다. 그래서 chat log는 + `agent_accuracy=correct`인 데이터만 사용하도록 줄이고, FAQ와 policy는 각각 제목 단위로 chunking해서 `VectorStore`에 + 넣어보았습니다. +- 그리고 평가가 어떤 기준으로 되는지 모르겠고, 어떤 지점을 변경해야 평가지표가 좋아지는지도 감이 전혀 오질 않았습니다. +- 프롬프트를 다듬어야 하는건지, RAG 검색 topK값을 변경 해야 하는건지, chunking 기준을 변경 해야 하는건지, chat log 데이터에서 다른 것을 포함해야하는지 + 감이 오질 않았습니다. 그래서 하나씩 해봤는데 품질에 별다른 변화가 없었다고 느꼈습니다. ## 2. 해결하지 못한 것 > 시도했지만 결국 해결 못한 문제가 있다면 적어주세요. -- - +- 프롬프트, `topK`, chunking 기준을 바꿔보며 정확도를 높이려고 했지만, 어떤 변경이 점수 향상에 가장 큰 영향을 주는지 명확히 파악하지 못했습니다. +- 검색 결과 로깅을 추가해 어떤 문서 chunk가 검색되는지는 확인할 수 있게 했지만, 오답의 원인이 검색 단계에 있는지 답변 생성 단계에 있는지 체계적으로 구분하지는 + 못했습니다. + - 검색된 문서가 맞았는데도 답변이 부족한 경우와, 애초에 잘못된 문서가 검색된 경우를 나누어 분석하는 방법을 아직 잘 모르는 것 같습니다. +- chatlog를 활용해보려고 했지만, 처음에는 데이터가 너무 많아 임베딩 시간이 오래 걸렸습니다. agent_accuracy=correct인 데이터만 사용하도록 줄였지만, 실제 + 정확도 향상에 도움이 되는지 노이즈가 되는지 파악하지 못했습니다. +- 부분적으로 맞는 답변도 score=0으로 처리되는 경우가 있는 것 같습니다.
점수를 더 세분화하면 개선 방향을 분석하는 데 도움이 될 수 있을지 궁금했지만, + 현재 평가 기준 자체를 바꾸는 것이 적절한지는 판단하지 못했습니다. ## 3. 정확도 측정 결과 > 테스트 질문 100개로 측정한 정확도를 기록해주세요. -| 난이도 | 정확도 | 비고 | -|--------|--------|------| -| easy | | | -| medium | | | -| hard | | | +테스트 질문 150개 기준으로 측정했습니다. +| 난이도 | 정확도 | 비고 | +|--------|---------------|-----------------------------| +| easy | 43.3% (13/30) | 기본 질문에서도 조건/예외 누락으로 오답 발생 | +| medium | 50.0% (47/94) | 가장 높은 정확도이나 세부 정책 누락이 많음 | +| hard | 23.1% (6/26) | 복합 조건, 예외 정책, 최신 정책 구분에서 취약 | ## 4. 왜 그런 결과가 나왔는지 > 정확도가 낮은 난이도의 질문을 몇 개 살펴보고, 왜 틀렸는지 분석해주세요. -- +완전히 다른 답변을 한 경우보다는 핵심 사실의 '일부'만 포함한 경우(함께 설명되어야 하는 조건, 예외, 제한사항을 빠뜨리는 경우)가 많았습니다. +하나의 질문에 여러 정책 조항이 함께 필요한 경우 일부 정보만 답변에 반영되어 정확도가 낮았습니다. +=> 관련 문서를 어느 정도 찾았더라도, 답변 생성 과정에서 필요한 조건을 모두 종합하지 못하면 오답이 되었습니다. + +현재 구현은 topK로 검색된 일부 chunk만 문맥으로 전달하기 때문에, 필요한 근거가 검색 결과에 포함되지 않거나, 포함되더라도 답변에서 충분히 사용되지 않는 문제가 +있었습니다. ## 5. 개선하고 싶은 것 > 시간이 더 있었다면 시도해보고 싶은 개선점을 적어주세요. -- +- 임베딩과 벡터 검색의 원리가 궁금합니다. 제공되는 힌트를 보니까 cosine similarity, 벡터 차원 등의 키워드가 있던데 아직 잘 모릅니다... +- incorrect하다고 판단한 원인을 좀 더 알아보고 싶습니다. 로깅을 통해 어떤 chunk가 검색되었는지는 확인할 수 있지만, 오답이 검색 실패 때문인지, 검색된 문서를 + 충분히 활용하지 못한 답변 생성 문제인지 모르겠습니다. 그래서 더 어떤 시도를 해야하는지 답답했던 것 같습니다.(제가 늦게 참여해서 그런 걸까요..?) + +> 추가로 궁금한 것 + +- 점수 계산은 어떤 것을 기준으로 하는지 궁금합니다. 점수가 높을수록 실제 사용감이 좋아지는건지 궁금합니다.
From ad01fbf0b1d79fece766daa00d4df8d37a20e1b6 Mon Sep 17 00:00:00 2001 From: Sumin Date: Wed, 20 May 2026 13:58:37 +0900 Subject: [PATCH 6/7] =?UTF-8?q?feat:=20wall=20report=EC=97=90=20=EC=9E=91?= =?UTF-8?q?=EC=84=B1=ED=95=9C=20=ED=8F=89=EA=B0=80=20=EA=B8=B0=EB=A1=9D=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data/eval_result.json | 16 ++++++++-------- mission/wall-report.md | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/data/eval_result.json b/data/eval_result.json index f6ce35e..34b43fc 100644 --- a/data/eval_result.json +++ b/data/eval_result.json @@ -1,22 +1,22 @@ { "total": 150, - "correct": 63, - "incorrect": 87, + "correct": 66, + "incorrect": 84, "error": 0, - "accuracy": 0.42, + "accuracy": 0.44, "tier_results": { "easy": { - "correct": 14, + "correct": 13, "total": 30 }, "medium": { - "correct": 39, + "correct": 47, "total": 94 }, "hard": { - "correct": 10, + "correct": 6, "total": 26 } }, - "elapsed_seconds": 450.15493988990784 -} \ No newline at end of file + "elapsed_seconds": 468.1384799480438 +} diff --git a/mission/wall-report.md b/mission/wall-report.md index 2bb85d9..5fff598 100644 --- a/mission/wall-report.md +++ b/mission/wall-report.md @@ -29,7 +29,7 @@ > 시도했지만 결국 해결 못한 문제가 있다면 적어주세요. -- 프롬프트, `topK`, chunking 기준을 바꿔보며 정확도를 높이려고 했지만, 어떤 변경이 점수 향상에 가장 큰 영향을 주는지 명확히 파악하지 못했습니다. +- 프롬프트, topK, chunking 기준을 바꿔보며 정확도를 높이려고 했지만, 어떤 변경이 점수 향상에 가장 큰 영향을 주는지 명확히 파악하지 못했습니다. - 검색 결과 로깅을 추가해 어떤 문서 chunk가 검색되는지는 확인할 수 있게 했지만, 오답의 원인이 검색 단계에 있는지 답변 생성 단계에 있는지 체계적으로 구분하지는 못했습니다. - 검색된 문서가 맞았는데도 답변이 부족한 경우와, 애초에 잘못된 문서가 검색된 경우를 나누어 분석하는 방법을 아직 잘 모르는 것 같습니다. @@ -42,7 +42,7 @@ > 테스트 질문 100개로 측정한 정확도를 기록해주세요. -테스트 질문 150개 기준으로 측정했습니다. +테스트 질문 150개 기준으로 측정했습니다. 
(`eval_result.json`결과 작성) | 난이도 | 정확도 | 비고 | |--------|---------------|-----------------------------| From 8ae5af62a95d4713ba537feb8cf981c18977b245 Mon Sep 17 00:00:00 2001 From: Sumin Date: Wed, 20 May 2026 14:44:33 +0900 Subject: [PATCH 7/7] =?UTF-8?q?refactor:=20dto=20=EB=84=A4=EC=9D=B4?= =?UTF-8?q?=EB=B0=8D=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/cholog/bootcamp/chat/ChatController.java | 4 ++-- .../java/com/cholog/bootcamp/chat/ChatService.java | 11 ++++++----- .../{ChatResponse.java => ChatAnswerResponse.java} | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) rename src/main/java/com/cholog/bootcamp/chat/dto/{ChatResponse.java => ChatAnswerResponse.java} (86%) diff --git a/src/main/java/com/cholog/bootcamp/chat/ChatController.java b/src/main/java/com/cholog/bootcamp/chat/ChatController.java index 89a087f..244cfb5 100644 --- a/src/main/java/com/cholog/bootcamp/chat/ChatController.java +++ b/src/main/java/com/cholog/bootcamp/chat/ChatController.java @@ -1,7 +1,7 @@ package com.cholog.bootcamp.chat; import com.cholog.bootcamp.chat.dto.ChatRequest; -import com.cholog.bootcamp.chat.dto.ChatResponse; +import com.cholog.bootcamp.chat.dto.ChatAnswerResponse; import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RestController; @@ -16,7 +16,7 @@ public ChatController(ChatService chatService) { } @PostMapping("/api/chat") - public ChatResponse chat(@RequestBody ChatRequest request) { + public ChatAnswerResponse chat(@RequestBody ChatRequest request) { return chatService.ask(request.question()); } } diff --git a/src/main/java/com/cholog/bootcamp/chat/ChatService.java b/src/main/java/com/cholog/bootcamp/chat/ChatService.java index 693fe12..22bb665 100644 --- a/src/main/java/com/cholog/bootcamp/chat/ChatService.java +++ 
b/src/main/java/com/cholog/bootcamp/chat/ChatService.java @@ -1,6 +1,6 @@ package com.cholog.bootcamp.chat; -import com.cholog.bootcamp.chat.dto.ChatResponse; +import com.cholog.bootcamp.chat.dto.ChatAnswerResponse; import jakarta.annotation.PostConstruct; import java.util.List; import java.util.Map; @@ -9,6 +9,7 @@ import lombok.extern.slf4j.Slf4j; import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.metadata.Usage; +import org.springframework.ai.chat.model.ChatResponse; import org.springframework.ai.document.Document; import org.springframework.ai.vectorstore.SearchRequest; import org.springframework.ai.vectorstore.VectorStore; @@ -29,7 +30,7 @@ void loadFaqContext() { vectorStore.add(documentLoader.load()); } - public ChatResponse ask(String question) { + public ChatAnswerResponse ask(String question) { List retrievedDocuments = vectorStore.similaritySearch( SearchRequest.builder() .query(question) @@ -44,7 +45,7 @@ public ChatResponse ask(String question) { .map(Document::getText) .collect(Collectors.joining("\n\n===\n\n")); - org.springframework.ai.chat.model.ChatResponse response = chatClient.prompt() + ChatResponse response = chatClient.prompt() .system(""" - 당신은 Cholog Corporation의 고객 전용 챗봇 서비스이다. - 제공된 컨텍스트만을 활용하라. @@ -63,9 +64,9 @@ public ChatResponse ask(String question) { Usage usage = response.getMetadata().getUsage(); - return new ChatResponse( + return new ChatAnswerResponse( response.getResult().getOutput().getText(), - new ChatResponse.TokenUsage( + new ChatAnswerResponse.TokenUsage( usage == null || usage.getPromptTokens() == null ? 0 : usage.getPromptTokens(), usage == null || usage.getCompletionTokens() == null ? 0 : usage.getCompletionTokens(), usage == null || usage.getTotalTokens() == null ? 
0 : usage.getTotalTokens() diff --git a/src/main/java/com/cholog/bootcamp/chat/dto/ChatResponse.java b/src/main/java/com/cholog/bootcamp/chat/dto/ChatAnswerResponse.java similarity index 86% rename from src/main/java/com/cholog/bootcamp/chat/dto/ChatResponse.java rename to src/main/java/com/cholog/bootcamp/chat/dto/ChatAnswerResponse.java index 18aab44..3645bae 100644 --- a/src/main/java/com/cholog/bootcamp/chat/dto/ChatResponse.java +++ b/src/main/java/com/cholog/bootcamp/chat/dto/ChatAnswerResponse.java @@ -1,6 +1,6 @@ package com.cholog.bootcamp.chat.dto; -public record ChatResponse( +public record ChatAnswerResponse( String answer, TokenUsage tokenUsage ) {