cho-log · boyekim · May 19, 2026 · May 20, 2026 · May 20, 2026 · May 20, 2026
diff --git a/build.gradle b/build.gradle
@@ -26,17 +26,17 @@ dependencyManagement {
 dependencies {
     implementation 'org.springframework.boot:spring-boot-starter-web'
     implementation 'org.springframework.ai:spring-ai-starter-model-openai'
+    implementation 'org.springframework.ai:spring-ai-vector-store'
+
+    compileOnly 'org.projectlombok:lombok'
+    annotationProcessor 'org.projectlombok:lombok'
 
     testImplementation 'org.springframework.boot:spring-boot-starter-test'
+    testCompileOnly 'org.projectlombok:lombok'
+    testAnnotationProcessor 'org.projectlombok:lombok'
 }
 
-tasks.named('test') {
-    useJUnitPlatform()
-    testLogging {
-        showStandardStreams = true
-    }
-
-    // .env 파일에서 환경변수 로딩
+def loadDotEnv = { task ->
     def envFile = rootProject.file('.env')
     if (envFile.exists()) {
         envFile.readLines().each { line ->
@@ -46,9 +46,22 @@ tasks.named('test') {
                 if (idx > 0) {
                     def key = trimmed.substring(0, idx).trim()
                     def value = trimmed.substring(idx + 1).trim()
-                    environment(key, value)
+                    task.environment(key, value)
                 }
             }
         }
     }
 }
+
+// Test task configuration: run tests on the JUnit Platform and show the tests'
+// standard output/error streams in the build log.
+tasks.named('test') {
+    useJUnitPlatform()
+    testLogging {
+        showStandardStreams = true
+    }
+
+    // Pass this task to loadDotEnv so the entries of the root project's .env file
+    // (when the file exists) are set as environment variables of the test process.
+    loadDotEnv(delegate)
+}
+
+// bootRun receives the same .env-derived environment variables as the test task,
+// by handing this task to the shared loadDotEnv closure.
+tasks.named('bootRun') {
+    loadDotEnv(delegate)
+}
diff --git a/data/eval_result.json b/data/eval_result.json
@@ -0,0 +1,22 @@
+{
+  "total": 150,
+  "correct": 66,
+  "incorrect": 84,
+  "error": 0,
+  "accuracy": 0.44,
+  "tier_results": {
+    "easy": {
+      "correct": 13,
+      "total": 30
+    },
+    "medium": {
+      "correct": 47,
+      "total": 94
+    },
+    "hard": {
+      "correct": 6,
+      "total": 26
+    }
+  },
+  "elapsed_seconds": 468.1384799480438
+}
diff --git a/data/evaluate.py b/data/evaluate.py
@@ -102,6 +102,20 @@ def judge_answer(question: str, expected: str, actual: str) -> dict:
         return {"score": 0, "reason": "판정 파싱 실패"}
 
 
+def next_result_file_path(data_dir: Path) -> Path:
+    """Return a result-file path inside ``data_dir`` that does not exist yet.
+
+    ``eval_result.json`` is returned when no such file exists. Otherwise the
+    first unused ``eval_result_<n>.json`` (n = 1, 2, ...) is returned. Only the
+    path is computed here; no file is created.
+    """
+    default_path = data_dir / "eval_result.json"
+    if not default_path.exists():
+        return default_path
+
+    index = 1
+    # Probe numbered names in ascending order until an unused one is found.
+    while True:
+        candidate = data_dir / f"eval_result_{index}.json"
+        if not candidate.exists():
+            return candidate
+        index += 1
+
+
 # ─── 메인 ─────────────────────────────────────────────────────────────────────
 
 def main():
@@ -133,6 +147,7 @@ def main():
 
     results = {"correct": 0, "incorrect": 0, "error": 0}
     tier_results = {}
+    detailed_results = []
     start_time = time.time()
 
     for i, q in enumerate(questions):
@@ -149,6 +164,16 @@ def main():
         response = ask_server(question_ko)
         if response is None:
             results["error"] += 1
+            detailed_results.append({
+                "id": qid,
+                "tier": tier,
+                "question": question_ko,
+                "expected_answer": expected,
+                "actual_answer": "",
+                "score": 0,
+                "reason": "서버 응답 없음",
+                "status": "error",
+            })
             if args.verbose:
                 print(f"[{qid}] ERROR — 서버 응답 없음")
             continue
@@ -167,6 +192,17 @@ def main():
             results["incorrect"] += 1
             marker = "✗"
 
+        detailed_results.append({
+            "id": qid,
+            "tier": tier,
+            "question": question_ko,
+            "expected_answer": expected,
+            "actual_answer": actual_answer,
+            "score": score,
+            "reason": judgment.get("reason", ""),
+            "status": "correct" if score == 1 else "incorrect",
+        })
+
         if args.verbose:
             print(f"[{qid}] {marker} ({tier}) {question_ko[:40]}...")
             if score == 0:
@@ -197,8 +233,17 @@ def main():
     print(f"\n소요 시간: {elapsed:.1f}초")
     print(f"평균 응답: {elapsed/max(total,1):.1f}초/질문")
 
+    incorrect_results = [item for item in detailed_results if item["status"] == "incorrect"]
+    if incorrect_results:
+        print("\n오답 판정 이유:")
+        for item in incorrect_results[:10]:
+            print(f"  [{item['id']}] {item['question']}")
+            print(f"    이유: {item['reason']}")
+        if len(incorrect_results) > 10:
+            print(f"  ... 외 {len(incorrect_results) - 10}건은 결과 파일을 확인하세요.")
+
     # 결과 저장
-    result_file = DATA_DIR / "eval_result.json"
+    result_file = next_result_file_path(DATA_DIR)
     with open(result_file, "w") as f:
         json.dump({
             "total": total,
@@ -208,6 +253,7 @@ def main():
             "accuracy": results["correct"] / max(total, 1),
             "tier_results": tier_results,
             "elapsed_seconds": elapsed,
+            "detailed_results": detailed_results,
         }, f, indent=2, ensure_ascii=False)
     print(f"\n결과 저장: {result_file}")
 

diff --git a/mission/wall-report.md b/mission/wall-report.md
@@ -7,36 +7,69 @@
 
 > 구현하면서 잘 안 됐던 것, 예상과 달랐던 것을 적어주세요.
 
-- 
+[처음 시작]
 
+- 어떻게 구현하는지조차 잘 몰라서 바로 hint1을 봐야했습니다.
+- 챗봇을 구현할 때에 어떤 것을 고려해서 구현해야하는지도 전혀 몰랐습니다. 그래서 RAG가 무엇인지, 임베딩은 언제 수행되는지, 벡터 계산은 어떻게 시키는건지 차근차근
+  알아가보려고 했습니다.
+    - 임베딩 과정을 통해 문서와 질문을 벡터로 변환하고, 이 벡터를 `VectorStore`에 저장하고 검색에 사용한다는 점을 알아봤습니다. 질문이 들어오면 먼저 관련 문서를
+      `VectorStore`에서 찾고, 그 검색 결과를 문맥으로 받은 Chat API가 답변을 생성한다는 구조를 대략적으로 받아들이고 진행했습니다.
+    - 챗봇 기능 구현 자체는 ai를 통해 진행했습니다.
+
+[구현 과정 중]
+
+- 처음에는 chat log를 전부 포함하려고 했는데, 문서 수가 많아 앱 시작 시 임베딩 호출이 너무 오래 걸렸습니다. 그래서 chat log는
+  `agent_accuracy=correct`인 데이터만 사용하도록 줄이고, FAQ와 policy는 각각 제목 단위로 chunking해서 `VectorStore`에
+  넣어보았습니다.
+- 그리고 평가가 어떤 기준으로 되는지 모르겠고, 어떤 지점을 변경해야 평가지표가 좋아지는지도 감이 전혀 오질 않았습니다.
+- 프롬프트를 다듬어야 하는 건지, RAG 검색 topK 값을 변경해야 하는 건지, chunking 기준을 변경해야 하는 건지, chat log 데이터에서 다른 것을 포함해야 하는지
+  감이 오질 않았습니다. 그래서 하나씩 해봤는데 품질에 별다른 변화가 없었다고 느꼈습니다.
 
 ## 2. 해결하지 못한 것
 
 > 시도했지만 결국 해결 못한 문제가 있다면 적어주세요.
 
-- 
-
+- 프롬프트, topK, chunking 기준을 바꿔보며 정확도를 높이려고 했지만, 어떤 변경이 점수 향상에 가장 큰 영향을 주는지 명확히 파악하지 못했습니다.
+- 검색 결과 로깅을 추가해 어떤 문서 chunk가 검색되는지는 확인할 수 있게 했지만, 오답의 원인이 검색 단계에 있는지 답변 생성 단계에 있는지 체계적으로 구분하지는
+  못했습니다.
+    - 검색된 문서가 맞았는데도 답변이 부족한 경우와, 애초에 잘못된 문서가 검색된 경우를 나누어 분석하는 방법을 아직 잘 모르는 것 같습니다.
+- chat log를 활용해보려고 했지만, 처음에는 데이터가 너무 많아 임베딩 시간이 오래 걸렸습니다. `agent_accuracy=correct`인 데이터만 사용하도록 줄였지만, 실제
+  정확도 향상에 도움이 되는지 노이즈가 되는지 파악하지 못했습니다.
+- 부분적으로 맞는 답변도 score=0으로 처리되는 경우가 있는 것 같습니다. 점수를 더 세분화하면 개선 방향을 분석하는 데 도움이 될 수 있을지 궁금했지만,
+  현재 평가 기준 자체를 바꾸는 것이 적절한지는 판단하지 못했습니다.
 
 ## 3. 정확도 측정 결과
 
 > 테스트 질문 100개로 측정한 정확도를 기록해주세요.
 
-| 난이도 | 정확도 | 비고 |
-|--------|--------|------|
-| easy   |        |      |
-| medium |        |      |
-| hard   |        |      |
+테스트 질문 150개 기준으로 측정했습니다. (`eval_result.json` 결과를 기준으로 작성)
 
+| 난이도    | 정확도           | 비고                          |
+|--------|---------------|-----------------------------|
+| easy   | 43.3% (13/30) | 기본 질문에서도 조건/예외 누락으로 오답 발생   |
+| medium | 50.0% (47/94) | 가장 높은 정확도이나 세부 정책 누락이 많음    |
+| hard   | 23.1% (6/26)  | 복합 조건, 예외 정책, 최신 정책 구분에서 취약 |
 
 ## 4. 왜 그런 결과가 나왔는지
 
 > 정확도가 낮은 난이도의 질문을 몇 개 살펴보고, 왜 틀렸는지 분석해주세요.
 
-- 
+완전히 다른 답변을 한 경우보다는 핵심 사실의 '일부'만 포함한 경우(함께 설명되어야 하는 조건, 예외, 제한사항을 빠뜨리는 경우)가 많았습니다.
 
+하나의 질문에 여러 정책 조항이 함께 필요한 경우 일부 정보만 답변에 반영되어 정확도가 낮았습니다.
+=> 관련 문서를 어느 정도 찾았더라도, 답변 생성 과정에서 필요한 조건을 모두 종합하지 못하면 오답이 되었습니다.
+
+현재 구현은 topK로 검색된 일부 chunk만 문맥으로 전달하기 때문에, 필요한 근거가 검색 결과에 포함되지 않거나, 포함되더라도 답변에서 충분히 사용되지 않는 문제가
+있었습니다.
 
 ## 5. 개선하고 싶은 것
 
 > 시간이 더 있었다면 시도해보고 싶은 개선점을 적어주세요.
 
-- 
+- 임베딩과 벡터 검색의 원리가 궁금합니다. 제공되는 힌트를 보니까 cosine similarity, 벡터 차원 등의 키워드가 있던데 아직 잘 모릅니다...
+- incorrect하다고 판단한 원인을 좀 더 알아보고 싶습니다. 로깅을 통해 어떤 chunk가 검색되었는지는 확인할 수 있지만, 오답이 검색 실패 때문인지, 검색된 문서를
+  충분히 활용하지 못한 답변 생성 문제인지 모르겠습니다. 그래서 더 어떤 시도를 해야하는지 답답했던 것 같습니다.(제가 늦게 참여해서 그런 걸까요..?)
+
+> 추가로 궁금한 것
+
+- 점수 계산은 어떤 것을 기준으로 하는지 궁금합니다. 점수가 높을수록 실제 사용감이 좋아지는 건지 궁금합니다.
diff --git a/src/main/java/com/cholog/bootcamp/AiConfig.java b/src/main/java/com/cholog/bootcamp/AiConfig.java
@@ -0,0 +1,22 @@
+package com.cholog.bootcamp;
+
+import org.springframework.ai.chat.client.ChatClient;
+import org.springframework.ai.embedding.EmbeddingModel;
+import org.springframework.ai.vectorstore.SimpleVectorStore;
+import org.springframework.ai.vectorstore.VectorStore;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+
+/**
+ * Spring configuration that declares the AI-related beans used by the application:
+ * a {@link VectorStore} and a {@link ChatClient}.
+ */
+@Configuration
+public class AiConfig {
+
+    /**
+     * Creates the vector store bean as a {@link SimpleVectorStore} built on the injected
+     * embedding model.
+     *
+     * @param embeddingModel embedding model passed to the store's builder
+     * @return the vector store bean
+     */
+    @Bean
+    VectorStore vectorStore(EmbeddingModel embeddingModel) {
+        return SimpleVectorStore.builder(embeddingModel).build();
+    }
+
+    /**
+     * Creates the chat client bean from the injected builder, without further customization.
+     *
+     * @param chatClientBuilder injected {@link ChatClient.Builder}
+     * @return the chat client bean
+     */
+    @Bean
+    ChatClient chatClient(ChatClient.Builder chatClientBuilder) {
+        return chatClientBuilder.build();
+    }
+}
diff --git a/src/main/java/com/cholog/bootcamp/chat/ChatController.java b/src/main/java/com/cholog/bootcamp/chat/ChatController.java
@@ -0,0 +1,22 @@
+package com.cholog.bootcamp.chat;
+
+import com.cholog.bootcamp.chat.dto.ChatRequest;
+import com.cholog.bootcamp.chat.dto.ChatAnswerResponse;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RestController;
+
+/**
+ * REST controller exposing the chat endpoint; all work is delegated to {@link ChatService}.
+ */
+@RestController
+public class ChatController {
+
+    private final ChatService chatService;
+
+    public ChatController(ChatService chatService) {
+        this.chatService = chatService;
+    }
+
+    /**
+     * Handles {@code POST /api/chat} by passing the request's question to
+     * {@link ChatService#ask(String)}.
+     *
+     * @param request request body; only its {@code question()} value is read here
+     * @return the response produced by the chat service
+     */
+    @PostMapping("/api/chat")
+    public ChatAnswerResponse chat(@RequestBody ChatRequest request) {
+        return chatService.ask(request.question());
+    }
+}
diff --git a/src/main/java/com/cholog/bootcamp/chat/ChatService.java b/src/main/java/com/cholog/bootcamp/chat/ChatService.java
@@ -0,0 +1,103 @@
+package com.cholog.bootcamp.chat;
+
+import com.cholog.bootcamp.chat.dto.ChatAnswerResponse;
+import jakarta.annotation.PostConstruct;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.chat.client.ChatClient;
+import org.springframework.ai.chat.metadata.Usage;
+import org.springframework.ai.chat.model.ChatResponse;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.vectorstore.SearchRequest;
+import org.springframework.ai.vectorstore.VectorStore;
+import org.springframework.stereotype.Service;
+
+/**
+ * Answers customer questions with a retrieve-then-generate flow: documents similar to the
+ * question are looked up in the {@link VectorStore} and their text is sent to the
+ * {@link ChatClient} as context together with the question.
+ */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class ChatService {
+
+    private final ChatClient chatClient;
+    private final VectorStore vectorStore;
+    private final RagProperties ragProperties;
+    private final DocumentLoader documentLoader;
+
+    /**
+     * Adds every document returned by {@code documentLoader.load()} to the vector store once
+     * this bean has been constructed.
+     */
+    @PostConstruct
+    void loadFaqContext() {
+        vectorStore.add(documentLoader.load());
+    }
+
+    /**
+     * Answers a single question.
+     *
+     * <p>Runs a similarity search limited to {@code ragProperties.getTopK()} results, logs the
+     * retrieved documents, joins their text into one context string, and calls the chat client
+     * with a fixed system prompt plus a user message containing the question and the context.
+     *
+     * @param question the customer's question; used as the search query and in the user message
+     * @return the generated answer text together with token usage (missing usage values are
+     *         reported as 0)
+     */
+    public ChatAnswerResponse ask(String question) {
+        List<Document> retrievedDocuments = vectorStore.similaritySearch(
+            SearchRequest.builder()
+                .query(question)
+                .topK(ragProperties.getTopK())
+                .build()
+        );
+
+        logSearchResults(question, retrievedDocuments);
+
+        // Concatenate the retrieved document texts, separated by a "===" divider line.
+        String supportContext = retrievedDocuments
+            .stream()
+            .map(Document::getText)
+            .collect(Collectors.joining("\n\n===\n\n"));
+
+        // The system prompt (in Korean) tells the model to act as the company's customer chatbot,
+        // to use only the provided context, to answer with a "contact the customer center"
+        // message when the context is insufficient, and to answer in Korean.
+        ChatResponse response = chatClient.prompt()
+            .system("""
+                    - 당신은 Cholog Corporation의 고객 전용 챗봇 서비스이다.
+                    - 제공된 컨텍스트만을 활용하라.
+                    - 제공된 컨텍스트로 답할 수 없다면, '고객센터에 문의해주세요'라고 답하라.
+                    - 한국어로 답하라.
+                """)
+            .user("""
+                    Customer question:
+                    %s
+
+                    Support context:
+                    %s
+                """.formatted(question, supportContext))
+            .call()
+            .chatResponse();
+
+        // NOTE(review): response and response.getResult() are dereferenced without null checks —
+        // confirm against the Spring AI API whether either can be null here.
+        Usage usage = response.getMetadata().getUsage();
+
+        return new ChatAnswerResponse(
+            response.getResult().getOutput().getText(),
+            new ChatAnswerResponse.TokenUsage(
+                // Fall back to 0 when usage, or an individual counter, is absent.
+                usage == null || usage.getPromptTokens() == null ? 0 : usage.getPromptTokens(),
+                usage == null || usage.getCompletionTokens() == null ? 0 : usage.getCompletionTokens(),
+                usage == null || usage.getTotalTokens() == null ? 0 : usage.getTotalTokens()
+            )
+        );
+    }
+
+    /**
+     * Logs, at INFO level, the question, the configured topK and a one-line summary of the
+     * retrieved documents ("no documents retrieved" when the list is empty).
+     *
+     * @param question  the question that was searched for
+     * @param documents documents returned by the similarity search
+     */
+    private void logSearchResults(String question, List<Document> documents) {
+        String resultSummary = documents.isEmpty()
+            ? "no documents retrieved"
+            : documents.stream()
+                .map(this::formatDocumentSummary)
+                .collect(Collectors.joining(" | "));
+
+        log.info("RAG search question='{}' topK={} results={}",
+            question,
+            ragProperties.getTopK(),
+            resultSummary
+        );
+    }
+
+    /**
+     * Formats one document for the search log from its metadata as
+     * {@code sourceType/source}, with {@code #sectionTitle} appended when that key is present.
+     * Missing {@code sourceType} or {@code source} entries are shown as {@code UNKNOWN}.
+     *
+     * @param document retrieved document whose metadata is summarized
+     * @return the summary string
+     */
+    private String formatDocumentSummary(Document document) {
+        Map<String, Object> metadata = document.getMetadata();
+        String sourceType = String.valueOf(metadata.getOrDefault("sourceType", "UNKNOWN"));
+        String source = String.valueOf(metadata.getOrDefault("source", "UNKNOWN"));
+        Object sectionTitle = metadata.get("sectionTitle");
+
+        if (sectionTitle == null) {
+            return "%s/%s".formatted(sourceType, source);
+        }
+
+        return "%s/%s#%s".formatted(sourceType, source, sectionTitle);
+    }
+}