From f47ffe568b9c3b3dd19c3f8472c800012e77b8c5 Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Thu, 7 May 2026 14:38:52 +0900
Subject: [PATCH 01/20] =?UTF-8?q?chore:=20lombok=20=EC=9D=98=EC=A1=B4=20?=
 =?UTF-8?q?=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 build.gradle | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/build.gradle b/build.gradle
index 941e596..4745b6f 100644
--- a/build.gradle
+++ b/build.gradle
@@ -28,6 +28,9 @@ dependencies {
     implementation 'org.springframework.ai:spring-ai-starter-model-openai'
 
     testImplementation 'org.springframework.boot:spring-boot-starter-test'
+
+    compileOnly 'org.projectlombok:lombok'
+    annotationProcessor 'org.projectlombok:lombok'
 }
 
 tasks.named('test') {

From 3ef11dd3f7d7b38f8fbf39d8b42c860d286a07f2 Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Thu, 7 May 2026 14:40:16 +0900
Subject: [PATCH 02/20] =?UTF-8?q?chore:=20ai=20chatClient=20=EC=84=A4?=
 =?UTF-8?q?=EC=A0=95=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../com/cholog/bootcamp/config/AiConfig.java     | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 src/main/java/com/cholog/bootcamp/config/AiConfig.java

diff --git a/src/main/java/com/cholog/bootcamp/config/AiConfig.java b/src/main/java/com/cholog/bootcamp/config/AiConfig.java
new file mode 100644
index 0000000..984f8c3
--- /dev/null
+++ b/src/main/java/com/cholog/bootcamp/config/AiConfig.java
@@ -0,0 +1,16 @@
+package com.cholog.bootcamp.config;
+
+import org.springframework.ai.chat.client.ChatClient;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+
+@Configuration
+public class AiConfig {
+
+    @Bean
+    public ChatClient chatClient(ChatClient.Builder builder) {
+        return builder
+                .defaultSystem("당신은 친절한 고객센터 상담원입니다.")
+                .build();
+    }
+}

From b94022d69cf659b35893c0653785542c38d85f11 Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Thu, 7 May 2026 14:41:40 +0900
Subject: [PATCH 03/20] =?UTF-8?q?feat:=20chatbot=20API=20=EA=B8=B0?=
 =?UTF-8?q?=EB=B3=B8=20=EA=B5=AC=ED=98=84=20(open=20AI=20API=20=EC=97=B0?=
 =?UTF-8?q?=EA=B2=B0)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../chatbot/application/ChatbotService.java   | 24 +++++++++++++++++
 .../application/dto/ChatbotResult.java        | 22 +++++++++++++++
 .../presentation/ChatbotController.java       | 27 +++++++++++++++++++
 .../presentation/dto/ChatbotRequest.java      |  6 +++++
 .../presentation/dto/ChatbotResponse.java     | 27 +++++++++++++++++++
 5 files changed, 106 insertions(+)
 create mode 100644 src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
 create mode 100644 src/main/java/com/cholog/bootcamp/chatbot/application/dto/ChatbotResult.java
 create mode 100644 src/main/java/com/cholog/bootcamp/chatbot/presentation/ChatbotController.java
 create mode 100644 src/main/java/com/cholog/bootcamp/chatbot/presentation/dto/ChatbotRequest.java
 create mode 100644 src/main/java/com/cholog/bootcamp/chatbot/presentation/dto/ChatbotResponse.java

diff --git a/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java b/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
new file mode 100644
index 0000000..76aa886
--- /dev/null
+++ b/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
@@ -0,0 +1,24 @@
+package com.cholog.bootcamp.chatbot.application;
+
+import com.cholog.bootcamp.chatbot.application.dto.ChatbotResult;
+import lombok.RequiredArgsConstructor;
+import org.springframework.ai.chat.client.ChatClient;
+import org.springframework.ai.chat.model.ChatResponse;
+import org.springframework.stereotype.Service;
+
+@Service
+@RequiredArgsConstructor
+public class ChatbotService {
+
+    private final ChatClient chatClient;
+
+    public ChatbotResult chat(String question) {
+        ChatResponse aiResponse = chatClient.prompt()
+                .user(question)
+                .call()
+                .chatResponse();
+
+        return ChatbotResult.of(aiResponse);
+    }
+
+}
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/application/dto/ChatbotResult.java b/src/main/java/com/cholog/bootcamp/chatbot/application/dto/ChatbotResult.java
new file mode 100644
index 0000000..f727f1e
--- /dev/null
+++ b/src/main/java/com/cholog/bootcamp/chatbot/application/dto/ChatbotResult.java
@@ -0,0 +1,22 @@
+package com.cholog.bootcamp.chatbot.application.dto;
+
+import org.springframework.ai.chat.metadata.Usage;
+import org.springframework.ai.chat.model.ChatResponse;
+
+public record ChatbotResult(
+        String answer,
+        long promptTokens,
+        long completionTokens,
+        long totalTokens
+) {
+
+    /**
+     * Extracts the answer text and the prompt/completion/total token counts from a model response.
+     */
+    public static ChatbotResult of(ChatResponse response) {
+        Usage tokens = response.getMetadata().getUsage();
+        String text = response.getResult().getOutput().getText();
+
+        return new ChatbotResult(text, tokens.getPromptTokens(), tokens.getCompletionTokens(), tokens.getTotalTokens());
+    }
+}
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/presentation/ChatbotController.java b/src/main/java/com/cholog/bootcamp/chatbot/presentation/ChatbotController.java
new file mode 100644
index 0000000..4549525
--- /dev/null
+++ b/src/main/java/com/cholog/bootcamp/chatbot/presentation/ChatbotController.java
@@ -0,0 +1,27 @@
+package com.cholog.bootcamp.chatbot.presentation;
+
+import com.cholog.bootcamp.chatbot.application.ChatbotService;
+import com.cholog.bootcamp.chatbot.application.dto.ChatbotResult;
+import com.cholog.bootcamp.chatbot.presentation.dto.ChatbotRequest;
+import com.cholog.bootcamp.chatbot.presentation.dto.ChatbotResponse;
+import lombok.RequiredArgsConstructor;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+
+/** REST controller exposing the chatbot at POST /api/chat. */
+@RestController
+@RequestMapping("/api/chat")
+@RequiredArgsConstructor
+public class ChatbotController {
+    private final ChatbotService chatbotService;
+
+    /** Passes the request's question to the service and returns its result as a 200 response body. */
+    @PostMapping
+    public ResponseEntity<ChatbotResponse> chat(@RequestBody ChatbotRequest request) {
+        ChatbotResult answered = chatbotService.chat(request.question());
+        return ResponseEntity.ok(ChatbotResponse.of(answered));
+    }
+}
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/presentation/dto/ChatbotRequest.java b/src/main/java/com/cholog/bootcamp/chatbot/presentation/dto/ChatbotRequest.java
new file mode 100644
index 0000000..874b7c5
--- /dev/null
+++ b/src/main/java/com/cholog/bootcamp/chatbot/presentation/dto/ChatbotRequest.java
@@ -0,0 +1,6 @@
+package com.cholog.bootcamp.chatbot.presentation.dto;
+
+public record ChatbotRequest(
+        String question
+) {
+}
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/presentation/dto/ChatbotResponse.java b/src/main/java/com/cholog/bootcamp/chatbot/presentation/dto/ChatbotResponse.java
new file mode 100644
index 0000000..d7df049
--- /dev/null
+++ b/src/main/java/com/cholog/bootcamp/chatbot/presentation/dto/ChatbotResponse.java
@@ -0,0 +1,27 @@
+package com.cholog.bootcamp.chatbot.presentation.dto;
+
+import com.cholog.bootcamp.chatbot.application.dto.ChatbotResult;
+
+/** Response body of the chat endpoint: the answer text plus token usage counts. */
+public record ChatbotResponse(
+        String answer,
+        TokenUsage tokenUsage
+) {
+    /** Converts an application-layer {@link ChatbotResult} into the response shape. */
+    public static ChatbotResponse of(ChatbotResult result) {
+        TokenUsage usage = new TokenUsage(
+                result.promptTokens(),
+                result.completionTokens(),
+                result.totalTokens()
+        );
+        return new ChatbotResponse(result.answer(), usage);
+    }
+
+    /** Declared public because it is the type of the public {@code tokenUsage} component. */
+    public record TokenUsage(
+            long promptTokens,
+            long completionTokens,
+            long totalTokens
+    ) {
+    }
+}

From 9ce59faa0f9f6bc9f68a0de915f16e30b3d58bea Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Mon, 11 May 2026 22:48:30 +0900
Subject: [PATCH 04/20] =?UTF-8?q?chore:=20vectorStore=20=EC=9D=98=EC=A1=B4?=
 =?UTF-8?q?=20=EC=84=A4=EC=A0=95=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 build.gradle                                     |  1 +
 .../com/cholog/bootcamp/config/AiConfig.java     | 16 +++++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/build.gradle b/build.gradle
index 4745b6f..e906914 100644
--- a/build.gradle
+++ b/build.gradle
@@ -26,6 +26,7 @@ dependencyManagement {
 dependencies {
     implementation 'org.springframework.boot:spring-boot-starter-web'
     implementation 'org.springframework.ai:spring-ai-starter-model-openai'
+    implementation 'org.springframework.ai:spring-ai-vector-store'
 
     testImplementation 'org.springframework.boot:spring-boot-starter-test'
 
diff --git a/src/main/java/com/cholog/bootcamp/config/AiConfig.java b/src/main/java/com/cholog/bootcamp/config/AiConfig.java
index 984f8c3..75f87e6 100644
--- a/src/main/java/com/cholog/bootcamp/config/AiConfig.java
+++ b/src/main/java/com/cholog/bootcamp/config/AiConfig.java
@@ -1,16 +1,30 @@
 package com.cholog.bootcamp.config;
 
 import org.springframework.ai.chat.client.ChatClient;
+import org.springframework.ai.embedding.EmbeddingModel;
+import org.springframework.ai.vectorstore.SimpleVectorStore;
+import org.springframework.ai.vectorstore.VectorStore;
 import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.Configuration;
 
 @Configuration
 public class AiConfig {
 
+    private static final String SYSTEM_PROMPT = """
+            당신은 초록 코퍼레이션의 친절한 고객센터 상담원입니다.
+            [고객 질문]에 대해 [참고 문서]를 기반으로 답변하세요.
+            """;
+
     @Bean
     public ChatClient chatClient(ChatClient.Builder builder) {
         return builder
-                .defaultSystem("당신은 친절한 고객센터 상담원입니다.")
+                .defaultSystem(SYSTEM_PROMPT)
                 .build();
     }
+
+    @Bean
+    public VectorStore vectorStore(EmbeddingModel embeddingModel) {
+        return SimpleVectorStore.builder(embeddingModel).build();
+    }
+
 }

From 1f5210979c4cc21535a9692cf496d67b9a962c07 Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Mon, 11 May 2026 22:52:46 +0900
Subject: [PATCH 05/20] =?UTF-8?q?feat:=20v1=20=ED=8C=8C=EC=9D=BC=20?=
 =?UTF-8?q?=EB=8B=A8=EC=9C=84=20=EC=9E=84=EB=B2=A0=EB=94=A9=20=EA=B5=AC?=
 =?UTF-8?q?=ED=98=84=20=EB=B0=8F=20=ED=8F=89=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/eval_result.json                         | 22 ++++++
 .../chatbot/application/ChatbotService.java   | 35 +++++++++-
 .../infrastructure/DocumentLoader.java        | 67 +++++++++++++++++++
 .../VectorStoreInitializer.java               | 40 +++++++++++
 4 files changed, 162 insertions(+), 2 deletions(-)
 create mode 100644 data/eval_result.json
 create mode 100644 src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java
 create mode 100644 src/main/java/com/cholog/bootcamp/chatbot/infrastructure/VectorStoreInitializer.java

diff --git a/data/eval_result.json b/data/eval_result.json
new file mode 100644
index 0000000..35d6e84
--- /dev/null
+++ b/data/eval_result.json
@@ -0,0 +1,22 @@
+{
+  "total": 150,
+  "correct": 80,
+  "incorrect": 70,
+  "error": 0,
+  "accuracy": 0.5333333333333333,
+  "tier_results": {
+    "easy": {
+      "correct": 18,
+      "total": 30
+    },
+    "medium": {
+      "correct": 51,
+      "total": 94
+    },
+    "hard": {
+      "correct": 11,
+      "total": 26
+    }
+  },
+  "elapsed_seconds": 664.7394919395447
+}
\ No newline at end of file
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java b/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
index 76aa886..cfa828f 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
@@ -3,22 +3,53 @@
 import com.cholog.bootcamp.chatbot.application.dto.ChatbotResult;
 import lombok.RequiredArgsConstructor;
 import org.springframework.ai.chat.client.ChatClient;
+
 import org.springframework.ai.chat.model.ChatResponse;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.vectorstore.SearchRequest;
+import org.springframework.ai.vectorstore.VectorStore;
 import org.springframework.stereotype.Service;
 
+import java.util.List;
+import java.util.stream.Collectors;
+
 @Service
 @RequiredArgsConstructor
 public class ChatbotService {
 
+    private static final String PROMPT = """
+            [참고 문서]
+            %s
+            
+            [고객 질문]
+            %s
+            """;
+
     private final ChatClient chatClient;
+    private final VectorStore vectorStore;
 
     public ChatbotResult chat(String question) {
+        String context = searchRelevantDocuments(question);
+        String userMessage = PROMPT.formatted(context, question);
+
         ChatResponse aiResponse = chatClient.prompt()
-                .user(question)
+                .user(userMessage)
                 .call()
                 .chatResponse();
-
         return ChatbotResult.of(aiResponse);
     }
 
+    /** Runs a top-5 similarity search for the question and joins the hits' text with blank lines. */
+    private String searchRelevantDocuments(String question) {
+        SearchRequest topFive = SearchRequest.builder()
+                .query(question)
+                .topK(5)
+                .build();
+        List<Document> matches = vectorStore.similaritySearch(topFive);
+
+        return matches.stream()
+                .map(Document::getText)
+                .collect(Collectors.joining("\n\n"));
+    }
+
 }
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java
new file mode 100644
index 0000000..71f0937
--- /dev/null
+++ b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java
@@ -0,0 +1,67 @@
+package com.cholog.bootcamp.chatbot.infrastructure;
+
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.reader.TextReader;
+import org.springframework.core.io.Resource;
+import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
+import org.springframework.stereotype.Component;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+@Slf4j
+@Component
+public class DocumentLoader {
+
+    private static final String FAQ_PATTERN = "file:data/layer1_faq/*.md";
+    private static final String POLICY_PATTERN = "file:data/layer2_policies/current/*.md";
+    private static final String LAYER_FAQ = "faq";
+    private static final String LAYER_POLICY = "policy";
+
+    private final PathMatchingResourcePatternResolver resolver =
+            new PathMatchingResourcePatternResolver();
+
+    public List<Document> loadFaq() {
+        return load(FAQ_PATTERN, LAYER_FAQ);
+    }
+
+    public List<Document> loadPolicies() {
+        return load(POLICY_PATTERN, LAYER_POLICY);
+    }
+
+    private List<Document> load(String pattern, String layer) {
+        try {
+            Resource[] resources = resolver.getResources(pattern);
+
+            if (resources.length == 0) {
+                log.warn("문서 없음: layer={}", layer);
+                return List.of();
+            }
+
+            List<Document> result = new ArrayList<>();
+            for (Resource resource : resources) {
+                result.addAll(toDocuments(resource, layer));
+            }
+
+            log.info("문서 로드 완료: layer={}, 총 {}개", layer, result.size());
+            return result;
+
+        } catch (IOException e) {
+            throw new IllegalStateException("문서 로딩 실패: pattern=" + pattern, e);
+        }
+    }
+
+    private List<Document> toDocuments(Resource resource, String layer) {
+        List<Document> docs = new TextReader(resource).get();
+        docs.forEach(doc -> attachMetadata(doc, resource, layer));
+        log.debug("파일 로드: {}", resource.getFilename());
+        return docs;
+    }
+
+    private void attachMetadata(Document doc, Resource resource, String layer) {
+        doc.getMetadata().put("source", resource.getFilename());
+        doc.getMetadata().put("layer", layer);
+    }
+}
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/VectorStoreInitializer.java b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/VectorStoreInitializer.java
new file mode 100644
index 0000000..c73584d
--- /dev/null
+++ b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/VectorStoreInitializer.java
@@ -0,0 +1,40 @@
+package com.cholog.bootcamp.chatbot.infrastructure;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.vectorstore.VectorStore;
+import org.springframework.boot.ApplicationArguments;
+import org.springframework.boot.ApplicationRunner;
+import org.springframework.stereotype.Component;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** Startup runner that loads FAQ and policy documents and adds them to the vector store. */
+@Slf4j
+@Component
+@RequiredArgsConstructor
+public class VectorStoreInitializer implements ApplicationRunner {
+
+    private final DocumentLoader documentLoader;
+    private final VectorStore vectorStore;
+
+    /** Loads both document layers and, unless nothing was found, stores them in a single call. */
+    @Override
+    public void run(ApplicationArguments args) {
+        List<Document> faq = documentLoader.loadFaq();
+        List<Document> policies = documentLoader.loadPolicies();
+
+        List<Document> combined = new ArrayList<>(faq);
+        combined.addAll(policies);
+        if (combined.isEmpty()) {
+            log.warn("적재할 문서 없음. data/ 폴더 확인 필요");
+            return;
+        }
+
+        vectorStore.add(combined);
+        log.info("임베딩 완료: 총 {}개 (faq={}, policy={})",
+                combined.size(), faq.size(), policies.size());
+    }
+}

From 7d129147a7a59f6047a503e98bec51cd99bd3e78 Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Mon, 11 May 2026 22:55:12 +0900
Subject: [PATCH 06/20] =?UTF-8?q?test:=20=ED=8F=89=EA=B0=80=20=EC=8A=A4?=
 =?UTF-8?q?=ED=81=AC=EB=A6=BD=ED=8A=B8=EC=97=90=20=ED=86=A0=ED=81=B0=20?=
 =?UTF-8?q?=EC=82=AC=EC=9A=A9=EB=9F=89=20=EC=A7=91=EA=B3=84=20=EC=B6=94?=
 =?UTF-8?q?=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/evaluate.py | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/data/evaluate.py b/data/evaluate.py
index ed941cd..447afe5 100644
--- a/data/evaluate.py
+++ b/data/evaluate.py
@@ -70,7 +70,7 @@ def ask_server(question: str) -> dict | None:
 # ─── LLM 판정 ─────────────────────────────────────────────────────────────────
 
 def judge_answer(question: str, expected: str, actual: str) -> dict:
-    """LLM으로 답변의 사실적 일치도를 판정합니다."""
+    """LLM으로 답변의 사실적 일치도를 판정합니다. usage 포함하여 반환."""
     prompt = f"""당신은 FAQ 챗봇 답변의 품질을 평가하는 판정자입니다.
 
 질문: {question}
@@ -96,10 +96,18 @@ def judge_answer(question: str, expected: str, actual: str) -> dict:
         response_format={"type": "json_object"},
     )
 
+    usage = resp.usage
     try:
-        return json.loads(resp.choices[0].message.content)
+        result = json.loads(resp.choices[0].message.content)
     except json.JSONDecodeError:
-        return {"score": 0, "reason": "판정 파싱 실패"}
+        result = {"score": 0, "reason": "판정 파싱 실패"}
+
+    result["judge_usage"] = {
+        "prompt_tokens": usage.prompt_tokens,
+        "completion_tokens": usage.completion_tokens,
+        "total_tokens": usage.total_tokens,
+    }
+    return result
 
 
 # ─── 메인 ─────────────────────────────────────────────────────────────────────
@@ -133,6 +141,7 @@ def main():
 
     results = {"correct": 0, "incorrect": 0, "error": 0}
     tier_results = {}
+    chatbot_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
     start_time = time.time()
 
     for i, q in enumerate(questions):
@@ -154,6 +163,10 @@ def main():
             continue
 
         actual_answer = response.get("answer", "")
+        token_usage = response.get("tokenUsage", {})
+        chatbot_usage["prompt_tokens"] += token_usage.get("promptTokens", 0)
+        chatbot_usage["completion_tokens"] += token_usage.get("completionTokens", 0)
+        chatbot_usage["total_tokens"] += token_usage.get("totalTokens", 0)
 
         # LLM 판정
         judgment = judge_answer(question_ko, expected, actual_answer)
@@ -197,6 +210,12 @@ def main():
     print(f"\n소요 시간: {elapsed:.1f}초")
     print(f"평균 응답: {elapsed/max(total,1):.1f}초/질문")
 
+    evaluated = total - results["error"]
+    print(f"\n=== 챗봇 토큰 사용량 ===")
+    print(f"  prompt    : 합계 {chatbot_usage['prompt_tokens']:,} / 평균 {chatbot_usage['prompt_tokens']//max(evaluated,1):,}")
+    print(f"  completion: 합계 {chatbot_usage['completion_tokens']:,} / 평균 {chatbot_usage['completion_tokens']//max(evaluated,1):,}")
+    print(f"  total     : 합계 {chatbot_usage['total_tokens']:,} / 평균 {chatbot_usage['total_tokens']//max(evaluated,1):,}")
+
     # 결과 저장
     result_file = DATA_DIR / "eval_result.json"
     with open(result_file, "w") as f:
@@ -208,6 +227,7 @@ def main():
             "accuracy": results["correct"] / max(total, 1),
             "tier_results": tier_results,
             "elapsed_seconds": elapsed,
+            "chatbot_token_usage": chatbot_usage,
         }, f, indent=2, ensure_ascii=False)
     print(f"\n결과 저장: {result_file}")
 

From ff47bdfc0356db0fa29ba4de66494cafc601ede5 Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Thu, 14 May 2026 21:11:30 +0900
Subject: [PATCH 07/20] =?UTF-8?q?feat:=20v2=20300=EC=82=AC=EC=9D=B4?=
 =?UTF-8?q?=EC=A6=88=20=EC=B2=AD=ED=82=B9=20=EA=B5=AC=ED=98=84=20=EB=B0=8F?=
 =?UTF-8?q?=20=ED=8F=89=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/eval_result.json                           | 17 +++++++++++------
 .../chatbot/infrastructure/DocumentLoader.java  | 17 ++++++++++++-----
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/data/eval_result.json b/data/eval_result.json
index 35d6e84..cbe2c23 100644
--- a/data/eval_result.json
+++ b/data/eval_result.json
@@ -1,16 +1,16 @@
 {
   "total": 150,
-  "correct": 80,
-  "incorrect": 70,
+  "correct": 72,
+  "incorrect": 78,
   "error": 0,
-  "accuracy": 0.5333333333333333,
+  "accuracy": 0.48,
   "tier_results": {
     "easy": {
-      "correct": 18,
+      "correct": 14,
       "total": 30
     },
     "medium": {
-      "correct": 51,
+      "correct": 47,
       "total": 94
     },
     "hard": {
@@ -18,5 +18,10 @@
       "total": 26
     }
   },
-  "elapsed_seconds": 664.7394919395447
+  "elapsed_seconds": 938.9365682601929,
+  "chatbot_token_usage": {
+    "prompt_tokens": 177714,
+    "completion_tokens": 18674,
+    "total_tokens": 196388
+  }
 }
\ No newline at end of file
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java
index 71f0937..dda9f7a 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java
@@ -3,6 +3,7 @@
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.ai.document.Document;
 import org.springframework.ai.reader.TextReader;
+import org.springframework.ai.transformer.splitter.TokenTextSplitter;
 import org.springframework.core.io.Resource;
 import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
 import org.springframework.stereotype.Component;
@@ -20,8 +21,13 @@ public class DocumentLoader {
     private static final String LAYER_FAQ = "faq";
     private static final String LAYER_POLICY = "policy";
 
+    private static final int CHUNK_SIZE = 300; // target chunk size, in tokens
+    private static final int MIN_CHUNK_SIZE_CHARS = 50; // 2nd TokenTextSplitter arg is a minimum chunk size in chars, not an overlap
+
     private final PathMatchingResourcePatternResolver resolver =
             new PathMatchingResourcePatternResolver();
+    private final TokenTextSplitter splitter =
+            new TokenTextSplitter(CHUNK_SIZE, MIN_CHUNK_SIZE_CHARS, 5, 10000, true);
 
     public List<Document> loadFaq() {
         return load(FAQ_PATTERN, LAYER_FAQ);
@@ -45,7 +51,7 @@ private List<Document> load(String pattern, String layer) {
                 result.addAll(toDocuments(resource, layer));
             }
 
-            log.info("문서 로드 완료: layer={}, 총 {}개", layer, result.size());
+            log.info("문서 로드 완료: layer={}, 총 {}개 청크", layer, result.size());
             return result;
 
         } catch (IOException e) {
@@ -54,10 +60,11 @@ private List<Document> load(String pattern, String layer) {
     }
 
     private List<Document> toDocuments(Resource resource, String layer) {
-        List<Document> docs = new TextReader(resource).get();
-        docs.forEach(doc -> attachMetadata(doc, resource, layer));
-        log.debug("파일 로드: {}", resource.getFilename());
-        return docs;
+        List<Document> raw = new TextReader(resource).get();
+        List<Document> chunks = splitter.apply(raw);
+        chunks.forEach(doc -> attachMetadata(doc, resource, layer));
+        log.debug("파일 청킹: {} → {}개 청크", resource.getFilename(), chunks.size());
+        return chunks;
     }
 
     private void attachMetadata(Document doc, Resource resource, String layer) {

From eb5799bcdce1adb8a51384e1deda069b27905d1c Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Thu, 14 May 2026 21:33:58 +0900
Subject: [PATCH 08/20] =?UTF-8?q?refactor:=20=ED=8F=89=EA=B0=80=20?=
 =?UTF-8?q?=EB=B3=91=EB=A0=AC=20=EC=8B=A4=ED=96=89=20=EA=B8=B0=EB=8A=A5=20?=
 =?UTF-8?q?=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/evaluate.py | 148 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 95 insertions(+), 53 deletions(-)

diff --git a/data/evaluate.py b/data/evaluate.py
index 447afe5..3d08446 100644
--- a/data/evaluate.py
+++ b/data/evaluate.py
@@ -11,8 +11,9 @@
 실행:
   # 서버가 localhost:8080에서 실행 중이어야 합니다
   .venv/bin/python evaluate.py
-  .venv/bin/python evaluate.py --verbose    # 질문별 상세 출력
-  .venv/bin/python evaluate.py --limit 10   # 처음 10개만 평가
+  .venv/bin/python evaluate.py --verbose       # 질문별 상세 출력
+  .venv/bin/python evaluate.py --limit 10      # 처음 10개만 평가
+  .venv/bin/python evaluate.py --parallel 10   # 병렬 워커 10개로 가속
 
 비용:
   judge 모델(gpt-4o-mini) 사용, 100문항 기준 약 $0.3~0.5
@@ -22,6 +23,7 @@
 import os
 import argparse
 import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 
 import requests
@@ -110,15 +112,46 @@ def judge_answer(question: str, expected: str, actual: str) -> dict:
     return result
 
 
+# ─── 워커 ─────────────────────────────────────────────────────────────────────
+
+def process_question(q: dict, idx: int) -> dict:
+    """질문 1건을 처리해 결과 dict를 반환합니다. (스레드 안전)"""
+    start = time.time()
+    qid = q.get("id", f"Q{idx+1}")
+    question_ko = q["question_ko"]
+    expected = q["expected_answer"]
+    tier = q.get("tier", "unknown")
+
+    response = ask_server(question_ko)
+    if response is None:
+        return {"qid": qid, "tier": tier, "status": "error", "question": question_ko,
+                "token_usage": {}, "duration": time.time() - start}
+
+    actual_answer = response.get("answer", "")
+    token_usage = response.get("tokenUsage", {})
+    judgment = judge_answer(question_ko, expected, actual_answer)
+
+    return {
+        "qid": qid,
+        "tier": tier,
+        "status": "ok",
+        "score": judgment.get("score", 0),
+        "reason": judgment.get("reason", ""),
+        "question": question_ko,
+        "token_usage": token_usage,
+        "duration": time.time() - start,
+    }
+
+
 # ─── 메인 ─────────────────────────────────────────────────────────────────────
 
 def main():
     parser = argparse.ArgumentParser(description="챗봇 품질 평가")
     parser.add_argument("--verbose", action="store_true", help="질문별 상세 출력")
     parser.add_argument("--limit", type=int, default=0, help="평가할 질문 수 제한 (0=전체)")
+    parser.add_argument("--parallel", type=int, default=1, help="병렬 워커 수 (default: 1, 순차 실행)")
     args = parser.parse_args()
 
-    # 테스트 질문 로드
     questions_path = DATA_DIR / "test_questions.json"
     with open(questions_path) as f:
         questions = json.load(f)
@@ -130,9 +163,10 @@ def main():
     print(f"서버: {SERVER_URL}")
     print(f"질문 수: {len(questions)}")
     print(f"판정 모델: {JUDGE_MODEL}")
+    if args.parallel > 1:
+        print(f"병렬 워커: {args.parallel}")
     print()
 
-    # 서버 연결 확인
     test_resp = ask_server("test")
     if test_resp is None:
         print("서버에 연결할 수 없습니다. 서버가 실행 중인지 확인하세요:")
@@ -142,56 +176,29 @@ def main():
     results = {"correct": 0, "incorrect": 0, "error": 0}
     tier_results = {}
     chatbot_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+    durations = []
     start_time = time.time()
 
-    for i, q in enumerate(questions):
-        qid = q.get("id", f"Q{i+1}")
-        question_ko = q["question_ko"]
-        expected = q["expected_answer"]
-        tier = q.get("tier", "unknown")
-
-        if tier not in tier_results:
-            tier_results[tier] = {"correct": 0, "total": 0}
-        tier_results[tier]["total"] += 1
-
-        # 서버에 질문
-        response = ask_server(question_ko)
-        if response is None:
-            results["error"] += 1
-            if args.verbose:
-                print(f"[{qid}] ERROR — 서버 응답 없음")
-            continue
-
-        actual_answer = response.get("answer", "")
-        token_usage = response.get("tokenUsage", {})
-        chatbot_usage["prompt_tokens"] += token_usage.get("promptTokens", 0)
-        chatbot_usage["completion_tokens"] += token_usage.get("completionTokens", 0)
-        chatbot_usage["total_tokens"] += token_usage.get("totalTokens", 0)
-
-        # LLM 판정
-        judgment = judge_answer(question_ko, expected, actual_answer)
-        score = judgment.get("score", 0)
-
-        if score == 1:
-            results["correct"] += 1
-            tier_results[tier]["correct"] += 1
-            marker = "✓"
-        else:
-            results["incorrect"] += 1
-            marker = "✗"
-
-        if args.verbose:
-            print(f"[{qid}] {marker} ({tier}) {question_ko[:40]}...")
-            if score == 0:
-                print(f"        이유: {judgment.get('reason', '')[:80]}")
-
-        # 진행률 (10개마다)
-        if not args.verbose and (i + 1) % 10 == 0:
-            print(f"  진행: {i+1}/{len(questions)}")
-
-    # 결과 출력
+    # ─── 실행 (순차 / 병렬 공통 집계) ────────────────────────────────────────
+    if args.parallel > 1:
+        with ThreadPoolExecutor(max_workers=args.parallel) as executor:
+            futures = [executor.submit(process_question, q, i) for i, q in enumerate(questions)]
+            for completed, fut in enumerate(as_completed(futures), 1):
+                r = fut.result()
+                _aggregate(r, results, tier_results, chatbot_usage, durations, args.verbose)
+                if not args.verbose and completed % 10 == 0:
+                    print(f"  진행: {completed}/{len(questions)}")
+    else:
+        for i, q in enumerate(questions):
+            r = process_question(q, i)
+            _aggregate(r, results, tier_results, chatbot_usage, durations, args.verbose)
+            if not args.verbose and (i + 1) % 10 == 0:
+                print(f"  진행: {i+1}/{len(questions)}")
+
+    # ─── 결과 출력 ────────────────────────────────────────────────────────────
     elapsed = time.time() - start_time
     total = results["correct"] + results["incorrect"] + results["error"]
+    evaluated = total - results["error"]
 
     print()
     print(f"=== 평가 결과 ===")
@@ -208,15 +215,14 @@ def main():
         print(f"\n  에러: {results['error']}건")
 
     print(f"\n소요 시간: {elapsed:.1f}초")
-    print(f"평균 응답: {elapsed/max(total,1):.1f}초/질문")
+    if durations:
+        print(f"평균 응답: {sum(durations)/len(durations):.1f}초/질문")
 
-    evaluated = total - results["error"]
     print(f"\n=== 챗봇 토큰 사용량 ===")
     print(f"  prompt    : 합계 {chatbot_usage['prompt_tokens']:,} / 평균 {chatbot_usage['prompt_tokens']//max(evaluated,1):,}")
     print(f"  completion: 합계 {chatbot_usage['completion_tokens']:,} / 평균 {chatbot_usage['completion_tokens']//max(evaluated,1):,}")
     print(f"  total     : 합계 {chatbot_usage['total_tokens']:,} / 평균 {chatbot_usage['total_tokens']//max(evaluated,1):,}")
 
-    # 결과 저장
     result_file = DATA_DIR / "eval_result.json"
     with open(result_file, "w") as f:
         json.dump({
@@ -227,10 +233,46 @@ def main():
             "accuracy": results["correct"] / max(total, 1),
             "tier_results": tier_results,
             "elapsed_seconds": elapsed,
+            "avg_response_seconds": (sum(durations) / len(durations)) if durations else 0,
             "chatbot_token_usage": chatbot_usage,
         }, f, indent=2, ensure_ascii=False)
     print(f"\n결과 저장: {result_file}")
 
+def _aggregate(r: dict, results: dict, tier_results: dict, chatbot_usage: dict,
+               durations: list, verbose: bool):
+    """process_question 결과 1건을 집계합니다."""
+    tier = r["tier"]
+    durations.append(r["duration"])
+
+    if tier not in tier_results:
+        tier_results[tier] = {"correct": 0, "total": 0}
+    tier_results[tier]["total"] += 1
+
+    if r["status"] == "error":
+        results["error"] += 1
+        if verbose:
+            print(f"[{r['qid']}] ERROR — 서버 응답 없음")
+        return
+
+    token_usage = r["token_usage"]
+    chatbot_usage["prompt_tokens"] += token_usage.get("promptTokens", 0)
+    chatbot_usage["completion_tokens"] += token_usage.get("completionTokens", 0)
+    chatbot_usage["total_tokens"] += token_usage.get("totalTokens", 0)
+
+    score = r["score"]
+    if score == 1:
+        results["correct"] += 1
+        tier_results[tier]["correct"] += 1
+        marker = "✓"
+    else:
+        results["incorrect"] += 1
+        marker = "✗"
+
+    if verbose:
+        print(f"[{r['qid']}] {marker} ({tier}) {r['question'][:40]}...")
+        if score == 0:
+            print(f"        이유: {r['reason'][:80]}")
+
 
 if __name__ == "__main__":
     main()

From 6ef60bddf4bdc319282805f96df0927234965028 Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Thu, 14 May 2026 21:45:52 +0900
Subject: [PATCH 09/20] =?UTF-8?q?feat:=20v3=20markdown=20=ED=97=A4?=
 =?UTF-8?q?=EB=8D=94=20=EB=B3=84=20=EC=B2=AD=ED=82=B9=20=EC=A0=84=EB=9E=B5?=
 =?UTF-8?q?=20=EA=B5=AC=ED=98=84=20=EB=B0=8F=20=ED=8F=89=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/eval_result.json                         | 19 ++++++++-------
 .../infrastructure/DocumentLoader.java        | 22 ++++++++---------
 .../MarkdownHeadingSplitter.java              | 24 +++++++++++++++++++
 3 files changed, 44 insertions(+), 21 deletions(-)
 create mode 100644 src/main/java/com/cholog/bootcamp/chatbot/infrastructure/MarkdownHeadingSplitter.java

diff --git a/data/eval_result.json b/data/eval_result.json
index cbe2c23..f8919dd 100644
--- a/data/eval_result.json
+++ b/data/eval_result.json
@@ -1,27 +1,28 @@
 {
   "total": 150,
-  "correct": 72,
-  "incorrect": 78,
+  "correct": 69,
+  "incorrect": 81,
   "error": 0,
-  "accuracy": 0.48,
+  "accuracy": 0.46,
   "tier_results": {
     "easy": {
       "correct": 14,
       "total": 30
     },
     "medium": {
-      "correct": 47,
+      "correct": 46,
       "total": 94
     },
     "hard": {
-      "correct": 11,
+      "correct": 9,
       "total": 26
     }
   },
-  "elapsed_seconds": 938.9365682601929,
+  "elapsed_seconds": 60.47133016586304,
+  "avg_response_seconds": 3.8450408871968587,
   "chatbot_token_usage": {
-    "prompt_tokens": 177714,
-    "completion_tokens": 18674,
-    "total_tokens": 196388
+    "prompt_tokens": 70729,
+    "completion_tokens": 18003,
+    "total_tokens": 88732
   }
 }
\ No newline at end of file
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java
index dda9f7a..5434ccc 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java
@@ -3,7 +3,6 @@
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.ai.document.Document;
 import org.springframework.ai.reader.TextReader;
-import org.springframework.ai.transformer.splitter.TokenTextSplitter;
 import org.springframework.core.io.Resource;
 import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
 import org.springframework.stereotype.Component;
@@ -21,23 +20,20 @@ public class DocumentLoader {
     private static final String LAYER_FAQ = "faq";
     private static final String LAYER_POLICY = "policy";
 
-    private static final int CHUNK_SIZE = 300;
-    private static final int OVERLAP_SIZE = 50;
-
     private final PathMatchingResourcePatternResolver resolver =
             new PathMatchingResourcePatternResolver();
-    private final TokenTextSplitter splitter =
-            new TokenTextSplitter(CHUNK_SIZE, OVERLAP_SIZE, 5, 10000, true);
+    private final MarkdownHeadingSplitter faqSplitter = new MarkdownHeadingSplitter("###");
+    private final MarkdownHeadingSplitter policySplitter = new MarkdownHeadingSplitter("##");
 
     public List<Document> loadFaq() {
-        return load(FAQ_PATTERN, LAYER_FAQ);
+        return load(FAQ_PATTERN, LAYER_FAQ, faqSplitter);
     }
 
     public List<Document> loadPolicies() {
-        return load(POLICY_PATTERN, LAYER_POLICY);
+        return load(POLICY_PATTERN, LAYER_POLICY, policySplitter);
     }
 
-    private List<Document> load(String pattern, String layer) {
+    private List<Document> load(String pattern, String layer, MarkdownHeadingSplitter splitter) {
         try {
             Resource[] resources = resolver.getResources(pattern);
 
@@ -48,7 +44,7 @@ private List<Document> load(String pattern, String layer) {
 
             List<Document> result = new ArrayList<>();
             for (Resource resource : resources) {
-                result.addAll(toDocuments(resource, layer));
+                result.addAll(toDocuments(resource, layer, splitter));
             }
 
             log.info("문서 로드 완료: layer={}, 총 {}개 청크", layer, result.size());
@@ -59,9 +55,11 @@ private List<Document> load(String pattern, String layer) {
         }
     }
 
-    private List<Document> toDocuments(Resource resource, String layer) {
+    private List<Document> toDocuments(Resource resource, String layer, MarkdownHeadingSplitter splitter) {
         List<Document> raw = new TextReader(resource).get();
-        List<Document> chunks = splitter.apply(raw);
+        List<Document> chunks = raw.stream()
+                .flatMap(doc -> splitter.split(doc).stream())
+                .toList();
         chunks.forEach(doc -> attachMetadata(doc, resource, layer));
         log.debug("파일 청킹: {} → {}개 청크", resource.getFilename(), chunks.size());
         return chunks;
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/MarkdownHeadingSplitter.java b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/MarkdownHeadingSplitter.java
new file mode 100644
index 0000000..a54c174
--- /dev/null
+++ b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/MarkdownHeadingSplitter.java
@@ -0,0 +1,24 @@
+package com.cholog.bootcamp.chatbot.infrastructure;
+
+import org.springframework.ai.document.Document;
+
+import java.util.Arrays;
+import java.util.List;
+
+public class MarkdownHeadingSplitter {
+
+    private final String heading;
+
+    public MarkdownHeadingSplitter(String heading) {
+        this.heading = heading;
+    }
+
+    public List<Document> split(Document document) {
+        String[] sections = document.getText().split("(?m)^(?=" + heading + " )");
+        return Arrays.stream(sections)
+                .map(String::strip)
+                .filter(s -> s.startsWith(heading))
+                .map(Document::new)
+                .toList();
+    }
+}
\ No newline at end of file

From 7bf00bc71426587e5a5917db8a7bc70f601f3104 Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Thu, 14 May 2026 22:16:34 +0900
Subject: [PATCH 10/20] =?UTF-8?q?feat:=20v4=20=EC=B2=AD=ED=82=B9=20?=
 =?UTF-8?q?=EB=8B=A8=EC=9C=84=EB=B3=84=20=EC=B9=B4=ED=85=8C=EA=B3=A0?=
 =?UTF-8?q?=EB=A6=AC(=EC=A0=9C=EB=AA=A9)=20=EC=B6=94=EA=B0=80=20=EB=B0=8F?=
 =?UTF-8?q?=20=ED=8F=89=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/eval_result.json                         | 22 +++++++++----------
 .../MarkdownHeadingSplitter.java              | 14 ++++++++++--
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/data/eval_result.json b/data/eval_result.json
index f8919dd..8729b86 100644
--- a/data/eval_result.json
+++ b/data/eval_result.json
@@ -1,28 +1,28 @@
 {
   "total": 150,
-  "correct": 69,
-  "incorrect": 81,
+  "correct": 77,
+  "incorrect": 73,
   "error": 0,
-  "accuracy": 0.46,
+  "accuracy": 0.5133333333333333,
   "tier_results": {
     "easy": {
-      "correct": 14,
+      "correct": 13,
       "total": 30
     },
     "medium": {
-      "correct": 46,
+      "correct": 53,
       "total": 94
     },
     "hard": {
-      "correct": 9,
+      "correct": 11,
       "total": 26
     }
   },
-  "elapsed_seconds": 60.47133016586304,
-  "avg_response_seconds": 3.8450408871968587,
+  "elapsed_seconds": 51.745585918426514,
+  "avg_response_seconds": 3.345907696088155,
   "chatbot_token_usage": {
-    "prompt_tokens": 70729,
-    "completion_tokens": 18003,
-    "total_tokens": 88732
+    "prompt_tokens": 75505,
+    "completion_tokens": 17780,
+    "total_tokens": 93285
   }
 }
\ No newline at end of file
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/MarkdownHeadingSplitter.java b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/MarkdownHeadingSplitter.java
index a54c174..c50cf26 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/MarkdownHeadingSplitter.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/MarkdownHeadingSplitter.java
@@ -14,11 +14,21 @@ public MarkdownHeadingSplitter(String heading) {
     }
 
     public List<Document> split(Document document) {
-        String[] sections = document.getText().split("(?m)^(?=" + heading + " )");
+        String text = document.getText();
+        String title = extractTitle(text);
+
+        String[] sections = text.split("(?m)^(?=" + heading + " )");
         return Arrays.stream(sections)
                 .map(String::strip)
                 .filter(s -> s.startsWith(heading))
-                .map(Document::new)
+                .map(s -> new Document(title + "\n" + s))
                 .toList();
     }
+
+    private String extractTitle(String text) {
+        return text.lines()
+                .filter(line -> line.startsWith("# "))
+                .findFirst()
+                .orElse("");
+    }
 }
\ No newline at end of file

From f6ee7156d8e86b840c64477cc3814a70fe6f7908 Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Sun, 17 May 2026 18:14:54 +0900
Subject: [PATCH 11/20] =?UTF-8?q?feat:=20v5=20=EC=8B=9C=EC=8A=A4=ED=85=9C?=
 =?UTF-8?q?=20=ED=94=84=EB=A1=AC=ED=94=84=ED=8A=B8=20=EC=88=98=EC=A0=95=20?=
 =?UTF-8?q?=EB=B0=8F=20topk=20=EC=A1=B0=EC=A0=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/eval_result.json                         | 22 +++++++++----------
 data/evaluate.py                              | 21 ++++++++++++------
 .../chatbot/application/ChatbotService.java   | 15 ++++++++++++-
 .../com/cholog/bootcamp/config/AiConfig.java  | 22 ++++++++++++++++++-
 4 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/data/eval_result.json b/data/eval_result.json
index 8729b86..7cf21aa 100644
--- a/data/eval_result.json
+++ b/data/eval_result.json
@@ -1,28 +1,28 @@
 {
   "total": 150,
-  "correct": 77,
-  "incorrect": 73,
+  "correct": 85,
+  "incorrect": 65,
   "error": 0,
-  "accuracy": 0.5133333333333333,
+  "accuracy": 0.5666666666666667,
   "tier_results": {
     "easy": {
-      "correct": 13,
+      "correct": 22,
       "total": 30
     },
     "medium": {
-      "correct": 53,
+      "correct": 51,
       "total": 94
     },
     "hard": {
-      "correct": 11,
+      "correct": 12,
       "total": 26
     }
   },
-  "elapsed_seconds": 51.745585918426514,
-  "avg_response_seconds": 3.345907696088155,
+  "elapsed_seconds": 37.96412801742554,
+  "avg_response_seconds": 2.4435594256718955,
   "chatbot_token_usage": {
-    "prompt_tokens": 75505,
-    "completion_tokens": 17780,
-    "total_tokens": 93285
+    "prompt_tokens": 183020,
+    "completion_tokens": 7544,
+    "total_tokens": 190564
   }
 }
\ No newline at end of file
diff --git a/data/evaluate.py b/data/evaluate.py
index 3d08446..ee0a0cb 100644
--- a/data/evaluate.py
+++ b/data/evaluate.py
@@ -74,17 +74,24 @@ def ask_server(question: str) -> dict | None:
 def judge_answer(question: str, expected: str, actual: str) -> dict:
     """LLM으로 답변의 사실적 일치도를 판정합니다. usage 포함하여 반환."""
     prompt = f"""당신은 FAQ 챗봇 답변의 품질을 평가하는 판정자입니다.
+제공된 [질문], [기대 답변], 그리고 챗봇의 [실제 답변]을 비교하여 [평가 기준]에 따라 정답 여부(정답:1/오답:0)를 판정하세요.
 
-질문: {question}
+[]질문]: {question}
 
-기대 답변 (정답): {expected}
+[기대 답변 (정답)]: {expected}
 
-실제 답변 (챗봇): {actual}
+[실제 답변 (챗봇)]: {actual}
+
+[평가 기준]
+실제 답변이 기대 답변의 팩트와 사실적으로 일치하는지 아래 기준에 따라 평가하세요.
+
+- 핵심 팩트 필수 일치 (1점): 실제 답변이 [질문]의 핵심에 대응하는 정확한 정답(수치나 내용)을 명확히 포함해야 합니다. 표현이 달라도 핵심 사실이 같다면 정답입니다.
+- 부가 정보의 선택적 허용: [기대 답변]에 적힌 주변 부가 정보(지급 시점, 사후 처리 등)를 실제 답변이 생략했더라도, 핵심 팩트가 맞다면 감점하지 마세요.
+- 감점 조항 (0점):
+  1. 질문에 대한 핵심 사실이 누락되었거나 틀린 경우.
+  2. 실제 답변이 부가 정보를 '선택적으로 제공'했으나, 그 내용이 [기대 답변]의 사실과 다를 경우.
+  3. 핵심 팩트 없이 부가 정보만 부분적으로 맞춘 경우.
 
-실제 답변이 기대 답변과 사실적으로 일치하는지 평가하세요.
-- 표현이 달라도 핵심 사실이 같으면 정답입니다
-- 핵심 사실이 빠져있거나 틀렸으면 오답입니다
-- 부분적으로만 맞으면 오답으로 처리하세요
 
 JSON으로만 응답하세요:
 {{"score": 1, "reason": "..."}}  (정답)
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java b/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
index cfa828f..456b95a 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
@@ -2,6 +2,7 @@
 
 import com.cholog.bootcamp.chatbot.application.dto.ChatbotResult;
 import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
 import org.springframework.ai.chat.client.ChatClient;
 
 import org.springframework.ai.chat.model.ChatResponse;
@@ -13,6 +14,7 @@
 import java.util.List;
 import java.util.stream.Collectors;
 
+@Slf4j
 @Service
 @RequiredArgsConstructor
 public class ChatbotService {
@@ -43,13 +45,24 @@ private String searchRelevantDocuments(String question) {
         List<Document> docs = vectorStore.similaritySearch(
                 SearchRequest.builder()
                         .query(question)
-                        .topK(5)
+                        .topK(8)
                         .build()
         );
 
+        //loggingSearchedDocs(question, docs);
+
         return docs.stream()
                 .map(Document::getText)
                 .collect(Collectors.joining("\n\n"));
     }
 
+    private static void loggingSearchedDocs(String question, List<Document> docs) {
+        log.info("=== [RAG] 검색된 문서 ({}개) for: {} ===", docs.size(), question);
+        for (int i = 0; i < docs.size(); i++) {
+            Document doc = docs.get(i);
+            String preview = doc.getText().substring(0, Math.min(120, doc.getText().length())).replace("\n", " ");
+            log.info("[{}] metadata={} | text={}", i + 1, doc.getMetadata(), preview);
+        }
+    }
+
 }
diff --git a/src/main/java/com/cholog/bootcamp/config/AiConfig.java b/src/main/java/com/cholog/bootcamp/config/AiConfig.java
index 75f87e6..2e91113 100644
--- a/src/main/java/com/cholog/bootcamp/config/AiConfig.java
+++ b/src/main/java/com/cholog/bootcamp/config/AiConfig.java
@@ -12,7 +12,27 @@ public class AiConfig {
 
     private static final String SYSTEM_PROMPT = """
             당신은 초록 코퍼레이션의 친절한 고객센터 상담원입니다.
-            [고객 질문]에 대해 [참고 문서]를 기반으로 답변하세요.
+            [참고 문서]를 기반으로 [고객 질문]에 대해 답변해야 하고, 아래의 [답변 구조 표준]과 [제약 사항]을 엄격히 준수하세요.
+            
+            [답변 전 내부 판단 단계]
+            1. 질문의 핵심 주제를 한 단어로 특정하세요.
+              예) "반품은 며칠 안에?" → 주제: '반품 신청 기간'
+            2. [참고 문서]에서 그 주제를 직접 다루는 섹션만 선별하세요.
+            3. 선별된 섹션만을 근거로 답변하세요.
+            
+            [답변 구조 표준]
+            반드시 다음 2단계 구조에 맞추어 두괄식으로 출력하세요. 각 단계 사이에는 줄바꿈(Enter)을 두세요.
+            1단계: 핵심 답변 (첫 문장)
+                - 사용자가 묻는 말에 대한 직구 정답을 한두 문장 이내로 가장 먼저 명확하게 답변하세요.
+            2단계: 절제된 추가 정보 (둘째 줄 - 선택 사항)
+                - [참고 문서]에 사용자의 질문과 밀접하게 연관된 유용한 팁, 혜택, 혹은 치명적인 예외 조항(ex: 특정 등급 제한, 마켓플레이스 예외)이 있다면 딱 1개만 핵심 요약하여 덧붙이세요.
+                - 연관된 추가 정보가 없거나 불확실하다면 2단계는 완전히 생략하고 1단계만 출력합니다.
+
+            [제약 사항]
+            - 문서에 없는 내용을 추측하거나 지어내어 답변하지 마세요.
+            - 문서에 없는 내용이거나 불확실하다면, 아는 척하지 말고 반드시 아래의 지정된 거절 문구만을 출력하세요.
+                - 거절 문구: "죄송합니다. 요청하신 정보는 정확한 안내가 어렵습니다. 고객센터로 문의해 주세요."
+            - 어조: 유저가 반말이나 구어체(`어케함?`, `언제옴ㅋ`)로 질문하더라도, 상담원은 흔들리지 않고 친절하 정중한 표준어(~입니다, ~합니다)를 유지하세요.
             """;
 
     @Bean

From 69227c274af8dd79257b24dd31952f36c503d8d2 Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Sun, 17 May 2026 19:00:20 +0900
Subject: [PATCH 12/20] =?UTF-8?q?feat:=20v6=20=ED=94=84=EB=A1=AC=ED=94=84?=
 =?UTF-8?q?=ED=8A=B8=20=EC=95=95=EC=B6=95=20=EB=B0=8F=20=EA=B3=BC=EB=8F=84?=
 =?UTF-8?q?=ED=95=9C=20=EB=8B=B5=EB=B3=80=20=EA=B1=B0=EC=A0=88=20=ED=94=84?=
 =?UTF-8?q?=EB=A1=AC=ED=94=84=ED=8A=B8=20=EA=B0=9C=EC=84=A0=20=EA=B5=AC?=
 =?UTF-8?q?=EC=96=B4=EC=B2=B4=20=EB=B3=80=ED=99=98=20=EA=B3=BC=EC=A0=95=20?=
 =?UTF-8?q?=EC=B6=94=EA=B0=80=20=EA=B3=A0=EA=B0=9D=EC=9D=98=20=ED=8B=80?=
 =?UTF-8?q?=EB=A6=B0=EC=A0=95=EB=B3=B4=20=EC=A0=9C=EA=B3=B5=20=EB=B0=A9?=
 =?UTF-8?q?=EC=96=B4=20=EC=B6=94=EA=B0=80(hard=20=EC=9A=A9)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/eval_result.json                         | 22 ++++-----
 .../chatbot/application/ChatbotService.java   |  4 +-
 .../com/cholog/bootcamp/config/AiConfig.java  | 45 ++++++++++---------
 3 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/data/eval_result.json b/data/eval_result.json
index 7cf21aa..d004e6a 100644
--- a/data/eval_result.json
+++ b/data/eval_result.json
@@ -1,28 +1,28 @@
 {
   "total": 150,
-  "correct": 85,
-  "incorrect": 65,
+  "correct": 110,
+  "incorrect": 40,
   "error": 0,
-  "accuracy": 0.5666666666666667,
+  "accuracy": 0.7333333333333333,
   "tier_results": {
     "easy": {
-      "correct": 22,
+      "correct": 25,
       "total": 30
     },
     "medium": {
-      "correct": 51,
+      "correct": 66,
       "total": 94
     },
     "hard": {
-      "correct": 12,
+      "correct": 19,
       "total": 26
     }
   },
-  "elapsed_seconds": 37.96412801742554,
-  "avg_response_seconds": 2.4435594256718955,
+  "elapsed_seconds": 40.24573802947998,
+  "avg_response_seconds": 2.556360289255778,
   "chatbot_token_usage": {
-    "prompt_tokens": 183020,
-    "completion_tokens": 7544,
-    "total_tokens": 190564
+    "prompt_tokens": 177297,
+    "completion_tokens": 10421,
+    "total_tokens": 187718
   }
 }
\ No newline at end of file
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java b/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
index 456b95a..72cfb44 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
@@ -57,11 +57,11 @@ private String searchRelevantDocuments(String question) {
     }
 
     private static void loggingSearchedDocs(String question, List<Document> docs) {
-        log.info("=== [RAG] 검색된 문서 ({}개) for: {} ===", docs.size(), question);
+        log.debug("=== [RAG] 검색된 문서 ({}개) for: {} ===", docs.size(), question);
         for (int i = 0; i < docs.size(); i++) {
             Document doc = docs.get(i);
             String preview = doc.getText().substring(0, Math.min(120, doc.getText().length())).replace("\n", " ");
-            log.info("[{}] metadata={} | text={}", i + 1, doc.getMetadata(), preview);
+            log.debug("[{}] metadata={} | text={}", i + 1, doc.getMetadata(), preview);
         }
     }
 
diff --git a/src/main/java/com/cholog/bootcamp/config/AiConfig.java b/src/main/java/com/cholog/bootcamp/config/AiConfig.java
index 2e91113..ddb272f 100644
--- a/src/main/java/com/cholog/bootcamp/config/AiConfig.java
+++ b/src/main/java/com/cholog/bootcamp/config/AiConfig.java
@@ -11,28 +11,29 @@
 public class AiConfig {
 
     private static final String SYSTEM_PROMPT = """
-            당신은 초록 코퍼레이션의 친절한 고객센터 상담원입니다.
-            [참고 문서]를 기반으로 [고객 질문]에 대해 답변해야 하고, 아래의 [답변 구조 표준]과 [제약 사항]을 엄격히 준수하세요.
-            
-            [답변 전 내부 판단 단계]
-            1. 질문의 핵심 주제를 한 단어로 특정하세요.
-              예) "반품은 며칠 안에?" → 주제: '반품 신청 기간'
-            2. [참고 문서]에서 그 주제를 직접 다루는 섹션만 선별하세요.
-            3. 선별된 섹션만을 근거로 답변하세요.
-            
-            [답변 구조 표준]
-            반드시 다음 2단계 구조에 맞추어 두괄식으로 출력하세요. 각 단계 사이에는 줄바꿈(Enter)을 두세요.
-            1단계: 핵심 답변 (첫 문장)
-                - 사용자가 묻는 말에 대한 직구 정답을 한두 문장 이내로 가장 먼저 명확하게 답변하세요.
-            2단계: 절제된 추가 정보 (둘째 줄 - 선택 사항)
-                - [참고 문서]에 사용자의 질문과 밀접하게 연관된 유용한 팁, 혜택, 혹은 치명적인 예외 조항(ex: 특정 등급 제한, 마켓플레이스 예외)이 있다면 딱 1개만 핵심 요약하여 덧붙이세요.
-                - 연관된 추가 정보가 없거나 불확실하다면 2단계는 완전히 생략하고 1단계만 출력합니다.
-
-            [제약 사항]
-            - 문서에 없는 내용을 추측하거나 지어내어 답변하지 마세요.
-            - 문서에 없는 내용이거나 불확실하다면, 아는 척하지 말고 반드시 아래의 지정된 거절 문구만을 출력하세요.
-                - 거절 문구: "죄송합니다. 요청하신 정보는 정확한 안내가 어렵습니다. 고객센터로 문의해 주세요."
-            - 어조: 유저가 반말이나 구어체(`어케함?`, `언제옴ㅋ`)로 질문하더라도, 상담원은 흔들리지 않고 친절하 정중한 표준어(~입니다, ~합니다)를 유지하세요.
+            당신은 초록 코퍼레이션의 고객센터 상담원입니다. [참고 문서]만을 근거로 답변하세요.
+
+            [답변 형식] 두괄식 2단계로 작성하세요.
+            - 1단계(필수): 핵심 정답을 한두 문장으로 직접 답변
+            - 2단계(선택): 밀접하게 연관된 예외/조건/팁이 있으면 1개만 추가, 없으면 생략
+
+            [답변 전 내부 처리]
+            1. 비격식 표현을 표준어로 해석 후 핵심 주제를 파악하세요.
+               예) "비번 어케바꿈?" → 비밀번호 변경 방법 / "반품 며칠까지?" → 반품 신청 기간
+            2. [참고 문서]에서 해당 주제를 직접 다루는 섹션을 찾아 그것만 근거로 답변하세요.
+
+            [특수 상황]
+            - 고객이 틀린 정보를 전제로 물을 때: 문서 기준으로 정중히 정정하세요.
+              예) Q: "[고객의 틀린 전제] 아닌가요?" → A: "현행 정책에 따르면 [참고 문서의 정확한 팩트]입니다."
+            - 고객이 "~라고 들었는데 맞나요?" 형태로 확인 요청 시: 현재 문서 기준을 안내하세요.
+              예) Q: "[과거 정보] 아닌가요?" → A: "현재 기준으로는 [참고 문서의 최신 기준]입니다."
+            - 과거 사례/다른 고객 경험 질문 시: 문서에 명시된 보상·처리 기준을 안내하세요.
+
+            [거절 기준] [참고 문서]에 관련 내용이 전혀 없는 경우에만 아래 문구를 사용하세요.
+            관련 내용이 있다면 반드시 그것을 근거로 답변하세요.
+            거절 문구: "죄송합니다. 요청하신 정보는 정확한 안내가 어렵습니다. 고객센터로 문의해 주세요."
+
+            [어조] 고객이 반말·구어체를 사용해도 상담원은 항상 친절하고 정중한 표준어(~입니다, ~합니다)를 유지하세요.
             """;
 
     @Bean

From 6028a9c3fb994937989c9f78b08f3107a4dca25a Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Wed, 20 May 2026 09:00:34 +0900
Subject: [PATCH 13/20] =?UTF-8?q?feat:=20=ED=8F=89=EA=B0=80=20=EC=8A=A4?=
 =?UTF-8?q?=ED=81=AC=EB=A6=BD=ED=8A=B8=20=EC=83=88=EB=A1=9C=EC=9A=B4=20KPI?=
 =?UTF-8?q?=20=EA=B8=B0=EC=A4=80=20=EB=B0=98=EC=98=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/evaluate.py | 158 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 107 insertions(+), 51 deletions(-)

diff --git a/data/evaluate.py b/data/evaluate.py
index ee0a0cb..ea11014 100644
--- a/data/evaluate.py
+++ b/data/evaluate.py
@@ -2,7 +2,15 @@
 챗봇 품질 평가 스크립트
 
 실행 중인 서버(localhost:8080)에 테스트 질문을 보내고,
-LLM 판정으로 정확도를 측정합니다.
+LLM 판정으로 KPI 지표를 측정합니다.
+
+KPI 지표:
+  core_response  핵심 응답 성공  — 질문이 요구하는 정보에 직접 답변했는가
+  factuality     사실성          — 기대 답변과 모순되는 내용이 없는가
+  front_loaded   두괄식 응답     — 첫 문장에 직접 답변이 있는가
+  restraint      정보 절제력     — 부가 정보(논리 단위)가 1개 이하인가
+  conciseness    간결성          — 응답이 200자 이하인가 (Python 계산)
+  final_pass     최종 통과       — core_response × factuality
 
 사전 준비:
   python -m venv .venv
@@ -37,8 +45,8 @@
 
 SERVER_URL = "http://localhost:8080/api/chat"
 JUDGE_MODEL = "gpt-4o-mini"
+CONCISENESS_MAX_CHARS = 200
 
-# .env에서 API 키 로드
 env_path = ROOT_DIR / ".env"
 env_vars = dotenv_values(env_path)
 OPENAI_API_KEY = env_vars.get("OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
@@ -72,31 +80,38 @@ def ask_server(question: str) -> dict | None:
 # ─── LLM 판정 ─────────────────────────────────────────────────────────────────
 
 def judge_answer(question: str, expected: str, actual: str) -> dict:
-    """LLM으로 답변의 사실적 일치도를 판정합니다. usage 포함하여 반환."""
-    prompt = f"""당신은 FAQ 챗봇 답변의 품질을 평가하는 판정자입니다.
-제공된 [질문], [기대 답변], 그리고 챗봇의 [실제 답변]을 비교하여 [평가 기준]에 따라 정답 여부(정답:1/오답:0)를 판정하세요.
+    """LLM으로 4개 KPI 지표를 단일 호출로 판정합니다."""
+    prompt = f"""당신은 챗봇 답변 품질을 평가하는 판정자입니다.
 
-[]질문]: {question}
+[질문]: {question}
+[기대 답변]: {expected}
+[실제 답변]: {actual}
 
-[기대 답변 (정답)]: {expected}
+아래 4개 지표를 각각 판정하세요.
 
-[실제 답변 (챗봇)]: {actual}
+1. core_response — [질문]이 요구하는 정보에 실제 답변이 직접 응답했는가?
+  [질문]에서 사용자가 묻는 것(수치·기간·조건·방법 등)을 먼저 파악하세요.
+  [기대 답변]은 정답 팩트 확인 기준으로만 사용하세요.
+  1: 질문이 요구하는 정보에 직접 답변하며 [기대 답변]의 사실과 일치
+  0: 질문에 대한 답변 누락·오류, 또는 기대 답변이 있는데 거절한 경우
 
-[평가 기준]
-실제 답변이 기대 답변의 팩트와 사실적으로 일치하는지 아래 기준에 따라 평가하세요.
+2. factuality — 실제 답변 전체에 [기대 답변]과 모순되는 사실이 없는가?
+  1: [질문]에 대한 답변과 부가 정보 모두 [기대 답변]과 모순 없음
+  1: 거절 응답 (허위 정보 없음)
+  0: [기대 답변]의 사실과 충돌하는 내용 포함
 
-- 핵심 팩트 필수 일치 (1점): 실제 답변이 [질문]의 핵심에 대응하는 정확한 정답(수치나 내용)을 명확히 포함해야 합니다. 표현이 달라도 핵심 사실이 같다면 정답입니다.
-- 부가 정보의 선택적 허용: [기대 답변]에 적힌 주변 부가 정보(지급 시점, 사후 처리 등)를 실제 답변이 생략했더라도, 핵심 팩트가 맞다면 감점하지 마세요.
-- 감점 조항 (0점):
-  1. 질문에 대한 핵심 사실이 누락되었거나 틀린 경우.
-  2. 실제 답변이 부가 정보를 '선택적으로 제공'했으나, 그 내용이 [기대 답변]의 사실과 다를 경우.
-  3. 핵심 팩트 없이 부가 정보만 부분적으로 맞춘 경우.
+3. front_loaded — 첫 문장에 [질문]에 대한 직접 답변이 있는가?
+  1: 첫 문장에 질문이 요구하는 정보를 직접 전달
+  1: 거절 응답 (거절 의사가 첫 문장에 명확히 표현)
+  0: 서론·공감·확인 문구("안녕하세요", "좋은 질문이에요" 등)로 시작
 
+4. restraint — [질문]에 대한 직접 답변 외 부가 정보(논리 단위)가 1개 이하인가?
+  [질문]이 여러 항목을 묻는 경우, 각 항목의 답변은 직접 답변으로 간주 (부가 집계 제외)
+  1: 부가 정보 1개 이하
+  0: 부가 정보 2개 이상
 
-JSON으로만 응답하세요:
-{{"score": 1, "reason": "..."}}  (정답)
-{{"score": 0, "reason": "..."}}  (오답)
-"""
+JSON으로만 응답:
+{{"core_response":1,"factuality":1,"front_loaded":1,"restraint":1,"reasons":{{"core_response":"...","factuality":"...","front_loaded":"...","restraint":"..."}}}}"""
 
     resp = openai_client.chat.completions.create(
         model=JUDGE_MODEL,
@@ -109,7 +124,10 @@ def judge_answer(question: str, expected: str, actual: str) -> dict:
     try:
         result = json.loads(resp.choices[0].message.content)
     except json.JSONDecodeError:
-        result = {"score": 0, "reason": "판정 파싱 실패"}
+        result = {
+            "core_response": 0, "factuality": 0, "front_loaded": 0, "restraint": 0,
+            "reasons": {k: "판정 파싱 실패" for k in ("core_response", "factuality", "front_loaded", "restraint")},
+        }
 
     result["judge_usage"] = {
         "prompt_tokens": usage.prompt_tokens,
@@ -138,15 +156,25 @@ def process_question(q: dict, idx: int) -> dict:
     token_usage = response.get("tokenUsage", {})
     judgment = judge_answer(question_ko, expected, actual_answer)
 
+    core_response = judgment.get("core_response", 0)
+    factuality = judgment.get("factuality", 0)
+
     return {
         "qid": qid,
         "tier": tier,
         "status": "ok",
-        "score": judgment.get("score", 0),
-        "reason": judgment.get("reason", ""),
         "question": question_ko,
         "token_usage": token_usage,
         "duration": time.time() - start,
+        "kpi": {
+            "core_response": core_response,
+            "factuality": factuality,
+            "front_loaded": judgment.get("front_loaded", 0),
+            "restraint": judgment.get("restraint", 0),
+            "conciseness": int(len(actual_answer) <= CONCISENESS_MAX_CHARS),
+            "final_pass": core_response * factuality,
+        },
+        "reasons": judgment.get("reasons", {}),
     }
 
 
@@ -180,46 +208,65 @@ def main():
         print(f"  ./gradlew bootRun")
         return
 
-    results = {"correct": 0, "incorrect": 0, "error": 0}
+    error_count = 0
+    kpi_totals = {k: 0 for k in ("core_response", "factuality", "front_loaded", "restraint", "conciseness", "final_pass")}
     tier_results = {}
     chatbot_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
     durations = []
     start_time = time.time()
 
+    def handle_result(r):
+        nonlocal error_count
+        _aggregate(r, kpi_totals, tier_results, chatbot_usage, durations, args.verbose)
+        if r["status"] == "error":
+            error_count += 1
+
     # ─── 실행 (순차 / 병렬 공통 집계) ────────────────────────────────────────
     if args.parallel > 1:
         with ThreadPoolExecutor(max_workers=args.parallel) as executor:
             futures = [executor.submit(process_question, q, i) for i, q in enumerate(questions)]
             for completed, fut in enumerate(as_completed(futures), 1):
-                r = fut.result()
-                _aggregate(r, results, tier_results, chatbot_usage, durations, args.verbose)
+                handle_result(fut.result())
                 if not args.verbose and completed % 10 == 0:
                     print(f"  진행: {completed}/{len(questions)}")
     else:
         for i, q in enumerate(questions):
-            r = process_question(q, i)
-            _aggregate(r, results, tier_results, chatbot_usage, durations, args.verbose)
+            handle_result(process_question(q, i))
             if not args.verbose and (i + 1) % 10 == 0:
                 print(f"  진행: {i+1}/{len(questions)}")
 
     # ─── 결과 출력 ────────────────────────────────────────────────────────────
     elapsed = time.time() - start_time
-    total = results["correct"] + results["incorrect"] + results["error"]
-    evaluated = total - results["error"]
+    total = len(questions)
+    evaluated = total - error_count
 
     print()
-    print(f"=== 평가 결과 ===")
-    print(f"전체: {results['correct']}/{total} ({results['correct']/max(total,1)*100:.1f}%)")
-    print()
+    print(f"=== KPI 결과 ({total}문항) ===")
+    kpi_labels = [
+        ("core_response", "핵심 응답 성공"),
+        ("factuality",    "사실성        "),
+        ("front_loaded",  "두괄식 응답   "),
+        ("restraint",     "정보 절제력   "),
+        ("conciseness",   f"간결성 ≤{CONCISENESS_MAX_CHARS}자  "),
+    ]
+    for key, label in kpi_labels:
+        n = kpi_totals[key]
+        pct = n / max(evaluated, 1) * 100
+        print(f"  {label}: {n:3d}/{evaluated} ({pct:.1f}%)")
+    print(f"  {'─' * 36}")
+    n = kpi_totals["final_pass"]
+    pct = n / max(evaluated, 1) * 100
+    print(f"  최종 통과 (정확성)  : {n:3d}/{evaluated} ({pct:.1f}%)")
 
-    print("난이도별:")
+    print()
+    print("난이도별 (최종 통과):")
     for tier in sorted(tier_results.keys()):
         t = tier_results[tier]
         pct = t["correct"] / max(t["total"], 1) * 100
         print(f"  {tier:8s}: {t['correct']:2d}/{t['total']:2d} ({pct:.0f}%)")
 
-    if results["error"] > 0:
-        print(f"\n  에러: {results['error']}건")
+    if error_count > 0:
+        print(f"\n  에러: {error_count}건")
 
     print(f"\n소요 시간: {elapsed:.1f}초")
     if durations:
@@ -234,10 +281,18 @@ def main():
     with open(result_file, "w") as f:
         json.dump({
             "total": total,
-            "correct": results["correct"],
-            "incorrect": results["incorrect"],
-            "error": results["error"],
-            "accuracy": results["correct"] / max(total, 1),
+            "correct": kpi_totals["final_pass"],
+            "incorrect": evaluated - kpi_totals["final_pass"],
+            "error": error_count,
+            "accuracy": round(kpi_totals["final_pass"] / max(evaluated, 1), 4),
+            "kpi": {
+                key: {
+                    "correct": kpi_totals[key],
+                    "total": evaluated,
+                    "rate": round(kpi_totals[key] / max(evaluated, 1), 4),
+                }
+                for key in ("core_response", "factuality", "front_loaded", "restraint", "conciseness", "final_pass")
+            },
             "tier_results": tier_results,
             "elapsed_seconds": elapsed,
             "avg_response_seconds": (sum(durations) / len(durations)) if durations else 0,
@@ -245,7 +300,8 @@ def main():
         }, f, indent=2, ensure_ascii=False)
     print(f"\n결과 저장: {result_file}")
 
-def _aggregate(r: dict, results: dict, tier_results: dict, chatbot_usage: dict,
+
+def _aggregate(r: dict, kpi_totals: dict, tier_results: dict, chatbot_usage: dict,
                durations: list, verbose: bool):
     """process_question 결과 1건을 집계합니다."""
     tier = r["tier"]
@@ -256,7 +312,6 @@ def _aggregate(r: dict, results: dict, tier_results: dict, chatbot_usage: dict,
     tier_results[tier]["total"] += 1
 
     if r["status"] == "error":
-        results["error"] += 1
         if verbose:
             print(f"[{r['qid']}] ERROR — 서버 응답 없음")
         return
@@ -266,19 +321,20 @@ def _aggregate(r: dict, results: dict, tier_results: dict, chatbot_usage: dict,
     chatbot_usage["completion_tokens"] += token_usage.get("completionTokens", 0)
     chatbot_usage["total_tokens"] += token_usage.get("totalTokens", 0)
 
-    score = r["score"]
-    if score == 1:
-        results["correct"] += 1
+    kpi = r["kpi"]
+    for key in kpi_totals:
+        kpi_totals[key] += kpi[key]
+
+    if kpi["final_pass"] == 1:
         tier_results[tier]["correct"] += 1
-        marker = "✓"
-    else:
-        results["incorrect"] += 1
-        marker = "✗"
 
     if verbose:
+        marker = "✓" if kpi["final_pass"] == 1 else "✗"
         print(f"[{r['qid']}] {marker} ({tier}) {r['question'][:40]}...")
-        if score == 0:
-            print(f"        이유: {r['reason'][:80]}")
+        if kpi["final_pass"] == 0:
+            for k, v in r.get("reasons", {}).items():
+                if kpi.get(k, 1) == 0:
+                    print(f"        [{k}=0] {str(v)[:80]}")
 
 
 if __name__ == "__main__":

From 858504ffb6b3ca254deec3f0ae87d8078f0ffecb Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Wed, 20 May 2026 09:03:50 +0900
Subject: [PATCH 14/20] =?UTF-8?q?feat:=20v7=20=EC=B1=84=ED=8C=85=20?=
 =?UTF-8?q?=EB=A1=9C=EA=B7=B8=20=EB=AC=B8=EC=84=9C=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/eval_result.json                         | 22 ++---
 .../chatbot/application/ChatbotService.java   |  6 +-
 .../chatbot/infrastructure/ChatlogParser.java | 87 +++++++++++++++++++
 .../infrastructure/DocumentLoader.java        | 25 ++++++
 .../VectorStoreInitializer.java               |  6 +-
 5 files changed, 130 insertions(+), 16 deletions(-)
 create mode 100644 src/main/java/com/cholog/bootcamp/chatbot/infrastructure/ChatlogParser.java

diff --git a/data/eval_result.json b/data/eval_result.json
index d004e6a..c0536cb 100644
--- a/data/eval_result.json
+++ b/data/eval_result.json
@@ -1,28 +1,28 @@
 {
   "total": 150,
-  "correct": 110,
-  "incorrect": 40,
+  "correct": 108,
+  "incorrect": 42,
   "error": 0,
-  "accuracy": 0.7333333333333333,
+  "accuracy": 0.72,
   "tier_results": {
     "easy": {
-      "correct": 25,
+      "correct": 24,
       "total": 30
     },
     "medium": {
-      "correct": 66,
+      "correct": 62,
       "total": 94
     },
     "hard": {
-      "correct": 19,
+      "correct": 22,
       "total": 26
     }
   },
-  "elapsed_seconds": 40.24573802947998,
-  "avg_response_seconds": 2.556360289255778,
+  "elapsed_seconds": 50.64455032348633,
+  "avg_response_seconds": 3.2682551924387613,
   "chatbot_token_usage": {
-    "prompt_tokens": 177297,
-    "completion_tokens": 10421,
-    "total_tokens": 187718
+    "prompt_tokens": 177303,
+    "completion_tokens": 10457,
+    "total_tokens": 187760
   }
 }
\ No newline at end of file
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java b/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
index 72cfb44..85b0dfc 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
@@ -49,7 +49,7 @@ private String searchRelevantDocuments(String question) {
                         .build()
         );
 
-        //loggingSearchedDocs(question, docs);
+        loggingSearchedDocs(question, docs);
 
         return docs.stream()
                 .map(Document::getText)
@@ -57,11 +57,11 @@ private String searchRelevantDocuments(String question) {
     }
 
     private static void loggingSearchedDocs(String question, List<Document> docs) {
-        log.debug("=== [RAG] 검색된 문서 ({}개) for: {} ===", docs.size(), question);
+        log.info("=== [RAG] 검색된 문서 ({}개) for: {} ===", docs.size(), question);
         for (int i = 0; i < docs.size(); i++) {
             Document doc = docs.get(i);
             String preview = doc.getText().substring(0, Math.min(120, doc.getText().length())).replace("\n", " ");
-            log.debug("[{}] metadata={} | text={}", i + 1, doc.getMetadata(), preview);
+            log.info("[{}] metadata={} | text={}", i + 1, doc.getMetadata(), preview);
         }
     }
 
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/ChatlogParser.java b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/ChatlogParser.java
new file mode 100644
index 0000000..af3d508
--- /dev/null
+++ b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/ChatlogParser.java
@@ -0,0 +1,87 @@
+package com.cholog.bootcamp.chatbot.infrastructure;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.document.Document;
+import org.springframework.core.io.Resource;
+import org.springframework.stereotype.Component;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+@Slf4j
+@Component
+public class ChatlogParser {
+    // Converts JSONL chat logs (one JSON object per line) into Documents tagged with the "chatlog" layer.
+    private static final String LAYER_CHATLOG = "chatlog";
+    private static final String ACCURACY_CORRECT = "correct";
+
+    private final ObjectMapper objectMapper = new ObjectMapper();
+    /** Reads the resource line by line and returns one Document per line accepted by parseLine; blank lines are skipped. */
+    public List<Document> parse(Resource resource) throws IOException {
+        List<Document> result = new ArrayList<>();
+        // Decode as UTF-8 explicitly (JSON's interchange encoding) instead of relying on the platform default charset.
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(resource.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                if (line.isBlank()) continue;
+                Document doc = parseLine(line, resource.getFilename());
+                if (doc != null) result.add(doc);
+            }
+        }
+
+        log.debug("챗로그 파싱: {} → {}개 청크", resource.getFilename(), result.size());
+        return result;
+    }
+    /** Parses one JSON line; returns null unless "agent_accuracy" is "correct" and the log has at least one agent turn. */
+    private Document parseLine(String line, String filename) throws IOException {
+        JsonNode node = objectMapper.readTree(line);
+        if (!ACCURACY_CORRECT.equals(node.path("agent_accuracy").asText())) return null;
+
+        String text = buildText(node);
+        if (text == null) return null;
+
+        return toDocument(text, filename);
+    }
+    /** Wraps the text in a Document whose metadata records the chatlog layer and the source file name. */
+    private Document toDocument(String text, String filename) {
+        Document doc = new Document(text);
+        doc.getMetadata().put("layer", LAYER_CHATLOG);
+        doc.getMetadata().put("source", filename);
+        return doc;
+    }
+    /** Renders the tag header (only when "tags" is non-empty) followed by the turns; returns null when no turn has role "agent". */
+    private String buildText(JsonNode node) {
+        StringBuilder sb = new StringBuilder();
+        appendTags(sb, node);
+        boolean hasAgentTurn = appendTurns(sb, node);
+        return hasAgentTurn ? sb.toString().trim() : null;
+    }
+
+    private void appendTags(StringBuilder sb, JsonNode node) {
+        List<String> tags = new ArrayList<>();
+        node.path("tags").forEach(tag -> tags.add(tag.asText()));
+        if (!tags.isEmpty()) {
+            sb.append("[태그: ").append(String.join(", ", tags)).append("]\n");
+        }
+    }
+    /** Appends every turn as "speaker: text"; any role other than "customer" gets the agent label. Returns true if a turn has role "agent". */
+    private boolean appendTurns(StringBuilder sb, JsonNode node) {
+        boolean hasAgentTurn = false;
+        for (JsonNode turn : node.path("turns")) {
+            String text = turn.path("text").asText();
+
+            String role = turn.path("role").asText();
+            String prefix = "customer".equals(role) ? "고객" : "상담원";
+            sb.append(prefix).append(": ").append(text).append("\n");
+
+            if ("agent".equals(role)) hasAgentTurn = true;
+        }
+        return hasAgentTurn;
+    }
+
+}
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java
index 5434ccc..4613b4b 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java
@@ -1,5 +1,6 @@
 package com.cholog.bootcamp.chatbot.infrastructure;
 
+import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.ai.document.Document;
 import org.springframework.ai.reader.TextReader;
@@ -13,13 +14,16 @@
 
 @Slf4j
 @Component
+@RequiredArgsConstructor
 public class DocumentLoader {
 
     private static final String FAQ_PATTERN = "file:data/layer1_faq/*.md";
     private static final String POLICY_PATTERN = "file:data/layer2_policies/current/*.md";
+    private static final String CHATLOG_PATTERN = "file:data/layer3_chatlogs/*.jsonl";
     private static final String LAYER_FAQ = "faq";
     private static final String LAYER_POLICY = "policy";
 
+    private final ChatlogParser chatlogParser;
     private final PathMatchingResourcePatternResolver resolver =
             new PathMatchingResourcePatternResolver();
     private final MarkdownHeadingSplitter faqSplitter = new MarkdownHeadingSplitter("###");
@@ -33,6 +37,27 @@ public List<Document> loadPolicies() {
         return load(POLICY_PATTERN, LAYER_POLICY, policySplitter);
     }
 
+    public List<Document> loadChatlogs() { // Loads every chat-log file matching CHATLOG_PATTERN into documents.
+        try {
+            Resource[] resources = resolver.getResources(CHATLOG_PATTERN);
+            if (resources.length == 0) {
+                log.warn("챗로그 파일 없음");
+                return List.of();
+            }
+            // Parse each matching file and concatenate the results in the order the resolver returned them.
+            List<Document> result = new ArrayList<>();
+            for (Resource resource : resources) {
+                result.addAll(chatlogParser.parse(resource));
+            }
+
+            log.info("챗로그 로드 완료: 총 {}개 청크", result.size());
+            return result;
+            // An IOException from resolving or parsing is rethrown unchecked with the original cause attached.
+        } catch (IOException e) {
+            throw new IllegalStateException("챗로그 로딩 실패", e);
+        }
+    }
+
     private List<Document> load(String pattern, String layer, MarkdownHeadingSplitter splitter) {
         try {
             Resource[] resources = resolver.getResources(pattern);
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/VectorStoreInitializer.java b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/VectorStoreInitializer.java
index c73584d..8087b8d 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/VectorStoreInitializer.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/VectorStoreInitializer.java
@@ -23,10 +23,12 @@ public class VectorStoreInitializer implements ApplicationRunner {
     public void run(ApplicationArguments args) {
         List<Document> faqDocs = documentLoader.loadFaq();
         List<Document> policyDocs = documentLoader.loadPolicies();
+        //List<Document> chatlogDocs = documentLoader.loadChatlogs();
 
         List<Document> all = new ArrayList<>();
         all.addAll(faqDocs);
         all.addAll(policyDocs);
+        //all.addAll(chatlogDocs);
 
         if (all.isEmpty()) {
             log.warn("적재할 문서 없음. data/ 폴더 확인 필요");
@@ -34,7 +36,7 @@ public void run(ApplicationArguments args) {
         }
 
         vectorStore.add(all);
-        log.info("임베딩 완료: 총 {}개 (faq={}, policy={})",
-                all.size(), faqDocs.size(), policyDocs.size());
+        log.info("임베딩 완료: 총 {}개 (faq={}, policy={}, chatlog={})",
+                all.size(), faqDocs.size(), policyDocs.size(), 0);
     }
 }

From 28e0cf0e8010ea1108b72d7cc620687c1e0f1f38 Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Wed, 20 May 2026 10:12:53 +0900
Subject: [PATCH 15/20] =?UTF-8?q?refactor:=20=ED=8C=8C=EC=8B=B1=20?=
 =?UTF-8?q?=EB=A1=9C=EC=A7=81=20=ED=8C=A8=ED=82=A4=EC=A7=80=20=EC=9D=B4?=
 =?UTF-8?q?=EB=8F=99?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../bootcamp/chatbot/infrastructure/DocumentLoader.java     | 2 ++
 .../chatbot/infrastructure/VectorStoreInitializer.java      | 6 +++---
 .../chatbot/{infrastructure => util}/ChatlogParser.java     | 4 ++--
 .../{infrastructure => util}/MarkdownHeadingSplitter.java   | 2 +-
 4 files changed, 8 insertions(+), 6 deletions(-)
 rename src/main/java/com/cholog/bootcamp/chatbot/{infrastructure => util}/ChatlogParser.java (98%)
 rename src/main/java/com/cholog/bootcamp/chatbot/{infrastructure => util}/MarkdownHeadingSplitter.java (94%)

diff --git a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java
index 4613b4b..5232dc4 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/DocumentLoader.java
@@ -1,5 +1,7 @@
 package com.cholog.bootcamp.chatbot.infrastructure;
 
+import com.cholog.bootcamp.chatbot.util.ChatlogParser;
+import com.cholog.bootcamp.chatbot.util.MarkdownHeadingSplitter;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.ai.document.Document;
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/VectorStoreInitializer.java b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/VectorStoreInitializer.java
index 8087b8d..186c07b 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/VectorStoreInitializer.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/VectorStoreInitializer.java
@@ -23,12 +23,12 @@ public class VectorStoreInitializer implements ApplicationRunner {
     public void run(ApplicationArguments args) {
         List<Document> faqDocs = documentLoader.loadFaq();
         List<Document> policyDocs = documentLoader.loadPolicies();
-        //List<Document> chatlogDocs = documentLoader.loadChatlogs();
+        List<Document> chatlogDocs = documentLoader.loadChatlogs();
 
         List<Document> all = new ArrayList<>();
         all.addAll(faqDocs);
         all.addAll(policyDocs);
-        //all.addAll(chatlogDocs);
+        all.addAll(chatlogDocs);
 
         if (all.isEmpty()) {
             log.warn("적재할 문서 없음. data/ 폴더 확인 필요");
@@ -37,6 +37,6 @@ public void run(ApplicationArguments args) {
 
         vectorStore.add(all);
         log.info("임베딩 완료: 총 {}개 (faq={}, policy={}, chatlog={})",
-                all.size(), faqDocs.size(), policyDocs.size(), 0);
+                all.size(), faqDocs.size(), policyDocs.size(), chatlogDocs.size());
     }
 }
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/ChatlogParser.java b/src/main/java/com/cholog/bootcamp/chatbot/util/ChatlogParser.java
similarity index 98%
rename from src/main/java/com/cholog/bootcamp/chatbot/infrastructure/ChatlogParser.java
rename to src/main/java/com/cholog/bootcamp/chatbot/util/ChatlogParser.java
index af3d508..9e5bd5c 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/ChatlogParser.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/util/ChatlogParser.java
@@ -1,4 +1,4 @@
-package com.cholog.bootcamp.chatbot.infrastructure;
+package com.cholog.bootcamp.chatbot.util;
 
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
@@ -84,4 +84,4 @@ private boolean appendTurns(StringBuilder sb, JsonNode node) {
         return hasAgentTurn;
     }
 
-}
+}
\ No newline at end of file
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/MarkdownHeadingSplitter.java b/src/main/java/com/cholog/bootcamp/chatbot/util/MarkdownHeadingSplitter.java
similarity index 94%
rename from src/main/java/com/cholog/bootcamp/chatbot/infrastructure/MarkdownHeadingSplitter.java
rename to src/main/java/com/cholog/bootcamp/chatbot/util/MarkdownHeadingSplitter.java
index c50cf26..9f1dfe3 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/infrastructure/MarkdownHeadingSplitter.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/util/MarkdownHeadingSplitter.java
@@ -1,4 +1,4 @@
-package com.cholog.bootcamp.chatbot.infrastructure;
+package com.cholog.bootcamp.chatbot.util;
 
 import org.springframework.ai.document.Document;
 

From b0f63281183b666f66f6e882606c63d8b1c53509 Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Wed, 20 May 2026 11:21:22 +0900
Subject: [PATCH 16/20] =?UTF-8?q?feat:=20v8=20=EB=A0=88=EC=9D=B4=EC=96=B4?=
 =?UTF-8?q?=EB=B3=84=20top-k=20=EB=8F=84=EC=9E=85=20=EB=B0=8F=20=ED=94=84?=
 =?UTF-8?q?=EB=A1=AC=ED=94=84=ED=8A=B8=20=EC=88=98=EC=A0=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/eval_result.json                         | 54 +++++++++++++++----
 .../chatbot/application/ChatbotService.java   | 20 ++++---
 .../bootcamp/chatbot/util/ChatlogParser.java  |  1 +
 .../com/cholog/bootcamp/config/AiConfig.java  | 16 ++++--
 4 files changed, 70 insertions(+), 21 deletions(-)

diff --git a/data/eval_result.json b/data/eval_result.json
index c0536cb..b40b60b 100644
--- a/data/eval_result.json
+++ b/data/eval_result.json
@@ -1,28 +1,60 @@
 {
   "total": 150,
-  "correct": 108,
-  "incorrect": 42,
+  "correct": 122,
+  "incorrect": 28,
   "error": 0,
-  "accuracy": 0.72,
+  "accuracy": 0.8133,
+  "kpi": {
+    "core_response": {
+      "correct": 122,
+      "total": 150,
+      "rate": 0.8133
+    },
+    "factuality": {
+      "correct": 142,
+      "total": 150,
+      "rate": 0.9467
+    },
+    "front_loaded": {
+      "correct": 126,
+      "total": 150,
+      "rate": 0.84
+    },
+    "restraint": {
+      "correct": 141,
+      "total": 150,
+      "rate": 0.94
+    },
+    "conciseness": {
+      "correct": 150,
+      "total": 150,
+      "rate": 1.0
+    },
+    "final_pass": {
+      "correct": 122,
+      "total": 150,
+      "rate": 0.8133
+    }
+  },
   "tier_results": {
     "easy": {
-      "correct": 24,
+      "correct": 28,
       "total": 30
     },
     "medium": {
-      "correct": 62,
+      "correct": 74,
       "total": 94
     },
     "hard": {
-      "correct": 22,
+      "correct": 20,
       "total": 26
     }
   },
-  "elapsed_seconds": 50.64455032348633,
-  "avg_response_seconds": 3.2682551924387613,
+  "elapsed_seconds": 82.33759212493896,
+  "avg_response_seconds": 5.253884649276733,
   "chatbot_token_usage": {
-    "prompt_tokens": 177303,
-    "completion_tokens": 10457,
-    "total_tokens": 187760
+    "prompt_tokens": 225234,
+    "completion_tokens": 6430,
+    "total_tokens": 231664
   }
 }
\ No newline at end of file
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java b/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
index 85b0dfc..db7b907 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
@@ -42,12 +42,10 @@ public ChatbotResult chat(String question) {
     }
 
     private String searchRelevantDocuments(String question) {
-        List<Document> docs = vectorStore.similaritySearch(
-                SearchRequest.builder()
-                        .query(question)
-                        .topK(8)
-                        .build()
-        );
+        List<Document> docs = new java.util.ArrayList<>();
+        docs.addAll(searchByLayer(question, "faq", 3));
+        docs.addAll(searchByLayer(question, "policy", 3));
+        docs.addAll(searchByLayer(question, "chatlog", 2));
 
         loggingSearchedDocs(question, docs);
 
@@ -56,6 +54,16 @@ private String searchRelevantDocuments(String question) {
                 .collect(Collectors.joining("\n\n"));
     }
 
+    private List<Document> searchByLayer(String question, String layer, int topK) { // Similarity search for the question, capped at topK results.
+        return vectorStore.similaritySearch(
+                SearchRequest.builder()
+                        .query(question)
+                        .topK(topK)
+                        .filterExpression("layer == '" + layer + "'") // layer is spliced in unescaped; the visible callers pass fixed literals only
+                        .build()
+        );
+    }
+
     private static void loggingSearchedDocs(String question, List<Document> docs) {
         log.info("=== [RAG] 검색된 문서 ({}개) for: {} ===", docs.size(), question);
         for (int i = 0; i < docs.size(); i++) {
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/util/ChatlogParser.java b/src/main/java/com/cholog/bootcamp/chatbot/util/ChatlogParser.java
index 9e5bd5c..73939b2 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/util/ChatlogParser.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/util/ChatlogParser.java
@@ -63,6 +63,7 @@ private String buildText(JsonNode node) {
     }
 
     private void appendTags(StringBuilder sb, JsonNode node) {
+        sb.append("[예시 대화]\n");
         List<String> tags = new ArrayList<>();
         node.path("tags").forEach(tag -> tags.add(tag.asText()));
         if (!tags.isEmpty()) {
diff --git a/src/main/java/com/cholog/bootcamp/config/AiConfig.java b/src/main/java/com/cholog/bootcamp/config/AiConfig.java
index ddb272f..29b9efb 100644
--- a/src/main/java/com/cholog/bootcamp/config/AiConfig.java
+++ b/src/main/java/com/cholog/bootcamp/config/AiConfig.java
@@ -13,9 +13,9 @@ public class AiConfig {
     private static final String SYSTEM_PROMPT = """
             당신은 초록 코퍼레이션의 고객센터 상담원입니다. [참고 문서]만을 근거로 답변하세요.
 
-            [답변 형식] 두괄식 2단계로 작성하세요.
-            - 1단계(필수): 핵심 정답을 한두 문장으로 직접 답변
-            - 2단계(선택): 밀접하게 연관된 예외/조건/팁이 있으면 1개만 추가, 없으면 생략
+            [답변 형식] 두괄식으로 작성하세요. 레이블("1단계", "2단계" 등)은 절대 출력하지 마세요.
+            - 핵심 정답을 한두 문장으로 먼저 직접 답변하세요.
+            - 밀접하게 연관된 부가 정보(예외/조건/팁)가 있으면 최대 1가지만 추가하고, 없으면 생략하세요.
 
             [답변 전 내부 처리]
             1. 비격식 표현을 표준어로 해석 후 핵심 주제를 파악하세요.
@@ -29,11 +29,19 @@ public class AiConfig {
               예) Q: "[과거 정보] 아닌가요?" → A: "현재 기준으로는 [참고 문서의 최신 기준]입니다."
             - 과거 사례/다른 고객 경험 질문 시: 문서에 명시된 보상·처리 기준을 안내하세요.
 
+            [개인 계정 정보] 포인트 잔액, 주문 내역, 배송 조회 등 특정 고객의 계정에 종속된 정보는 이 챗봇이 시스템상 접근할 수 없습니다.
+            이 경우 "고객님의 [정보]는 마이페이지에서 직접 확인하실 수 있습니다."라고 안내하세요.
+
+            [예시 대화 처리] [참고 문서] 중 [예시 대화]로 표시된 항목은 과거 상담 사례입니다.
+            예시 대화 속 고객의 주문 상태·날짜 등 개별 상황은 현재 고객과 무관합니다.
+            상담원의 답변 방식과 정책 안내 패턴만 참고하고, 예시 고객의 상황을 현재 고객에게 적용하지 마세요.
+
             [거절 기준] [참고 문서]에 관련 내용이 전혀 없는 경우에만 아래 문구를 사용하세요.
             관련 내용이 있다면 반드시 그것을 근거로 답변하세요.
             거절 문구: "죄송합니다. 요청하신 정보는 정확한 안내가 어렵습니다. 고객센터로 문의해 주세요."
 
-            [어조] 고객이 반말·구어체를 사용해도 상담원은 항상 친절하고 정중한 표준어(~입니다, ~합니다)를 유지하세요.
+            [어조] 고객이 반말·구어체를 사용해도 항상 친절하고 정중한 표준어(~입니다, ~합니다)를 유지하세요.
+            불편을 겪은 고객에게는 공감 표현("불편을 드려 죄송합니다")을 자연스럽게 한 문장 추가하세요.
             """;
 
     @Bean

From 37e854872e92e76ab0b0cec7ff829e5d93fdc068 Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Sat, 23 May 2026 16:10:22 +0900
Subject: [PATCH 17/20] =?UTF-8?q?test:=20KPI=20=EC=88=98=EC=A0=95=EC=97=90?=
 =?UTF-8?q?=20=EB=94=B0=EB=A5=B8=20=ED=85=8C=EC=8A=A4=ED=8A=B8=20=ED=94=84?=
 =?UTF-8?q?=EB=A1=AC=ED=94=84=20=EC=88=98=EC=A0=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/eval_result.json                         |  66 +++---
 data/evaluate.py                              | 189 ++++++++++++------
 data/test_questions.json                      |   2 +-
 .../com/cholog/bootcamp/config/AiConfig.java  |   2 +-
 4 files changed, 167 insertions(+), 92 deletions(-)

diff --git a/data/eval_result.json b/data/eval_result.json
index b40b60b..fe4ee6b 100644
--- a/data/eval_result.json
+++ b/data/eval_result.json
@@ -1,60 +1,68 @@
 {
   "total": 150,
-  "correct": 122,
-  "incorrect": 28,
+  "correct": 135,
+  "incorrect": 15,
   "error": 0,
-  "accuracy": 0.8133,
+  "accuracy": 0.9,
   "kpi": {
+    "refusal_quality": {
+      "score_0": 4,
+      "score_1": 0,
+      "score_2": 146,
+      "total": 150
+    },
     "core_response": {
-      "correct": 122,
-      "total": 150,
-      "rate": 0.8133
+      "score_0": 10,
+      "score_1": 4,
+      "score_2": 136,
+      "total": 150
     },
     "factuality": {
-      "correct": 142,
-      "total": 150,
-      "rate": 0.9467
+      "score_0": 6,
+      "score_1": 8,
+      "score_2": 136,
+      "total": 150
     },
     "front_loaded": {
-      "correct": 126,
-      "total": 150,
-      "rate": 0.84
+      "score_0": 13,
+      "score_1": 137,
+      "total": 150
     },
     "restraint": {
-      "correct": 141,
-      "total": 150,
-      "rate": 0.94
+      "score_0": 6,
+      "score_1": 144,
+      "total": 150
     },
     "conciseness": {
-      "correct": 150,
-      "total": 150,
-      "rate": 1.0
+      "score_0": 0,
+      "score_1": 150,
+      "total": 150
     },
     "final_pass": {
-      "correct": 122,
-      "total": 150,
-      "rate": 0.8133
+      "score_0": 15,
+      "score_1": 135,
+      "total": 150
     }
   },
   "tier_results": {
     "easy": {
-      "correct": 28,
+      "correct": 29,
       "total": 30
     },
     "medium": {
-      "correct": 74,
+      "correct": 83,
       "total": 94
     },
     "hard": {
-      "correct": 20,
+      "correct": 23,
       "total": 26
     }
   },
-  "elapsed_seconds": 82.33759212493896,
-  "avg_response_seconds": 5.253884649276733,
+  "elapsed_seconds": 77.15323114395142,
+  "avg_response_seconds": 5.031613259315491,
   "chatbot_token_usage": {
-    "prompt_tokens": 225234,
-    "completion_tokens": 6430,
-    "total_tokens": 231664
+    "prompt_tokens": 225534,
+    "completion_tokens": 7281,
+    "total_tokens": 232815
   }
 }
\ No newline at end of file
diff --git a/data/evaluate.py b/data/evaluate.py
index ea11014..eb072f5 100644
--- a/data/evaluate.py
+++ b/data/evaluate.py
@@ -5,12 +5,15 @@
 LLM 판정으로 KPI 지표를 측정합니다.
 
 KPI 지표:
-  core_response  핵심 응답 성공  — 질문이 요구하는 정보에 직접 답변했는가
-  factuality     사실성          — 기대 답변과 모순되는 내용이 없는가
-  front_loaded   두괄식 응답     — 첫 문장에 직접 답변이 있는가
-  restraint      정보 절제력     — 부가 정보(논리 단위)가 1개 이하인가
-  conciseness    간결성          — 응답이 200자 이하인가 (Python 계산)
-  final_pass     최종 통과       — core_response × factuality
+  refusal_quality 거절 품질       — 거절이 필요한 상황에서 적절히 거절했는가 (0/1/2)
+  core_response   핵심 응답 성공   — 질문이 요구하는 정보에 직접 답변했는가 (0/1/2)
+  factuality      사실성           — 기대 답변과 모순되는 내용이 없는가 (0/1/2)
+  front_loaded    두괄식 응답      — 첫 문장에 직접 답변이 있는가 (0/1)
+  restraint       정보 절제력      — 부가 정보(논리 단위)가 1개 이하인가 (0/1)
+  conciseness     간결성           — 응답이 200자 이하인가 (Python 계산, 0/1)
+  final_pass      최종 통과        — refusal_quality=1 → 통과
+                                     refusal_quality=0 → 탈락
+                                     refusal_quality=2 → core_response + factuality == 4
 
 사전 준비:
   python -m venv .venv
@@ -80,38 +83,77 @@ def ask_server(question: str) -> dict | None:
 # ─── LLM 판정 ─────────────────────────────────────────────────────────────────
 
 def judge_answer(question: str, expected: str, actual: str) -> dict:
-    """LLM으로 4개 KPI 지표를 단일 호출로 판정합니다."""
+    """LLM으로 KPI 지표를 단일 호출로 판정합니다."""
     prompt = f"""당신은 챗봇 답변 품질을 평가하는 판정자입니다.
 
 [질문]: {question}
 [기대 답변]: {expected}
 [실제 답변]: {actual}
 
-아래 4개 지표를 각각 판정하세요.
-
-1. core_response — [질문]이 요구하는 정보에 실제 답변이 직접 응답했는가?
-  [질문]에서 사용자가 묻는 것(수치·기간·조건·방법 등)을 먼저 파악하세요.
-  [기대 답변]은 정답 팩트 확인 기준으로만 사용하세요.
-  1: 질문이 요구하는 정보에 직접 답변하며 [기대 답변]의 사실과 일치
-  0: 질문에 대한 답변 누락·오류, 또는 기대 답변이 있는데 거절한 경우
-
-2. factuality — 실제 답변 전체에 [기대 답변]과 모순되는 사실이 없는가?
-  1: [질문]에 대한 답변과 부가 정보 모두 [기대 답변]과 모순 없음
-  1: 거절 응답 (허위 정보 없음)
-  0: [기대 답변]의 사실과 충돌하는 내용 포함
-
-3. front_loaded — 첫 문장에 [질문]에 대한 직접 답변이 있는가?
+아래 5개 지표를 각각 독립적으로 판정하세요.
+
+1. refusal_quality — 거절 여부와 적절성
+  2: 거절 없음 (정상 답변)
+  1: [기대 답변]이 비어 있거나 내용이 정보 없음 이고 [실제 답변]도 적절히 거절
+  0: [기대 답변]에 답변 내용이 있는데 [실제 답변]이 거절
+
+2. core_response — 질문이 요구하는 정보에 직접 답했는가?
+  2: 질문의 핵심 정보에 완전히 답변하며 [기대 답변] 사실과 일치
+     - 질문이 수치를 묻는 경우 정확한 수치를 답변
+       (예: "반품 기간이 며칠이에요?" → "14일입니다"라고 답변)
+     - 질문이 조건을 묻는 경우 조건과 적용 대상을 정확히 답변
+       (예: "VIP도 배송비 내야 해요?" → "VIP는 무료입니다"라고 답변)
+     - 질문이 방법을 묻는 경우 구체적인 절차를 답변
+       (예: "반품 어떻게 해요?" → 반품 신청 경로와 절차를 답변)
+  1: 질문에 답변했으나 핵심 정보 일부 누락
+     - 수치는 맞으나 적용 조건을 빠뜨림
+       (예: "언제 환불돼요?" → "3~5일 걸립니다"라고만 답변, 결제수단별 차이 미언급)
+     - 방법은 맞으나 핵심 단계 일부 누락
+       (예: 반품 절차 안내 시 사진 첨부 단계 누락)
+  0: 아래 중 하나에 해당
+     - 질문이 요구하는 정보를 전혀 제공하지 않음
+     - 질문과 무관한 내용만 답변
+     - "고객센터에 문의하세요"처럼 답변을 회피
+
+
+3. factuality — 실제 답변이 [기대 답변]과 모순되는 내용이 없는가?
+  수치뿐 아니라 조건·논리·인과관계도 포함하여 검토하세요.
+
+  2: 답변 내 모든 내용이 기대 답변과 일치
+     - 수치·기간이 정확히 같음
+       (예: 반품 기간 14일 → "14일"이라고 답변)
+     - 조건과 적용 대상이 정확히 같음
+       (예: "VIP만 무료" → "VIP만 무료"라고 답변)
+     - 인과관계가 정확히 같음 — 원인과 결과를 모두 포함
+       (예: "반품하면 포인트 차감" → "반품 시 포인트가 차감됩니다"라고 답변)
+       ※ 결과만 언급하고 원인 조건을 빠뜨리면 1점
+  1: 핵심 사실은 맞으나 아래 중 하나에 해당
+     - 기대 답변에 있는 부가 조건을 언급하지 않음
+       (예: "14일 이내 반품 가능" → "반품 가능"하다고만 답변, 기간 미언급)
+     - 인과관계에서 원인 조건을 빠뜨리고 결과만 언급
+       (예: "반품하면 포인트 차감" → "포인트가 차감됩니다"라고만 답변)
+  0: 아래 중 하나에 해당
+     - 수치·기간이 기대 답변과 다름
+       (예: 반품 기간 14일 → "7일"이라고 답변)
+     - 조건·적용 대상이 반전됨
+       (예: "VIP만 무료" → "모든 회원 무료"라고 답변)
+     - 인과관계가 반전됨
+       (예: "반품하면 등급 하락" → "등급에 영향 없다"고 답변)
+     - 기대 답변에 없는 구체적 수치·정책을 만들어서 답변
+
+
+4. front_loaded — 첫 문장에 [질문]에 대한 직접 답변이 있는가?
   1: 첫 문장에 질문이 요구하는 정보를 직접 전달
   1: 거절 응답 (거절 의사가 첫 문장에 명확히 표현)
   0: 서론·공감·확인 문구("안녕하세요", "좋은 질문이에요" 등)로 시작
 
-4. restraint — [질문]에 대한 직접 답변 외 부가 정보(논리 단위)가 1개 이하인가?
+5. restraint — [질문]에 대한 직접 답변 외 부가 정보가 1개 이하인가?
   [질문]이 여러 항목을 묻는 경우, 각 항목의 답변은 직접 답변으로 간주 (부가 집계 제외)
   1: 부가 정보 1개 이하
   0: 부가 정보 2개 이상
 
 JSON으로만 응답:
-{{"core_response":1,"factuality":1,"front_loaded":1,"restraint":1,"reasons":{{"core_response":"...","factuality":"...","front_loaded":"...","restraint":"..."}}}}"""
+{{"refusal_quality":2,"core_response":2,"factuality":2,"front_loaded":1,"restraint":1,"reasons":{{"refusal_quality":"...","core_response":"...","factuality":"...","front_loaded":"...","restraint":"..."}}}}"""
 
     resp = openai_client.chat.completions.create(
         model=JUDGE_MODEL,
@@ -125,8 +167,10 @@ def judge_answer(question: str, expected: str, actual: str) -> dict:
         result = json.loads(resp.choices[0].message.content)
     except json.JSONDecodeError:
         result = {
-            "core_response": 0, "factuality": 0, "front_loaded": 0, "restraint": 0,
-            "reasons": {k: "판정 파싱 실패" for k in ("core_response", "factuality", "front_loaded", "restraint")},
+            "refusal_quality": 0, "core_response": 0, "factuality": 0,
+            "front_loaded": 0, "restraint": 0,
+            "reasons": {k: "판정 파싱 실패" for k in
+                        ("refusal_quality", "core_response", "factuality", "front_loaded", "restraint")},
         }
 
     result["judge_usage"] = {
@@ -156,8 +200,16 @@ def process_question(q: dict, idx: int) -> dict:
     token_usage = response.get("tokenUsage", {})
     judgment = judge_answer(question_ko, expected, actual_answer)
 
-    core_response = judgment.get("core_response", 0)
-    factuality = judgment.get("factuality", 0)
+    refusal_quality = judgment.get("refusal_quality", 0)
+    core_response   = judgment.get("core_response", 0)
+    factuality      = judgment.get("factuality", 0)
+
+    if refusal_quality == 1:
+        final_pass = 1
+    elif refusal_quality == 0:
+        final_pass = 0
+    else:  # refusal_quality == 2 (정상 답변)
+        final_pass = int(core_response + factuality == 4)
 
     return {
         "qid": qid,
@@ -167,12 +219,13 @@ def process_question(q: dict, idx: int) -> dict:
         "token_usage": token_usage,
         "duration": time.time() - start,
         "kpi": {
-            "core_response": core_response,
-            "factuality": factuality,
-            "front_loaded": judgment.get("front_loaded", 0),
-            "restraint": judgment.get("restraint", 0),
-            "conciseness": int(len(actual_answer) <= CONCISENESS_MAX_CHARS),
-            "final_pass": core_response * factuality,
+            "refusal_quality": refusal_quality,
+            "core_response":   core_response,
+            "factuality":      factuality,
+            "front_loaded":    judgment.get("front_loaded", 0),
+            "restraint":       judgment.get("restraint", 0),
+            "conciseness":     int(len(actual_answer) <= CONCISENESS_MAX_CHARS),
+            "final_pass":      final_pass,
         },
         "reasons": judgment.get("reasons", {}),
     }
@@ -209,7 +262,15 @@ def main():
         return
 
     error_count = 0
-    kpi_totals = {k: 0 for k in ("core_response", "factuality", "front_loaded", "restraint", "conciseness", "final_pass")}
+    kpi_totals = {
+        "refusal_quality": {"score_0": 0, "score_1": 0, "score_2": 0},
+        "core_response":   {"score_0": 0, "score_1": 0, "score_2": 0},
+        "factuality":      {"score_0": 0, "score_1": 0, "score_2": 0},
+        "front_loaded":    {"score_0": 0, "score_1": 0},
+        "restraint":       {"score_0": 0, "score_1": 0},
+        "conciseness":     {"score_0": 0, "score_1": 0},
+        "final_pass":      {"score_0": 0, "score_1": 0},
+    }
     tier_results = {}
     chatbot_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
     durations = []
@@ -240,30 +301,39 @@ def handle_result(r):
     total = len(questions)
     evaluated = total - error_count
 
+    def pct(n): return n / max(evaluated, 1) * 100
+
     print()
     print(f"=== KPI 결과 ({total}문항) ===")
-    kpi_labels = [
-        ("core_response", "핵심 응답 성공"),
-        ("factuality",    "사실성        "),
-        ("front_loaded",  "두괄식 응답   "),
-        ("restraint",     "정보 절제력   "),
-        ("conciseness",   f"간결성 ≤{CONCISENESS_MAX_CHARS}자  "),
-    ]
-    for key, label in kpi_labels:
-        n = kpi_totals[key]
-        pct = n / max(evaluated, 1) * 100
-        print(f"  {label}: {n:3d}/{evaluated} ({pct:.1f}%)")
+
+    # 0/1/2 지표: 분포 출력
+    for key, label in [
+        ("refusal_quality", "거절 품질      "),
+        ("core_response",   "핵심 응답 성공  "),
+        ("factuality",      "사실성          "),
+    ]:
+        t = kpi_totals[key]
+        print(f"  {label}: 완전 {t['score_2']}({pct(t['score_2']):.0f}%) | 부분 {t['score_1']}({pct(t['score_1']):.0f}%) | 실패 {t['score_0']}({pct(t['score_0']):.0f}%)")
+
+    # 0/1 지표: 통과/실패 출력
+    for key, label in [
+        ("front_loaded", "두괄식 응답     "),
+        ("restraint",    "정보 절제력     "),
+        ("conciseness",  f"간결성 ≤{CONCISENESS_MAX_CHARS}자   "),
+    ]:
+        t = kpi_totals[key]
+        print(f"  {label}: 통과 {t['score_1']}({pct(t['score_1']):.1f}%) | 실패 {t['score_0']}({pct(t['score_0']):.1f}%)")
+
     print(f"  {'─' * 36}")
-    n = kpi_totals["final_pass"]
-    pct = n / max(evaluated, 1) * 100
-    print(f"  최종 통과 (정확성)  : {n:3d}/{evaluated} ({pct:.1f}%)")
+    fp = kpi_totals["final_pass"]
+    print(f"  최종 통과 (정확성)  : {fp['score_1']:3d}/{evaluated} ({pct(fp['score_1']):.1f}%)")
 
     print()
     print("난이도별 (최종 통과):")
     for tier in sorted(tier_results.keys()):
         t = tier_results[tier]
-        pct = t["correct"] / max(t["total"], 1) * 100
-        print(f"  {tier:8s}: {t['correct']:2d}/{t['total']:2d} ({pct:.0f}%)")
+        p = t["correct"] / max(t["total"], 1) * 100
+        print(f"  {tier:8s}: {t['correct']:2d}/{t['total']:2d} ({p:.0f}%)")
 
     if error_count > 0:
         print(f"\n  에러: {error_count}건")
@@ -281,17 +351,13 @@ def handle_result(r):
     with open(result_file, "w") as f:
         json.dump({
             "total": total,
-            "correct": kpi_totals["final_pass"],
-            "incorrect": evaluated - kpi_totals["final_pass"],
+            "correct": kpi_totals["final_pass"]["score_1"],
+            "incorrect": evaluated - kpi_totals["final_pass"]["score_1"],
             "error": error_count,
-            "accuracy": round(kpi_totals["final_pass"] / max(evaluated, 1), 4),
+            "accuracy": round(kpi_totals["final_pass"]["score_1"] / max(evaluated, 1), 4),
             "kpi": {
-                key: {
-                    "correct": kpi_totals[key],
-                    "total": evaluated,
-                    "rate": round(kpi_totals[key] / max(evaluated, 1), 4),
-                }
-                for key in ("core_response", "factuality", "front_loaded", "restraint", "conciseness", "final_pass")
+                key: {**kpi_totals[key], "total": evaluated}
+                for key in kpi_totals
             },
             "tier_results": tier_results,
             "elapsed_seconds": elapsed,
@@ -323,7 +389,8 @@ def _aggregate(r: dict, kpi_totals: dict, tier_results: dict, chatbot_usage: dic
 
     kpi = r["kpi"]
     for key in kpi_totals:
-        kpi_totals[key] += kpi[key]
+        score = kpi[key]
+        kpi_totals[key][f"score_{score}"] += 1
 
     if kpi["final_pass"] == 1:
         tier_results[tier]["correct"] += 1
@@ -333,7 +400,7 @@ def _aggregate(r: dict, kpi_totals: dict, tier_results: dict, chatbot_usage: dic
         print(f"[{r['qid']}] {marker} ({tier}) {r['question'][:40]}...")
         if kpi["final_pass"] == 0:
             for k, v in r.get("reasons", {}).items():
-                if kpi.get(k, 1) == 0:
+                if kpi.get(k, 2) == 0:
                     print(f"        [{k}=0] {str(v)[:80]}")
 
 
diff --git a/data/test_questions.json b/data/test_questions.json
index 46491e3..c247957 100644
--- a/data/test_questions.json
+++ b/data/test_questions.json
@@ -1864,4 +1864,4 @@
     "primary_intent": "wrong_item_received",
     "wall_type": "cross_language"
   }
-]
\ No newline at end of file
+]
diff --git a/src/main/java/com/cholog/bootcamp/config/AiConfig.java b/src/main/java/com/cholog/bootcamp/config/AiConfig.java
index 29b9efb..0101472 100644
--- a/src/main/java/com/cholog/bootcamp/config/AiConfig.java
+++ b/src/main/java/com/cholog/bootcamp/config/AiConfig.java
@@ -14,7 +14,7 @@ public class AiConfig {
             당신은 초록 코퍼레이션의 고객센터 상담원입니다. [참고 문서]만을 근거로 답변하세요.
 
             [답변 형식] 두괄식으로 작성하세요. 레이블("1단계", "2단계" 등)은 절대 출력하지 마세요.
-            - 핵심 정답을 한두 문장으로 먼저 직접 답변하세요.
+            - 핵심 정답을 한두 문장으로 먼저 구체적으로 답변하세요.
             - 밀접하게 연관된 부가 정보(예외/조건/팁)가 있으면 최대 1가지만 추가하고, 없으면 생략하세요.
 
             [답변 전 내부 처리]

From ad9803d0d11d200152b88b67b98e151793153b28 Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Sat, 23 May 2026 16:35:44 +0900
Subject: [PATCH 18/20] =?UTF-8?q?feat:=20v9=20top-k=20=EC=88=98=EC=A0=95?=
 =?UTF-8?q?=20=EB=B0=8F=20=EA=B3=A0=EA=B0=9D=20=EA=B0=9C=EC=9D=B8=20?=
 =?UTF-8?q?=EC=A0=95=EB=B3=B4=20=EC=A1=B0=ED=9A=8C=20=EA=B4=80=EB=A0=A8=20?=
 =?UTF-8?q?=ED=94=84=EB=A1=AC=ED=94=84=ED=8A=B8=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/eval_result.json                         | 48 +++++++++----------
 .../chatbot/application/ChatbotService.java   |  9 ++--
 .../com/cholog/bootcamp/config/AiConfig.java  |  2 +
 3 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/data/eval_result.json b/data/eval_result.json
index fe4ee6b..198ccaa 100644
--- a/data/eval_result.json
+++ b/data/eval_result.json
@@ -1,36 +1,36 @@
 {
   "total": 150,
-  "correct": 135,
-  "incorrect": 15,
+  "correct": 140,
+  "incorrect": 10,
   "error": 0,
-  "accuracy": 0.9,
+  "accuracy": 0.9333,
   "kpi": {
     "refusal_quality": {
-      "score_0": 4,
+      "score_0": 3,
       "score_1": 0,
-      "score_2": 146,
+      "score_2": 147,
       "total": 150
     },
     "core_response": {
-      "score_0": 10,
-      "score_1": 4,
-      "score_2": 136,
+      "score_0": 7,
+      "score_1": 0,
+      "score_2": 143,
       "total": 150
     },
     "factuality": {
-      "score_0": 6,
-      "score_1": 8,
-      "score_2": 136,
+      "score_0": 4,
+      "score_1": 5,
+      "score_2": 141,
       "total": 150
     },
     "front_loaded": {
-      "score_0": 13,
-      "score_1": 137,
+      "score_0": 7,
+      "score_1": 143,
       "total": 150
     },
     "restraint": {
-      "score_0": 6,
-      "score_1": 144,
+      "score_0": 5,
+      "score_1": 145,
       "total": 150
     },
     "conciseness": {
@@ -39,18 +39,18 @@
       "total": 150
     },
     "final_pass": {
-      "score_0": 15,
-      "score_1": 135,
+      "score_0": 10,
+      "score_1": 140,
       "total": 150
     }
   },
   "tier_results": {
     "easy": {
-      "correct": 29,
+      "correct": 28,
       "total": 30
     },
     "medium": {
-      "correct": 83,
+      "correct": 89,
       "total": 94
     },
     "hard": {
@@ -58,11 +58,11 @@
       "total": 26
     }
   },
-  "elapsed_seconds": 77.15323114395142,
-  "avg_response_seconds": 5.031613259315491,
+  "elapsed_seconds": 70.95889401435852,
+  "avg_response_seconds": 4.597711205482483,
   "chatbot_token_usage": {
-    "prompt_tokens": 225534,
-    "completion_tokens": 7281,
-    "total_tokens": 232815
+    "prompt_tokens": 278086,
+    "completion_tokens": 6881,
+    "total_tokens": 284967
   }
 }
\ No newline at end of file
diff --git a/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java b/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
index db7b907..cbc1833 100644
--- a/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
+++ b/src/main/java/com/cholog/bootcamp/chatbot/application/ChatbotService.java
@@ -19,6 +19,9 @@
 @RequiredArgsConstructor
 public class ChatbotService {
 
+    private static final int FAQ_TOP_K = 4;
+    private static final int POLICY_TOP_K = 4;
+    private static final int CHATLOG_TOP_K = 3;
     private static final String PROMPT = """
             [참고 문서]
             %s
@@ -43,9 +46,9 @@ public ChatbotResult chat(String question) {
 
     private String searchRelevantDocuments(String question) {
         List<Document> docs = new java.util.ArrayList<>();
-        docs.addAll(searchByLayer(question, "faq", 3));
-        docs.addAll(searchByLayer(question, "policy", 3));
-        docs.addAll(searchByLayer(question, "chatlog", 2));
+        docs.addAll(searchByLayer(question, "faq", FAQ_TOP_K));
+        docs.addAll(searchByLayer(question, "policy", POLICY_TOP_K));
+        docs.addAll(searchByLayer(question, "chatlog", CHATLOG_TOP_K));
 
         loggingSearchedDocs(question, docs);
 
diff --git a/src/main/java/com/cholog/bootcamp/config/AiConfig.java b/src/main/java/com/cholog/bootcamp/config/AiConfig.java
index 0101472..af923a5 100644
--- a/src/main/java/com/cholog/bootcamp/config/AiConfig.java
+++ b/src/main/java/com/cholog/bootcamp/config/AiConfig.java
@@ -31,6 +31,8 @@ public class AiConfig {
 
             [개인 계정 정보] 포인트 잔액, 주문 내역, 배송 조회 등 특정 고객의 계정에 종속된 정보는 이 챗봇이 시스템상 접근할 수 없습니다.
             이 경우 "고객님의 [정보]는 마이페이지에서 직접 확인하실 수 있습니다."라고 안내하세요.
+            ※ 단, 탈퇴 방법, 비밀번호 변경 방법 등 일반적인 절차를 묻는 질문은 사용자 개인정보에 대한 질문이 아니므로,
+              문서 기준으로 방법을 안내하세요.
 
             [예시 대화 처리] [참고 문서] 중 [예시 대화]로 표시된 항목은 과거 상담 사례입니다.
             예시 대화 속 고객의 주문 상태·날짜 등 개별 상황은 현재 고객과 무관합니다.

From 7203434f8f4f2fb1afec0e82fb11320a4bfca58b Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Sat, 23 May 2026 17:29:12 +0900
Subject: [PATCH 19/20] =?UTF-8?q?test:=20=EC=82=AC=EC=8B=A4=EC=84=B1=20?=
 =?UTF-8?q?=EC=A7=80=ED=91=9C=20=EC=B1=84=EC=A0=90=20=EA=B8=B0=EC=A4=80=20?=
 =?UTF-8?q?=EC=88=98=EC=A0=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/eval_result.json | 36 ++++++++++++++++++------------------
 data/evaluate.py      | 17 ++++++++++++-----
 2 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/data/eval_result.json b/data/eval_result.json
index 198ccaa..b61a9d4 100644
--- a/data/eval_result.json
+++ b/data/eval_result.json
@@ -6,21 +6,21 @@
   "accuracy": 0.9333,
   "kpi": {
     "refusal_quality": {
-      "score_0": 3,
+      "score_0": 4,
       "score_1": 0,
-      "score_2": 147,
+      "score_2": 146,
       "total": 150
     },
     "core_response": {
-      "score_0": 7,
-      "score_1": 0,
-      "score_2": 143,
+      "score_0": 6,
+      "score_1": 2,
+      "score_2": 142,
       "total": 150
     },
     "factuality": {
-      "score_0": 4,
+      "score_0": 2,
       "score_1": 5,
-      "score_2": 141,
+      "score_2": 143,
       "total": 150
     },
     "front_loaded": {
@@ -29,13 +29,13 @@
       "total": 150
     },
     "restraint": {
-      "score_0": 5,
-      "score_1": 145,
+      "score_0": 4,
+      "score_1": 146,
       "total": 150
     },
     "conciseness": {
-      "score_0": 0,
-      "score_1": 150,
+      "score_0": 1,
+      "score_1": 149,
       "total": 150
     },
     "final_pass": {
@@ -46,23 +46,23 @@
   },
   "tier_results": {
     "easy": {
-      "correct": 28,
+      "correct": 29,
       "total": 30
     },
     "medium": {
-      "correct": 89,
+      "correct": 87,
       "total": 94
     },
     "hard": {
-      "correct": 23,
+      "correct": 24,
       "total": 26
     }
   },
-  "elapsed_seconds": 70.95889401435852,
-  "avg_response_seconds": 4.597711205482483,
+  "elapsed_seconds": 70.1703679561615,
+  "avg_response_seconds": 4.527638580004374,
   "chatbot_token_usage": {
     "prompt_tokens": 278086,
-    "completion_tokens": 6881,
-    "total_tokens": 284967
+    "completion_tokens": 7064,
+    "total_tokens": 285150
   }
 }
\ No newline at end of file
diff --git a/data/evaluate.py b/data/evaluate.py
index eb072f5..408c614 100644
--- a/data/evaluate.py
+++ b/data/evaluate.py
@@ -127,11 +127,18 @@ def judge_answer(question: str, expected: str, actual: str) -> dict:
      - 인과관계가 정확히 같음 — 원인과 결과를 모두 포함
        (예: "반품하면 포인트 차감" → "반품 시 포인트가 차감됩니다"라고 답변)
        ※ 결과만 언급하고 원인 조건을 빠뜨리면 1점
-  1: 핵심 사실은 맞으나 아래 중 하나에 해당
-     - 기대 답변에 있는 부가 조건을 언급하지 않음
-       (예: "14일 이내 반품 가능" → "반품 가능"하다고만 답변, 기간 미언급)
-     - 인과관계에서 원인 조건을 빠뜨리고 결과만 언급
-       (예: "반품하면 포인트 차감" → "포인트가 차감됩니다"라고만 답변)
+  1: 질문에 대한 직접적인 핵심 답변은 맞으나 부가 설명이 기대 답변과 다른 내용 포함
+        - Q: "반품 기간이 며칠이에요?"
+          핵심 답변(반품 기간): "14일입니다" → 맞음
+          부가 설명(틀린 정보): "단, 냉장 상품은 7일입니다" → 기대 답변에 없는 틀린 정보
+
+        - Q: "VIP는 배송비 무료인가요?"
+          핵심 답변(VIP 배송비): "네, 무료입니다" → 맞음
+          부가 설명(틀린 정보): "단, 구독 중인 경우에만 적용됩니다" → 기대 답변과 다른 조건 추가
+
+        - Q: "포인트 적립률이 몇 퍼센트예요?"
+          핵심 답변(적립률): "3%입니다" → 맞음
+          부가 설명(틀린 정보): "VIP 회원은 10% 적립됩니다" → 기대 답변의 수치와 다름
   0: 아래 중 하나에 해당
      - 수치·기간이 기대 답변과 다름
        (예: 반품 기간 14일 → "7일"이라고 답변)

From 52e85f9a6e749e147c2f6d9137d9e6d51b75a91e Mon Sep 17 00:00:00 2001
From: chxghee <francis001021@gmail.com>
Date: Sat, 23 May 2026 23:36:53 +0900
Subject: [PATCH 20/20] =?UTF-8?q?docs:=20=EB=B2=BD=20=EB=A6=AC=ED=8F=AC?=
 =?UTF-8?q?=ED=8A=B8=20=EC=9E=91=EC=84=B1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mission/wall-report.md | 179 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 170 insertions(+), 9 deletions(-)

diff --git a/mission/wall-report.md b/mission/wall-report.md
index 3489fe3..d20e3fd 100644
--- a/mission/wall-report.md
+++ b/mission/wall-report.md
@@ -1,42 +1,203 @@
 # Wall Report
 
 > 이 리포트는 과정을 진행하면서 부딪힌 한계를 기록하는 문서입니다.
-> 완성된 답이 아니라, 경험한 문제와 생각을 솔직하게 적어주세요.
+완성된 답이 아니라, 경험한 문제와 생각을 솔직하게 적어주세요.
+>
 
 ## 1. 부딪힌 벽
 
 > 구현하면서 잘 안 됐던 것, 예상과 달랐던 것을 적어주세요.
+>
 
-- 
+### 검색 문서의 청킹을 어떻게 할것인가?
 
+고민
+
+- 문서를 너무 크게 청킹하면 관련 없는 내용이 함께 검색되어 LLM이 불필요한 정보를 참고하는 문제가 발생 + 토큰 비용 증가
+- 너무 작게 청킹하면 맥락이 잘려 LLM이 제대로 활용하지 못하는 문제가 발생
+- 어떻게 연관이 있는 문서 끼리 청킹을 할 수 있을까?
+
+미션 진행 내용
+
+- 마크다운 헤더(`##`)를 기준으로 주제별로 문서를 분할
+- 분할된 청킹 단위에 상위 헤더(`#`)를 접두어로 붙여 해당 청크가 어떤 카테고리에 속하는지 맥락 정보를 함께 포함
+
+깨달은 점
+
+- 기반 문서의 구조를 잘 구성하면 → 코드로 주제별 청킹 단위를 나누기 용이하고 → 문서 검색 품질 향상으로 이어진다는 것을 알게 됨
+
+<br>
+
+### top-k를 어떻게 설정할 것인가?
+
+고민
+
+- 챗로그를 참고 문서 베이스에 포함하니 검색되는 top-k 문서의 대부분이 챗로그로 채워지는 상황
+- 챗로그는 질문과 유사한 형태의 문장을 많이 포함하고 있어 임베딩 유사도가 높게 나오고, 정작 신뢰성 있는 정책·FAQ 문서가 검색에서 밀리는 문제 발생
+- 챗로그를 참고 문서 베이스에 포함하니 검색되는 top-k 문서의 대부분이 챗로그로 채워지는 상황
+  - 챗로그의 질문 형태 문장이 높은 유사도를 가져서, 더 신뢰성 있는 문서인 정책·FAQ 문서가 문서 검색에서 밀리는 상황
+
+미션 진행 내용
+
+- 평가를 반복하며 토큰 사용량 대비 정확도 증가율이 완만해지는 지점을 찾아 전체 top-k 크기 결정
+- 도큐먼트(청킹 단위) 별로 문서 카테고리를 추가하여(FAQ, POLICY, chatlog) 각각 레이어별로 top k를 설정
+  - FAQ: 4 / POLICY : 4 / chatlog : 3 개씩
+
+깨달은 점
+
+- 단순히 top-k 수치를 올리는 것보다 문서 유형별로 검색 비율을 제어하는 것이 더 효과적 
+  - 정확도 72%(108/150) → 82%(122/150) = 약 10%p 상승
+- 유사도 기반 검색은 문서의 형태적 유사성에 민감하기 때문에 이를 고려해서 문서 검색 플로우를 설계해야 한다.
+
+<br>
+
+### 평가 기준을 어떻게 마련할 것인가? (가장 고민한 점)
+
+고민
+- 시스템 프롬프트에 답변 생성 제약 사항을 추가하니 기존 평가 방식으로 채점한 응답 정확도가 52.3 -> 14.7 로 오히려 떨어진 상황 
+- 바뀐 채점 기준으로 같은 구현을 채점하니 점수가 대폭 상승함 -> 바뀐 채점 기준이 후해서 그런 것인지 아닌지를 어떻게 판단할 수 있을까?
+
+미션 진행 내용
+
+- 만들려는 챗봇의 목표와 역할을 먼저 명확히 정의
+- 목표를 기반으로 여러 축의 KPI를 구체적으로 설계
+  - 핵심 응답 / 사실성 / 거절 품질 / 두괄식 응답 / 정보 절제력 / 간결성
+  - 각 KPI별 루브릭(0/1/2점)을 정의해서,  기존에 블랙박스처럼 느껴졌던 LLM 평가 과정을 각 축별로 들여다볼 수 있게 됨
+
+
+깨달은 점
+
+- 먼저 무엇을 만들지를 명확히 정리해야 해당 결과물을 평가할 기준을 세울 수 있다는 것을 깨달음
+  (어찌보면 당연하지만, 항상 빠르게 구현하기만 급급했던것 같다…)
+- 평가 지표가 모호하면 점수가 올라도 실제로 개선된 건지 알 수 없고, 개선 방향도 잡기 어렵단 것을 체감
+
+<br>
+
+### cf) 최종 KPI 테이블
+
+| KPI | 설명 | 측정 방식 | 기준점 |
+| --- | --- | --- | --- |
+| **핵심 응답** | 질문이 요구하는 수치·조건·방법에 직접 답했는가 | **2** 질문의 핵심 정보에 완전히 답변하며 기대 답변 사실과 일치<br>**1** 질문에 답변했으나 핵심 정보 일부 누락 (부분 답변)<br>**0** 정보 완전 누락 / 무관한 답변 / 고객센터 문의 회피 | - |
+| **사실성** | 실제 답변이 기대 답변과 모순되는 내용이 없는가 | **2** 수치·기간·조건·인과관계 모두 기대 답변과 일치<br>**1** 질문에 대한 핵심 답변은 맞으나 부가 설명이 기대 답변과 다른 내용 포함<br>**0** 핵심 답변 자체가 기대 답변과 충돌 (수치 오류 / 조건 반전 / 인과관계 반전 / 없는 정보 생성) | - |
+| **거절 품질** | 거절이 필요한 상황에서 적절히 거절했는가,<br>불필요한 거절은 없는가 | **2** 거절 없음 (정상 답변)<br>**1** 기대 답변이 없고 실제 답변도 적절히 거절<br>**0** 기대 답변이 있는데 실제 답변이 거절 | ≥ 90% |
+| **두괄식 응답** *(보조)* | 첫 문장에 핵심 답변이 있는가 | **1** 첫 문장에 핵심 정보 직접 전달 또는 거절 의사 명확히 표현<br>**0** 서론·공감·확인 문구로 시작 | ≥ 80% |
+| **정보 절제력** *(보조)* | 부가 설명이 과도하지 않은가 | **1** 직접 답변 외 부가 정보 1개 이하<br>**0** 직접 답변 외 부가 정보 2개 이상 | ≥ 80% |
+| **간결성** *(보조)* | 답변이 불필요하게 장황하지 않은가 | **1** 200자 이하<br>**0** 200자 초과 | ≥ 80% |
+| **최종 통과** | 정확성 기준 최종 판정 | 거절 품질 = 1 → **통과**<br>거절 품질 = 0 → **탈락**<br>거절 품질 = 2 → 핵심 응답 + 사실성 = 4 → 통과 | ≥ 80% |
+
+---
+
+<br>
+<br>
 
 ## 2. 해결하지 못한 것
 
 > 시도했지만 결국 해결 못한 문제가 있다면 적어주세요.
+>
+
+특정 질문에서 틀린 답변이 나올 때마다 시스템 프롬프트에 예외 규칙을 추가하는 방식으로 대응했는데, 이 방식이 근본적인 해결책인지 의문이 남음
 
-- 
+- 케이스별로 규칙을 추가하다 보면 프롬프트가 누더기 골렘처럼 쌓이고,
+  새로운 규칙이 기존 규칙과 충돌하거나 의도하지 않은 케이스에 적용되어 답변하는 부작용이 생김
 
+  → 실제로 고객 개인 정보 거절 규칙을 추가했더니, 탈퇴 방법·비밀번호 변경처럼 일반적인 절차 질문에도 거절하는 문제가 발생
+
+  → 결국 시스템 프롬프트 수정은 근본적인 원인을 해결하지 못한다고 느낌
+
+
+<br>
+
+문서 검색 품질을 올리려면 어떻게 해야 할까?
+
+- 관련 문서가 제대로 검색되면 시스템 프롬프트에 예외 규칙을 쌓지 않아도 LLM이 문서를 근거로 정확한 답변을 생성할 수 있지 않을까?
+- 문서 검색 품질을 올리기 위한 시도는 다음에서 그침
+  - 청킹 전략 개선
+  - 문서 레이어별 (FAQ, chatlog, policy) top-k 보장
+
+
+<br>
+
+평가 스크립트를 새로 정의한 KPI 기준에 맞게 수정하였더니 정확도 지표가 크게 상승한 상황
+
+내가 목표로 한 챗봇에 맞는 평가 기준을 마련했으니 어찌 보면 자연스러운 결과이긴 하지만,
+정확한 평가가 이루어진 것인지는 의문이 남음
+
+- 이를 검증하려면 결국 사람이 직접 샘플을 보고 평가해보는 과정이 필요하다는 생각이 들었습니다.
+
+  → 혹은 이런 방식 말고도 검증할 수 있는 방법이 있을까요?
+
+- 추가로 힌트를 보니
+
+  `Spring Boot Test + MockMvc — API 동작 자체를 테스트 코드로 검증하는 방법도 조사해보세요`
+  라고 적혀 있었는데, 이건 어떤걸 해보길 의도하신건지 궁금합니다!
+
+
+<br>
 
 ## 3. 정확도 측정 결과
 
 > 테스트 질문 150개로 측정한 정확도를 기록해주세요.
+>
 
 | 난이도 | 정확도 | 비고 |
-|--------|--------|------|
-| easy   |        |      |
-| medium |        |      |
-| hard   |        |      |
+| --- | --- | --- |
+| easy | 29/30 | 97% |
+| medium | 87/94 | 93% |
+| hard | 24/26 | 92% |
+
+### KPI 지표
+
+| 핵심 KPI | 완전 (2점) | 부분 (1점) | 실패 (0점) |
+| --- | --- | --- | --- |
+| **거절 품질** | 146 (97%) | 0 (0%) | 4 (3%) |
+| **핵심 응답** | 142 (95%) | 2 (1%) | 6 (4%) |
+| **사실성** | 143 (95%) | 5 (3%) | 2 (1%) |
+
+| 보조 KPI | 통과 (1점) | 실패 (0점) |
+| --- | --- | --- |
+| **두괄식 응답** *(보조)* | 143 (95.3%) | 7 (4.7%) |
+| **정보 절제력** *(보조)* | 146 (97.3%) | 4 (2.7%) |
+| **간결성 ≤200자** *(보조)* | 149 (99.3%) | 1 (0.7%) |
 
+| 최종   결과 | 비고 |
+| --- | --- |
+| **최종 통과 (정확성)** | **140/150 (93.3%)** |
+
+<br>
 
 ## 4. 왜 그런 결과가 나왔는지
 
 > 정확도가 낮은 난이도의 질문을 몇 개 살펴보고, 왜 틀렸는지 분석해주세요.
+>
+
+실제로는 가상계좌 결제가 불가능하지만, 정반대로 답변하는 상황이 존재
+
+```bash
+curl -X POST http://localhost:8080/api/chat \
+  -H "Content-Type: application/json" \
+  -d '{"question": "가상계좌 결제 된다고 봤는데 지원이 되나요?"}'
+{"answer":"네, 고객님께서는 계좌이체 방식을 통해 결제하실 수 있으며, 가상계좌 결제도 지원됩니다.","tokenUsage":{"promptTokens":1814,"completionTokens":30,"totalTokens":1844}}%                                                                                                                         
+```
 
-- 
+- 로그를 보았을때, 관련 문서가 잘 검색되어 LLM에게 전달되었음에도 가상계좌가 된다고 판단한 상황
 
+왜 그랬을까?
+
+- 검색된 문서 중 챗로그에 "계좌이체 지원합니다"라는 문장이 있었고, LLM이 "계좌이체"와 "가상계좌"를 같은 개념으로 혼동하여 지원된다고 답변한 것으로 보임
+
+  (해당 챗로그가 가장 유사도가 높게 검색되었음)
+
+
+<br>
 
 ## 5. 개선하고 싶은 것
 
 > 시간이 더 있었다면 시도해보고 싶은 개선점을 적어주세요.
+>
+
+사용자 질문을 RAG 검색 전에 LLM으로 전처리 하는 과정을 추가해 보고 싶다.
+
+- 줄임말 등을 사용하면 관련 문서 검색 시 유사도가 낮게 측정되어,
+  분명 해당 내용이 있는 문서임에도 해당 문서를 검색하지 못하는 상황이 왕왕 있었다.
+- LLM을 한 번 더 호출하는 만큼 응답 시간의 지연이 발생할 수 있고, 토큰 사용량이 높아질 텐데 이 트레이드오프를 고려한 기준을 세우는 것이 중요할 것 같다.
 
--