From 9224c7773c908823fb3fe29c939378f75101fd19 Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Sat, 6 Dec 2025 00:27:22 +0530
Subject: [PATCH 1/4] WIP: switch PDF extraction to pymupdf4llm, stream docs to JSONL

---
 common/requirements.txt              |   2 +-
 common/utils/image_data_extractor.py | 165 ++-----
 common/utils/text_extractors.py      | 423 +++++++++++------
 graphrag/app/supportai/supportai.py  |  65 +--
 licenses/pymupdf4llm-AGPL-3.0.txt    | 661 +++++++++++++++++++++++++++
 5 files changed, 1013 insertions(+), 303 deletions(-)
 create mode 100644 licenses/pymupdf4llm-AGPL-3.0.txt

diff --git a/common/requirements.txt b/common/requirements.txt
index 562c2f6..84b5061 100644
--- a/common/requirements.txt
+++ b/common/requirements.txt
@@ -110,7 +110,7 @@ packaging==24.2
 pandas==2.2.3
 #pathtools==0.1.2
 pillow==11.2.1
-PyMuPDF==1.26.4
+pymupdf4llm==0.2.0
 platformdirs==4.3.8
 pluggy==1.6.0
 prometheus_client==0.22.1
diff --git a/common/utils/image_data_extractor.py b/common/utils/image_data_extractor.py
index bde9c97..bfd07cd 100644
--- a/common/utils/image_data_extractor.py
+++ b/common/utils/image_data_extractor.py
@@ -1,165 +1,62 @@
 import base64
 import io
 import logging
-import os
-import uuid
-import hashlib
-from pathlib import Path
 from langchain_core.messages import HumanMessage, SystemMessage
 
 from common.config import get_multimodal_service
 
 logger = logging.getLogger(__name__)
 
-
-
-def describe_image_with_llm(image_input):
+def describe_image_with_llm(file_path):
     """
-    Send image (pixmap or PIL image) to LLM vision model and return description.
-    Uses multimodal_service from config if available, otherwise falls back to completion_service.
-    Currently supports: OpenAI, Azure OpenAI, Google GenAI, and Google VertexAI
+    Read the image at file_path, encode it as base64 JPEG, and return the multimodal LLM's description.
     """
     try:
+        from PIL import Image as PILImage
+        
         client = get_multimodal_service()
         if not client:
             return "[Image: Failed to create multimodal LLM client]"
-        
+
+        # Read image and convert to base64
+        pil_image = PILImage.open(file_path)
         buffer = io.BytesIO()
-        # Convert to RGB if needed for better compatibility
-        if image_input.mode != 'RGB':
-            image_input = image_input.convert('RGB')
-        image_input.save(buffer, format="JPEG", quality=95)
-        b64_img = base64.b64encode(buffer.getvalue()).decode("utf-8")
+        if pil_image.mode != 'RGB':
+            pil_image = pil_image.convert('RGB')
+        pil_image.save(buffer, format="JPEG", quality=95)
+        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
 
-        # Build messages (system + human)
         messages = [
-        SystemMessage(
-            content="You are a helpful assistant that describes images concisely for document analysis."
-        ),
-        HumanMessage(
-            content=[
-                {
-                    "type": "text",
-                    "text": (
-                        "Please describe what you see in this image and "
-                        "if the image has scanned text then extract all the text. "
-                        "if the image has any logo, icon, or branding element, try to describe it with text. "
-                        "Focus on any text, diagrams, charts, or other visual elements."
-                        "If the image is purely a logo, icon, or branding element, start your response with 'LOGO:' or 'ICON:'."
-                    ),
-                },
-                 {
-                     "type": "image_url",
-                     "image_url": {"url": f"data:image/jpeg;base64,{b64_img}"},
-                 },
-            ]
-        ),
+            SystemMessage(
+                content="You are a helpful assistant that describes images concisely for document analysis."
+            ),
+            HumanMessage(
+                content=[
+                    {
+                        "type": "text",
+                        "text": (
+                            "Please describe what you see in this image and "
+                            "if the image has scanned text then extract all the text. "
+                            "If the image has any graph, chart, table, or other diagram, describe it. "
+                        ),
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
+                    },
+                ],
+            ),
         ]
 
-        # Get response from LangChain LLM client
-        # Access the underlying LangChain client
         langchain_client = client.llm
         response = langchain_client.invoke(messages)
 
-        return response.content if hasattr(response, 'content') else str(response)
+        return response.content if hasattr(response, "content") else str(response)
 
     except Exception as e:
         logger.error(f"Failed to describe image with LLM: {str(e)}")
         return "[Image: Error processing image description]"
 
 
-def save_image_and_get_markdown(image_input, context_info="", graphname=None):
-    """
-    Save image locally to static/images/ folder and return markdown reference with description.
-    
-    LEGACY/OLD APPROACH: Used for backward compatibility with JSONL-based loading.
-    Images are saved as files and served via /ui/images/ endpoint with img:// protocol.
-    
-    For NEW direct loading approach, images are stored in Image vertex as base64
-    and served via /ui/image_vertex/ endpoint with image:// protocol.
-    
-    Args:
-        image_input: PIL Image object
-        context_info: Optional context (e.g., "page 3 of invoice.pdf")
-        graphname: Graph name to organize images by graph (optional)
-    
-    Returns:
-        dict with:
-            - 'markdown': Markdown string with img:// reference
-            - 'image_id': Unique identifier for the saved image
-            - 'image_path': Path where image was saved to static/images/
-    """
-    try:
-        # FIRST: Get description from LLM to check if it's a logo
-        description = describe_image_with_llm(image_input)
-        
-        # Check if the image is a logo, icon, or decorative element BEFORE saving
-        # These should be filtered out as they're not content-relevant
-        description_lower = description.lower()
-        logo_indicators = ['logo', 'icon', 'branding', 'watermark', 'trademark', 'company logo', 'brand logo']
-        
-        if any(indicator in description_lower for indicator in logo_indicators):
-            logger.info(f"Detected logo/icon in image, skipping: {description[:100]}")
-            return None
-        
-        # If not a logo, proceed with saving the image
-        # Generate unique image ID using hash of image content
-        buffer = io.BytesIO()
-        if image_input.mode != 'RGB':
-            image_input = image_input.convert('RGB')
-        image_input.save(buffer, format="JPEG", quality=95)
-        image_bytes = buffer.getvalue()
-        
-        # Create hash-based ID (deterministic for same image)
-        image_hash = hashlib.sha256(image_bytes).hexdigest()[:16]
-        image_id = f"{image_hash}.jpg"
-        
-        # Save image to local storage directory organized by graphname
-        project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-        
-        # If graphname is provided, organize images by graph
-        if graphname:
-            images_dir = os.path.join(project_root, "static", "images", graphname)
-            # Include graphname in the image reference for URL construction
-            image_reference = f"{graphname}/{image_id}"
-        else:
-            images_dir = os.path.join(project_root, "static", "images")
-            image_reference = image_id
-        
-        os.makedirs(images_dir, exist_ok=True)
-        
-        image_path = os.path.join(images_dir, image_id)
-        
-        # Save image file (skip if already exists with same hash)
-        if not os.path.exists(image_path):
-            with open(image_path, 'wb') as f:
-                f.write(image_bytes)
-            logger.info(f"Saved content image to: {image_path}")
-        else:
-            logger.debug(f"Image already exists: {image_path}")
-        
-        # Generate markdown with custom img:// protocol (will be replaced later)
-        # Format: ![description](img://graphname/image_id) or ![description](img://image_id)
-        markdown = f"![{description}](img://{image_reference})"
-        
-        logger.info(f"Created image reference: {image_reference} with description")
-        
-        return {
-            'markdown': markdown,
-            'image_id': image_reference,
-            'image_path': image_path,
-            'description': description
-        }
-        
-    except Exception as e:
-        logger.error(f"Failed to save image and generate markdown: {str(e)}")
-        # Fallback to text description only
-        fallback_desc = f"[Image: {context_info} - processing failed]"
-        return {
-            'markdown': fallback_desc,
-            'image_id': None,
-            'image_path': None,
-            'description': fallback_desc
-        }
 
 
diff --git a/common/utils/text_extractors.py b/common/utils/text_extractors.py
index da3e22d..9b5b652 100644
--- a/common/utils/text_extractors.py
+++ b/common/utils/text_extractors.py
@@ -8,6 +8,7 @@
 import uuid
 import base64
 import io
+import threading
 from pathlib import Path
 import shutil
 import asyncio
@@ -15,6 +16,9 @@
 
 logger = logging.getLogger(__name__)
 
+# Global lock serializing pymupdf4llm calls (its table extraction is not thread-safe)
+_pymupdf4llm_lock = threading.Lock()
+
 
 class TextExtractor:
     """Class for handling text extraction from various file formats and cleanup."""
@@ -38,10 +42,11 @@ def __init__(self):
             '.jpg': 'image/jpeg'
         }
 
-    async def _process_file_async(self, file_path, folder_path_obj, graphname):
+    async def _process_file_async(self, file_path, folder_path_obj, graphname, temp_folder, jsonl_file, jsonl_lock):
         """
         Async helper to process a single file.
         Runs in thread pool to avoid blocking on I/O operations.
+        Appends documents immediately to JSONL file.
         """
         try:
             loop = asyncio.get_event_loop()
@@ -53,10 +58,21 @@ async def _process_file_async(self, file_path, folder_path_obj, graphname):
                 graphname
             )
 
+            # Append each document to JSONL file immediately
+            if doc_entries:
+                # Hold the asyncio lock so only one task appends to the JSONL file at a time
+                async with jsonl_lock:
+                    await loop.run_in_executor(
+                        None,
+                        self._append_to_jsonl,
+                        jsonl_file,
+                        doc_entries
+                    )
+            
+            # Return metadata only; any extracted documents were already appended to the JSONL file
             return {
                 'success': True,
                 'file_path': str(file_path),
-                'documents': doc_entries,
                 'num_documents': len(doc_entries)
             }
 
@@ -67,11 +83,21 @@ async def _process_file_async(self, file_path, folder_path_obj, graphname):
         except Exception as e:
             logger.warning(f"Failed to process file {file_path}: {e}")
             return {'success': False, 'file_path': str(file_path), 'error': str(e)}
+    
+    def _append_to_jsonl(self, jsonl_file, doc_entries):
+        """
+        Append document entries to JSONL file.
+        Each document is written as a separate line.
+        """
+        with open(jsonl_file, 'a', encoding='utf-8') as f:
+            for doc_data in doc_entries:
+                json_line = json.dumps(doc_data, ensure_ascii=False)
+                f.write(json_line + '\n')
 
-    async def _process_folder_async(self, folder_path, graphname=None, max_concurrent=10):
+    async def _process_folder_async(self, folder_path, graphname, temp_folder, max_concurrent=10):
         """
         Async version of process_folder for parallel file processing.
-        This prevents conflicts when multiple users process folders simultaneously.
+        Saves all documents immediately to a single JSONL file as they are processed.
         """
         logger.info(f"Processing local folder ASYNC: {folder_path} for graph: {graphname} (max_concurrent={max_concurrent})")
 
@@ -83,6 +109,13 @@ async def _process_folder_async(self, folder_path, graphname=None, max_concurren
         if not folder_path_obj.is_dir():
             raise Exception(f"Path is not a directory: {folder_path}")
 
+        # Create temp folder and JSONL file
+        os.makedirs(temp_folder, exist_ok=True)
+        jsonl_file = os.path.join(temp_folder, "processed_documents.jsonl")
+        # asyncio lock that serializes JSONL appends across concurrent tasks
+        jsonl_lock = asyncio.Lock()
+        logger.info(f"Saving processed documents to: {jsonl_file}")
+
         def safe_walk(path):
             try:
                 for item in path.iterdir():
@@ -110,13 +143,13 @@ def safe_walk(path):
 
         async def process_with_semaphore(file_path):
             async with semaphore:
-                return await self._process_file_async(file_path, folder_path_obj, graphname)
+                return await self._process_file_async(file_path, folder_path_obj, graphname, temp_folder, jsonl_file, jsonl_lock)
 
         tasks = [process_with_semaphore(fp) for fp in files_to_process]
         results = await asyncio.gather(*tasks, return_exceptions=True)
 
-        all_documents = []
         processed_files_info = []
+        total_docs = 0
 
         for result in results:
             if isinstance(result, Exception):
@@ -124,10 +157,12 @@ async def process_with_semaphore(file_path):
                 continue
 
             if result.get('success'):
-                all_documents.extend(result.get('documents', []))
+                num_docs = result.get('num_documents', 0)
+                total_docs += num_docs
+                
                 processed_files_info.append({
                     'file_path': result['file_path'],
-                    'num_documents': result.get('num_documents', len(result.get('documents', []))),
+                    'num_documents': num_docs,
                     'status': 'success'
                 })
             else:
@@ -137,23 +172,118 @@ async def process_with_semaphore(file_path):
                     'error': result.get('error', 'Unknown error')
                 })
 
-        logger.info(f"Processed {len(processed_files_info)} files, extracted {len(all_documents)} total documents")
+        logger.info(f"Processed {len(processed_files_info)} files, extracted {total_docs} total documents")
 
         return {
             'statusCode': 200,
-            'message': f'Processed {len(processed_files_info)} files, {len(all_documents)} documents',
-            'documents': all_documents,
+            'message': f'Processed {len(processed_files_info)} files, {total_docs} documents',
             'files': processed_files_info,
-            'num_documents': len(all_documents)
+            'num_documents': total_docs,
+            'temp_folder': temp_folder,
+            'jsonl_file': jsonl_file
         }
 
-    def process_folder(self, folder_path, graphname=None):
+    def process_folder(self, folder_path, graphname, temp_folder):
         """
         Process local folder with multiple file formats and extract text content.
         Uses async processing internally for parallel file handling.
+        Saves all documents to JSONL file immediately as they are processed.
+        
+        Args:
+            folder_path: Path to the folder containing files to process
+            graphname: Name of the graph (for context)
+            temp_folder: Path to save processed documents as JSONL file
         """
         logger.info(f"Processing local folder: {folder_path} for graph: {graphname}")
-        return asyncio.run(self._process_folder_async(folder_path, graphname))
+        return asyncio.run(self._process_folder_async(folder_path, graphname, temp_folder))
+    
+    def delete_file_from_jsonl(self, temp_folder, filename):
+        """
+        Delete all documents related to a specific file from the JSONL file.
+        
+        Args:
+            temp_folder: Path to the temp folder containing processed_documents.jsonl
+            filename: Original filename (e.g., "report.pdf", "stock_gs200.jpg")
+        
+        Returns:
+            dict with status and number of documents removed
+        """
+        jsonl_file = os.path.join(temp_folder, "processed_documents.jsonl")
+        
+        if not os.path.exists(jsonl_file):
+            logger.warning(f"JSONL file not found: {jsonl_file}")
+            return {'success': False, 'error': 'JSONL file not found'}
+        
+        # Get base name without extension to match doc_id
+        base_name = Path(filename).stem
+        logger.info(f"Deleting documents for file: {filename} (base_name: '{base_name}')")
+        
+        # Read all lines and filter out ones matching this file
+        remaining_lines = []
+        removed_count = 0
+        removed_doc_ids = []
+        
+        try:
+            with open(jsonl_file, 'r', encoding='utf-8') as f:
+                for line_num, line in enumerate(f, 1):
+                    line = line.strip()
+                    if not line:
+                        continue
+                    
+                    try:
+                        doc_data = json.loads(line)
+                        doc_id = doc_data.get('doc_id', '')
+                        
+                        # Check if doc_id matches the base_name or starts with base_name_
+                        # Handles: "stock_gs200" == "stock_gs200" or "stock_gs200_image_1".startswith("stock_gs200_")
+                        if doc_id == base_name or doc_id.startswith(f"{base_name}_"):
+                            removed_count += 1
+                            removed_doc_ids.append(doc_id)
+                            logger.info(f"Removing document: {doc_id}")
+                        else:
+                            remaining_lines.append(line)
+                    except json.JSONDecodeError as e:
+                        logger.warning(f"Skipping invalid JSON at line {line_num}: {e}")
+                        # Keep invalid lines in case they're important
+                        remaining_lines.append(line)
+            
+            if removed_count == 0:
+                logger.warning(f"No documents found matching base_name: '{base_name}'")
+                return {
+                    'success': False,
+                    'error': f'No documents found for {filename}',
+                    'removed_count': 0
+                }
+            
+            # If no lines remain, delete the entire temp folder
+            if not remaining_lines:
+                logger.info(f"No documents remaining, deleting temp folder: {temp_folder}")
+                import shutil
+                shutil.rmtree(temp_folder, ignore_errors=True)
+                return {
+                    'success': True,
+                    'removed_count': removed_count,
+                    'removed_doc_ids': removed_doc_ids,
+                    'temp_folder_deleted': True
+                }
+            
+            # Write remaining lines back to JSONL
+            with open(jsonl_file, 'w', encoding='utf-8') as f:
+                for line in remaining_lines:
+                    f.write(line + '\n')
+            
+            logger.info(f"Removed {removed_count} documents ({', '.join(removed_doc_ids)}), {len(remaining_lines)} remaining")
+            return {
+                'success': True,
+                'removed_count': removed_count,
+                'removed_doc_ids': removed_doc_ids,
+                'remaining_count': len(remaining_lines),
+                'temp_folder_deleted': False
+            }
+            
+        except Exception as e:
+            logger.error(f"Error deleting from JSONL: {e}")
+            return {'success': False, 'error': str(e)}
 
 
 def extract_text_from_file_with_images_as_docs(file_path, graphname=None):
@@ -183,137 +313,167 @@ def extract_text_from_file_with_images_as_docs(file_path, graphname=None):
 
 def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None):
     """
-    Extract PDF as ONE markdown document with inline image references.
+    Extract PDF as ONE markdown document with inline image references using pymupdf4llm.
+    Uses unique temporary folder per PDF to allow parallel processing.
+    The extracted image folder is deleted after processing.
     """
+    # Use unique folder per PDF to allow parallel processing without conflicts
+    unique_folder_id = uuid.uuid4().hex[:12]
+    image_output_folder = Path(f"tg_temp_{unique_folder_id}")
+
     try:
-        import fitz  # PyMuPDF
+        import pymupdf4llm
         from PIL import Image as PILImage
+        from common.utils.image_data_extractor import describe_image_with_llm
+        from common.utils.markdown_parsing import MarkdownProcessor
 
-        doc = fitz.open(file_path)
-        markdown_parts = []
-        image_entries = []
-        image_counter = 0
+        # Ensure clean slate - remove folder if it exists from failed previous run
+        if image_output_folder.exists():
+            shutil.rmtree(image_output_folder, ignore_errors=True)
 
-        for page_num, page in enumerate(doc, start=1):
-            if page_num > 1:
-                markdown_parts.append("\n\n")
-            markdown_parts.append(f"--- Page {page_num} ---\n") #Avoid to be splitted as a single chunk
+        # Convert PDF to markdown with extracted image files
+        # Use lock because pymupdf4llm's table extraction is not thread-safe
+        # See: https://github.com/pymupdf/PyMuPDF/issues/3241
+        with _pymupdf4llm_lock:
+            try:
+                markdown_content = pymupdf4llm.to_markdown(
+                    file_path,
+                    write_images=True,
+                    image_path=str(image_output_folder),  # unique folder per PDF
+                    margins=0,
+                    image_size_limit=0.08,
+                )
+            except Exception:
+                # Retry with table_strategy="lines" if first attempt fails
+                try:
+                    markdown_content = pymupdf4llm.to_markdown(
+                        file_path,
+                        write_images=True,
+                        image_path=str(image_output_folder),  # unique folder per PDF
+                        margins=0,
+                        image_size_limit=0.08,
+                        table_strategy="lines",
+                    )
+                except Exception as e:
+                    logger.error(f"pymupdf4llm failed for {file_path}: {e}")
+                    # Cleanup folder if it was created
+                    if image_output_folder.exists():
+                        shutil.rmtree(image_output_folder, ignore_errors=True)
+                    return [{
+                        "doc_id": base_doc_id,
+                        "doc_type": "markdown",
+                        "content": f"[PDF extraction failed: {e}]",
+                        "position": 0
+                    }]
+
+        if not markdown_content or not markdown_content.strip():
+            logger.warning(f"No content extracted from PDF: {file_path}")
+
+        # Extract image references from markdown
+        image_refs = MarkdownProcessor.extract_images(markdown_content)
+
+        if not image_refs:
+            # No image references found: still remove the temporary image folder
+            if image_output_folder.exists():
+                shutil.rmtree(image_output_folder, ignore_errors=True)
+
+            return [{
+                "doc_id": base_doc_id,
+                "doc_type": "markdown",
+                "content": markdown_content,
+                "position": 0
+            }]
 
-            blocks = page.get_text("blocks", sort=True)
-            text_blocks_with_pos = []
+        image_entries = []
+        image_counter = 0
 
-            for block in blocks:
-                block_type = block[6] if len(block) > 6 else 0
-                if block_type == 0:
-                    text = block[4].strip()
-                    if text:
-                        y_pos = block[1]
-                        text_blocks_with_pos.append({'type': 'text', 'content': text, 'y_pos': y_pos})
+        for img_ref in image_refs:
+            try:
+                img_path = Path(img_ref["path"])  # convert to Path
+                image_id = img_ref["image_id"]
+
+                # Image description
+                description = describe_image_with_llm(str(img_path))
+
+                markdown_content = MarkdownProcessor.insert_description_by_id(
+                    markdown_content,
+                    image_id,
+                    description
+                )
+
+                # Convert image to base64
+                pil_image = PILImage.open(img_path)
+                buffer = io.BytesIO()
+
+                if pil_image.mode != "RGB":
+                    pil_image = pil_image.convert("RGB")
+
+                pil_image.save(buffer, format="JPEG", quality=95)
+                image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
+
+                image_counter += 1
+                image_doc_id = f"{base_doc_id}_image_{image_counter}"
+
+                # Replace file path with tg:// protocol reference in markdown
+                markdown_content = MarkdownProcessor.replace_path_with_tg_protocol(
+                    markdown_content,
+                    image_id,
+                    image_doc_id
+                )
+
+                image_entries.append({
+                    "doc_id": image_doc_id,
+                    "doc_type": "image",
+                    "image_description": description,
+                    "image_data": image_base64,
+                    "image_format": "jpg",
+                    "parent_doc": base_doc_id,
+                    "page_number": 0,
+                    "width": pil_image.width,
+                    "height": pil_image.height,
+                    "position": image_counter
+                })
 
-            image_list = page.get_images(full=True)
-            images_with_pos = []
+            except Exception as img_error:
+                logger.warning(f"Failed to process image {img_ref.get('path')}: {img_error}")
 
-            if image_list:
-                for img_index, img_info in enumerate(image_list):
-                    try:
-                        xref = img_info[0]
-                        base_image = doc.extract_image(xref)
-                        image_bytes = base_image["image"]
-                        image_ext = base_image["ext"]
-
-                        img_rects = page.get_image_rects(xref)
-                        y_pos = img_rects[0].y0 if img_rects else 999999
-
-                        pil_image = PILImage.open(io.BytesIO(image_bytes))
-                        if pil_image.width < 100 or pil_image.height < 100:
-                            continue
-
-                        from common.utils.image_data_extractor import describe_image_with_llm
-                        description = describe_image_with_llm(pil_image)
-                        description_lower = description.lower()
-                        logo_indicators = [
-                            'logo:', 'icon:', 'logo', 'icon', 'branding',
-                            'watermark', 'trademark', 'stylized letter',
-                            'stylized text', 'word "', "word '"
-                        ]
-                        if any(indicator in description_lower for indicator in logo_indicators):
-                            continue
-
-                        buffer = io.BytesIO()
-                        if pil_image.mode != 'RGB':
-                            pil_image = pil_image.convert('RGB')
-                        pil_image.save(buffer, format="JPEG", quality=95)
-                        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
-
-                        image_counter += 1
-                        image_doc_id = f"{base_doc_id}_image_{image_counter}"
-
-                        images_with_pos.append({
-                            'type': 'image',
-                            'image_doc_id': image_doc_id,
-                            'description': description,
-                            'y_pos': y_pos,
-                            'image_data': image_base64,
-                            'image_format': image_ext,
-                            'width': pil_image.width,
-                            'height': pil_image.height
-                        })
-                    except Exception as img_error:
-                        logger.warning(f"Failed to extract image on page {page_num}: {img_error}")
-
-            all_elements = text_blocks_with_pos + images_with_pos
-            all_elements.sort(key=lambda x: x['y_pos'])
-
-            for element in all_elements:
-                if element['type'] == 'text':
-                    markdown_parts.append(element['content'])
-                    markdown_parts.append("\n\n")
-                else:
-                    # Add image description as text, then markdown image reference
-                    # Use short alt text in markdown, full description as regular text
-                    markdown_parts.append(f"![{element['description']}](tg://{element['image_doc_id']})\n\n")
-
-                    image_entries.append({
-                        "doc_id": element['image_doc_id'],
-                        "doc_type": "image",
-                        "image_description": element['description'],
-                        "image_data": element['image_data'],
-                        "image_format": element['image_format'],
-                        "parent_doc": base_doc_id,
-                        "page_number": page_num,
-                        "width": element['width'],
-                        "height": element['height'],
-                        "position": int(element['image_doc_id'].split('_')[-1])
-                    })
-
-        doc.close()
-
-        markdown_content = "".join(markdown_parts) if markdown_parts else "" #No content extracted from PDF
-        if not markdown_content:
-            return []
+        # FINAL CLEANUP — delete folder after processing everything
+        if image_output_folder.exists() and image_output_folder.is_dir():
+            try:
+                shutil.rmtree(image_output_folder)
+                logger.debug(f"Deleted image folder: {image_output_folder}")
+            except Exception as delete_err:
+                logger.warning(f"Failed to delete folder {image_output_folder}: {delete_err}")
 
+        # Build final result
         result = [{
             "doc_id": base_doc_id,
-            "doc_type": "",
+            "doc_type": "markdown",
             "content": markdown_content,
             "position": 0
         }]
         result.extend(image_entries)
+
         return result
 
-    except ImportError:
-        logger.error("PyMuPDF not available")
+    except ImportError as import_err:
+        logger.error(f"Required library missing: {import_err}")
+        # Cleanup on import error
+        if image_output_folder.exists():
+            shutil.rmtree(image_output_folder, ignore_errors=True)
         return [{
             "doc_id": base_doc_id,
-            "doc_type": "",
-            "content": "[PDF extraction requires PyMuPDF]",
+            "doc_type": "markdown",
+            "content": "[PDF extraction requires pymupdf4llm and PyMuPDF]",
             "position": 0
         }]
     except Exception as e:
         logger.error(f"Error extracting PDF: {e}")
+        # Cleanup on any other error
+        if image_output_folder.exists():
+            shutil.rmtree(image_output_folder, ignore_errors=True)
         raise
 
-
 def _extract_standalone_image_as_doc(file_path, base_doc_id, graphname=None):
     """
     Extract standalone image file as ONE markdown document with inline image reference.
@@ -324,25 +484,15 @@ def _extract_standalone_image_as_doc(file_path, base_doc_id, graphname=None):
 
         pil_image = PILImage.open(file_path)
         if pil_image.width < 100 or pil_image.height < 100:
-            return [{
-                "doc_id": base_doc_id,
-                "doc_type": "",
-                "content": f"[Skipped small image: {file_path.name}]",
-                "position": 0
-            }]
+            pass
 
-        description = describe_image_with_llm(pil_image)
+        description = describe_image_with_llm(str(Path(file_path).absolute()))
         description_lower = description.lower()
         logo_indicators = ['logo:', 'icon:', 'logo', 'icon', 'branding',
                            'watermark', 'trademark', 'stylized letter',
                            'stylized text', 'word "', "word '"]
         if any(indicator in description_lower for indicator in logo_indicators):
-            return [{
-                "doc_id": base_doc_id,
-                "doc_type": "",
-                "content": f"[Skipped logo/icon: {file_path.name}]",
-                "position": 0
-            }]
+            return []
 
         buffer = io.BytesIO()
         if pil_image.mode != 'RGB':
@@ -353,7 +503,6 @@ def _extract_standalone_image_as_doc(file_path, base_doc_id, graphname=None):
         image_id = f"{base_doc_id}_image_1"
         # Put description as text, then markdown image reference with short alt text
         content = f"![{description}](tg://{image_id})"
-
         return [
             {
                 "doc_id": base_doc_id,
@@ -379,7 +528,7 @@ def _extract_standalone_image_as_doc(file_path, base_doc_id, graphname=None):
         logger.error(f"Error extracting image: {e}")
         return [{
             "doc_id": base_doc_id,
-            "doc_type": "",
+            "doc_type": "markdown",
             "content": f"[Image extraction failed: {str(e)}]",
             "position": 0
         }]
@@ -441,12 +590,10 @@ def get_doc_type_from_extension(extension):
 
     if extension in ['.html', '.htm']:
         return 'html'
-    elif extension in ['.md']:
-        return 'markdown'
     elif extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp']:
         return 'image'
     else:
-        return ''
+        return 'markdown'
 
 
 def get_supported_extensions():
@@ -457,4 +604,4 @@ def get_supported_extensions():
 def is_supported_file(file_path):
     """Check if a file is supported for text extraction."""
     extension = Path(file_path).suffix.lower()
-    return extension in get_supported_extensions()
+    return extension in get_supported_extensions()
\ No newline at end of file
diff --git a/graphrag/app/supportai/supportai.py b/graphrag/app/supportai/supportai.py
index d2efe8a..70e03b4 100644
--- a/graphrag/app/supportai/supportai.py
+++ b/graphrag/app/supportai/supportai.py
@@ -648,42 +648,47 @@ def ingest(
             }
         elif ingest_config.get("data_source") == "server":
             try:
-                processed_files = []
                 data_source_id = ingest_config.get("data_source_id", "DocumentContent")
-                if ingest_config.get("server_jobs"):
-                    for doc_data in ingest_config.get("server_jobs"):
-                        if not doc_data.get("doc_id") or not doc_data.get("content"):
-                            continue
-                        if doc_data.get("image_data"):
-                            payload = {
-                                "doc_id": doc_data.get("doc_id", ""),
-                                "doc_type": "image",
-                                "image_data": doc_data.get("image_data", ""),
-                                "image_format": doc_data.get("image_format", "jpg"),
-                                "parent_doc": doc_data.get("parent_doc", ""),
-                                "page_number": doc_data.get("page_number", 0),
-                                "position": doc_data.get("position", 0),
-                                "content": ""
-                            }
-                        else:
-                            payload = {
-                                "doc_id": doc_data.get("doc_id", ""),
-                                "doc_type": doc_data.get("doc_type", "markdown"),
-                                "content": doc_data.get("content", "")
-                            }
-                        payload_json = json.dumps(payload)
-                        conn.runLoadingJobWithData(payload_json, data_source_id, loader_info.load_job_id)
-                        processed_files.append({
-                            'file_path': doc_data.get("doc_id", ""),
-                            'parent_doc': doc_data.get("parent_doc", ""),
-                        })
-                        logger.info(f"Data uploading done for doc_id: {doc_data.get('doc_id', 'unknown')}")
+                
+                # Read from temporary folder's JSONL file
+                temp_folder = ingest_config.get("temp_folder")
+                if not temp_folder or not os.path.exists(temp_folder):
+                    raise Exception(f"Temporary folder not found: {temp_folder}")
+                
+                # Read the entire JSONL file as a string
+                jsonl_file = os.path.join(temp_folder, "processed_documents.jsonl")
+                if not os.path.exists(jsonl_file):
+                    raise Exception(f"JSONL file not found: {jsonl_file}")
+                
+                logger.info(f"Reading JSONL file: {jsonl_file}")
+                
+                # Read entire JSONL content
+                with open(jsonl_file, 'r', encoding='utf-8') as f:
+                    jsonl_content = f.read()
+                
+                # Load all documents in one call - runLoadingJobWithData supports JSONL format
+                conn.runLoadingJobWithData(jsonl_content, data_source_id, loader_info.load_job_id)
+                
+                # Count documents for reporting
+                doc_count = sum(1 for line in jsonl_content.strip().split('\n') if line.strip())
+                logger.info(f"Successfully ingested {doc_count} documents from JSONL")
+                
+                # Clean up temp folder after successful ingestion
+                try:
+                    import shutil
+                    shutil.rmtree(temp_folder)
+                    logger.info(f"Cleaned up temporary folder: {temp_folder}")
+                except Exception as cleanup_error:
+                    logger.warning(f"Failed to cleanup temp folder {temp_folder}: {cleanup_error}")
+                    
             except Exception as e:
                 raise Exception(f"Error during server markdown extraction and TigerGraph loading: {e}")
             return {
                 "job_name": loader_info.load_job_id,
-                "summary": processed_files
+                "summary": f"Successfully ingested {doc_count} documents from JSONL",
+                "document_count": doc_count
             }
+
         else:
             raise Exception("Data source and file format combination not implemented")
     else:
diff --git a/licenses/pymupdf4llm-AGPL-3.0.txt b/licenses/pymupdf4llm-AGPL-3.0.txt
new file mode 100644
index 0000000..0ad25db
--- /dev/null
+++ b/licenses/pymupdf4llm-AGPL-3.0.txt
@@ -0,0 +1,661 @@
+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+  A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.  However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+  The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community.  It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server.  Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+
+  An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals.  This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU Affero General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Remote Network Interaction; Use with the GNU General Public License.
+
+  Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software.  This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time.  Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published
+    by the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<https://www.gnu.org/licenses/>.

From bb07f407a75a4b7ed09d4eb5fd0292e4e1073394 Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Mon, 8 Dec 2025 16:21:22 +0530
Subject: [PATCH 2/4] WIP: saving my work

---
 common/requirements.txt             |    1 +
 configs/nginx.conf                  |    1 -
 configs/server_config.json          |    1 -
 ecc/app/common                      |    1 -
 ecc/app/configs                     |    1 -
 graphrag-ui/src/pages/Setup.tsx     | 1291 ++++++++++++++-------------
 graphrag/app/common                 |    1 -
 graphrag/app/configs                |    1 -
 graphrag/app/routers/ui.py          |   35 +-
 graphrag/app/supportai/supportai.py |   31 +-
 10 files changed, 716 insertions(+), 648 deletions(-)
 delete mode 120000 configs/nginx.conf
 delete mode 120000 configs/server_config.json
 delete mode 120000 ecc/app/common
 delete mode 120000 ecc/app/configs
 delete mode 120000 graphrag/app/common
 delete mode 120000 graphrag/app/configs

diff --git a/common/requirements.txt b/common/requirements.txt
index 84b5061..d5a2d5b 100644
--- a/common/requirements.txt
+++ b/common/requirements.txt
@@ -110,6 +110,7 @@ packaging==24.2
 pandas==2.2.3
 #pathtools==0.1.2
 pillow==11.2.1
+PyMuPDF==1.26.6
 pymupdf4llm==0.2.0
 platformdirs==4.3.8
 pluggy==1.6.0
diff --git a/configs/nginx.conf b/configs/nginx.conf
deleted file mode 120000
index cffce04..0000000
--- a/configs/nginx.conf
+++ /dev/null
@@ -1 +0,0 @@
-../docs/tutorials/configs/nginx.conf
\ No newline at end of file
diff --git a/configs/server_config.json b/configs/server_config.json
deleted file mode 120000
index 04e4259..0000000
--- a/configs/server_config.json
+++ /dev/null
@@ -1 +0,0 @@
-../docs/tutorials/configs/server_config.json
\ No newline at end of file
diff --git a/ecc/app/common b/ecc/app/common
deleted file mode 120000
index dc879ab..0000000
--- a/ecc/app/common
+++ /dev/null
@@ -1 +0,0 @@
-../../common
\ No newline at end of file
diff --git a/ecc/app/configs b/ecc/app/configs
deleted file mode 120000
index 5992d10..0000000
--- a/ecc/app/configs
+++ /dev/null
@@ -1 +0,0 @@
-../../configs
\ No newline at end of file
diff --git a/graphrag-ui/src/pages/Setup.tsx b/graphrag-ui/src/pages/Setup.tsx
index b7d357d..e6f6275 100644
--- a/graphrag-ui/src/pages/Setup.tsx
+++ b/graphrag-ui/src/pages/Setup.tsx
@@ -2,7 +2,7 @@ import React, { useState, useEffect } from "react";
 import { useNavigate } from "react-router-dom";
 import { Button } from "@/components/ui/button";
 import { Input } from "@/components/ui/input";
-import { Database, Upload, RefreshCw, Loader2, Trash2, FolderUp, Cloud, ArrowLeft, CloudDownload, CloudLightning } from "lucide-react";
+import {Database,Upload,RefreshCw,Loader2,Trash2,FolderUp,Cloud,ArrowLeft,CloudDownload,CloudCog,CloudLightning} from "lucide-react";
 import {
   Dialog,
   DialogContent,
@@ -40,7 +40,7 @@ const Setup = () => {
   const navigate = useNavigate();
   const [confirm, confirmDialog, isConfirmDialogOpen] = useConfirm();
   const [availableGraphs, setAvailableGraphs] = useState<string[]>([]);
-  
+
   const [initializeGraphOpen, setInitializeGraphOpen] = useState(false);
   const [graphName, setGraphName] = useState("");
   const [isInitializing, setIsInitializing] = useState(false);
@@ -56,7 +56,13 @@ const Setup = () => {
   const [uploadMessage, setUploadMessage] = useState("");
   const [isIngesting, setIsIngesting] = useState(false);
   const [ingestMessage, setIngestMessage] = useState("");
-  const [activeTab, setActiveTab] = useState("upload");
+
+  // Ingestion temp files state
+  const [tempSessionId, setTempSessionId] = useState<string | null>(null);
+  const [tempFiles, setTempFiles] = useState<any[]>([]);
+  const [showTempFiles, setShowTempFiles] = useState(false);
+  const [ingestJobData, setIngestJobData] = useState<any>(null);
+  const [directIngestion, setDirectIngestion] = useState(false);
 
   // Refresh state
   const [refreshOpen, setRefreshOpen] = useState(false);
@@ -65,14 +71,15 @@ const Setup = () => {
   const [refreshGraphName, setRefreshGraphName] = useState("");
   const [isRebuildRunning, setIsRebuildRunning] = useState(false);
   const [isCheckingStatus, setIsCheckingStatus] = useState(false);
-  
-  // S3 state
+
+  // S3 / Bedrock state
+  const [fileFormat, setFileFormat] = useState<"json" | "multi">("multi"); // default to multi (documents)
   const [awsAccessKey, setAwsAccessKey] = useState("");
   const [awsSecretKey, setAwsSecretKey] = useState("");
+  const [dataPath, setDataPath] = useState("");
   const [inputBucket, setInputBucket] = useState("");
   const [outputBucket, setOutputBucket] = useState("");
   const [regionName, setRegionName] = useState("");
-  const [skipBDAProcessing, setSkipBDAProcessing] = useState(false);
 
   // Cloud Download state
   const [cloudProvider, setCloudProvider] = useState<"s3" | "gcs" | "azure">("s3");
@@ -93,6 +100,13 @@ const Setup = () => {
   const [isDownloading, setIsDownloading] = useState(false);
   const [downloadMessage, setDownloadMessage] = useState("");
 
+  // Active tab state
+  const [activeTab, setActiveTab] = useState("upload");
+
+  // -------------------------
+  // Networking helpers
+  // -------------------------
+
   // Fetch uploaded files
   const fetchUploadedFiles = async () => {
     if (!ingestGraphName) return;
@@ -109,7 +123,9 @@ const Setup = () => {
     }
   };
 
-  // Upload files
+  // -------------------------
+  // Upload handlers
+  // -------------------------
   const handleUploadFiles = async () => {
     if (!selectedFiles || selectedFiles.length === 0) {
       setUploadMessage("Please select files to upload");
@@ -122,14 +138,14 @@ const Setup = () => {
     }
 
     const filesArray = Array.from(selectedFiles);
-    
+
     // Check if any single file exceeds the server limit
     const oversizedFiles = filesArray.filter((file) => file.size > MAX_UPLOAD_SIZE_BYTES);
     if (oversizedFiles.length > 0) {
       const names = oversizedFiles.map((file) => `${file.name} (${formatBytes(file.size)})`).join(", ");
       setUploadMessage(
         `❌ ${names} ${oversizedFiles.length === 1 ? "exceeds" : "exceed"} the ${MAX_UPLOAD_SIZE_MB} MB limit per file. ` +
-        `Please split or compress ${oversizedFiles.length === 1 ? "this file" : "these files"}.`
+          `Please split or compress ${oversizedFiles.length === 1 ? "this file" : "these files"}.`
       );
       return;
     }
@@ -159,15 +175,23 @@ const Setup = () => {
 
       const data = await response.json();
       if (data.status === "success") {
-        setUploadMessage(`✅ ${data.message}`);
+        setUploadMessage("✅ Successfully uploaded the files. Wait for file processing");
         setSelectedFiles(null);
         await fetchUploadedFiles();
+        setIsUploading(false);
+
+        // Step 2: Call create_ingest to process uploaded files in background
+        console.log("Calling handleCreateIngestAfterUpload from main upload...");
+        handleCreateIngestAfterUpload("uploaded").catch((err) => {
+          console.error("Error in background processing:", err);
+        });
       } else {
         setUploadMessage(`⚠️ ${data.message}`);
+        setIsUploading(false);
       }
     } catch (error: any) {
+      console.error("Upload error:", error);
       setUploadMessage(`❌ Error: ${error.message}`);
-    } finally {
       setIsUploading(false);
     }
   };
@@ -187,9 +211,9 @@ const Setup = () => {
       for (let i = 0; i < filesArray.length; i++) {
         const file = filesArray[i];
         const fileNumber = i + 1;
-        
+
         setUploadMessage(`Uploading file ${fileNumber}/${totalFiles}: ${file.name} (${formatBytes(file.size)})...`);
-        
+
         const formData = new FormData();
         formData.append("files", file);
 
@@ -219,42 +243,63 @@ const Setup = () => {
 
       // Show final result
       if (failedCount === 0) {
-        setUploadMessage(`✅ Successfully uploaded all ${uploadedCount} files (uploaded individually).`);
+        setUploadMessage(`✅ Successfully uploaded all ${uploadedCount} files. Processing...`);
       } else {
-        setUploadMessage(`⚠️ Uploaded ${uploadedCount} files successfully, ${failedCount} failed.`);
+        setUploadMessage(`⚠️ Uploaded ${uploadedCount} files successfully, ${failedCount} failed. Processing...`);
       }
-      
+
       setSelectedFiles(null);
       await fetchUploadedFiles();
+
+      // Step 2: Call create_ingest to process uploaded files
+      console.log("Calling handleCreateIngestAfterUpload...");
+      await handleCreateIngestAfterUpload("uploaded");
+      console.log("handleCreateIngestAfterUpload completed");
     } catch (error: any) {
+      console.error("Upload error:", error);
       setUploadMessage(`❌ Batch upload error: ${error.message}`);
     } finally {
       setIsUploading(false);
     }
   };
 
-  // Delete a specific file
+  // -------------------------
+  // Delete uploaded / downloaded file handlers
+  // (single-file deletes pass session_id when a temp session is active)
+  // -------------------------
   const handleDeleteFile = async (filename: string) => {
     if (!ingestGraphName) return;
 
+    console.log("Deleting file:", filename);
+    console.log("tempSessionId:", tempSessionId);
+
     try {
       const creds = localStorage.getItem("creds");
-      const response = await fetch(
-        `/ui/${ingestGraphName}/uploads?filename=${encodeURIComponent(filename)}`,
-        {
-          method: "DELETE",
-          headers: { Authorization: `Basic ${creds}` },
-        }
-      );
+
+      // Delete original file (backend will also delete processed content from JSONL if session_id is provided)
+      const url = tempSessionId
+        ? `/ui/${ingestGraphName}/uploads?filename=${encodeURIComponent(filename)}&session_id=${tempSessionId}`
+        : `/ui/${ingestGraphName}/uploads?filename=${encodeURIComponent(filename)}`;
+
+      const response = await fetch(url, {
+        method: "DELETE",
+        headers: { Authorization: `Basic ${creds}` },
+      });
       const data = await response.json();
+
       setUploadMessage(`✅ ${data.message}`);
       await fetchUploadedFiles();
+
+      // Refresh temp files list if session exists
+      if (tempSessionId) {
+        await fetchTempFiles(tempSessionId);
+      }
     } catch (error: any) {
+      console.error("Delete error:", error);
       setUploadMessage(`❌ Error: ${error.message}`);
     }
   };
 
-  // Delete all files
   const handleDeleteAllFiles = async () => {
     if (!ingestGraphName) return;
 
@@ -268,6 +313,12 @@ const Setup = () => {
         headers: { Authorization: `Basic ${creds}` },
       });
       const data = await response.json();
+
+      // Also clear temp session
+      if (tempSessionId) {
+        await handleDeleteAllTempFiles();
+      }
+
       setUploadMessage(`✅ ${data.message}`);
       await fetchUploadedFiles();
     } catch (error: any) {
@@ -275,7 +326,9 @@ const Setup = () => {
     }
   };
 
-  // Fetch downloaded files from cloud
+  // -------------------------
+  // Cloud download handlers
+  // -------------------------
   const fetchDownloadedFiles = async () => {
     if (!ingestGraphName) return;
 
@@ -291,7 +344,6 @@ const Setup = () => {
     }
   };
 
-  // Handle cloud download
   const handleCloudDownload = async () => {
     if (!ingestGraphName) {
       setDownloadMessage("Please select a graph");
@@ -303,7 +355,7 @@ const Setup = () => {
 
     try {
       const creds = localStorage.getItem("creds");
-      
+
       // Prepare request body based on provider
       let requestBody: any = { provider: cloudProvider };
 
@@ -360,42 +412,56 @@ const Setup = () => {
 
       const data = await response.json();
       if (data.status === "success") {
-        setDownloadMessage(`✅ ${data.message}`);
+        setDownloadMessage("✅ Successfully downloaded the files. Wait for file processing");
         await fetchDownloadedFiles();
+        setIsDownloading(false);
+
+        // Step 2: Call create_ingest to process downloaded files in background
+        handleCreateIngestAfterUpload("downloaded").catch((err) => {
+          console.error("Error in background processing:", err);
+        });
       } else if (data.status === "warning") {
         setDownloadMessage(`⚠️ ${data.message}`);
+        setIsDownloading(false);
       } else {
         setDownloadMessage(`❌ ${data.message || "Download failed"}`);
+        setIsDownloading(false);
       }
     } catch (error: any) {
       setDownloadMessage(`❌ Error: ${error.message}`);
-    } finally {
       setIsDownloading(false);
     }
   };
 
-  // Delete a specific downloaded file
   const handleDeleteDownloadedFile = async (filename: string) => {
     if (!ingestGraphName) return;
 
     try {
       const creds = localStorage.getItem("creds");
-      const response = await fetch(
-        `/ui/${ingestGraphName}/cloud/delete?filename=${encodeURIComponent(filename)}`,
-        {
-          method: "DELETE",
-          headers: { Authorization: `Basic ${creds}` },
-        }
-      );
+
+      // Delete original file (backend will also delete processed content from JSONL if session_id is provided)
+      const url = tempSessionId
+        ? `/ui/${ingestGraphName}/cloud/delete?filename=${encodeURIComponent(filename)}&session_id=${tempSessionId}`
+        : `/ui/${ingestGraphName}/cloud/delete?filename=${encodeURIComponent(filename)}`;
+
+      const response = await fetch(url, {
+        method: "DELETE",
+        headers: { Authorization: `Basic ${creds}` },
+      });
       const data = await response.json();
+
       setDownloadMessage(`✅ ${data.message}`);
       await fetchDownloadedFiles();
+
+      // Refresh temp files list if session exists
+      if (tempSessionId) {
+        await fetchTempFiles(tempSessionId);
+      }
     } catch (error: any) {
       setDownloadMessage(`❌ Error: ${error.message}`);
     }
   };
 
-  // Delete all downloaded files
   const handleDeleteAllDownloadedFiles = async () => {
     if (!ingestGraphName) return;
 
@@ -416,16 +482,193 @@ const Setup = () => {
     }
   };
 
+  // -------------------------
+  // Temp session helpers (review processed files before ingest)
+  // -------------------------
+  // Fetch temp processed files (server endpoint: GET /ui/{graph}/ingestion_temp/list?session_id=)
+  const fetchTempFiles = async (sessionId: string) => {
+    if (!ingestGraphName || !sessionId) return;
+
+    try {
+      const creds = localStorage.getItem("creds");
+      const response = await fetch(`/ui/${ingestGraphName}/ingestion_temp/list?session_id=${sessionId}`, {
+        headers: { Authorization: `Basic ${creds}` },
+      });
+      const data = await response.json();
+      if (data.status === "success" && data.sessions.length > 0) {
+        setTempFiles(data.sessions[0].files || []);
+        setShowTempFiles(true);
+      }
+    } catch (error) {
+      console.error("Error fetching temp files:", error);
+    }
+  };
+
+  // Delete a specific temp file
+  const handleDeleteTempFile = async (filename: string) => {
+    if (!ingestGraphName || !tempSessionId) return;
+
+    try {
+      const creds = localStorage.getItem("creds");
+      const response = await fetch(
+        `/ui/${ingestGraphName}/ingestion_temp/delete?session_id=${tempSessionId}&filename=${encodeURIComponent(filename)}`,
+        {
+          method: "DELETE",
+          headers: { Authorization: `Basic ${creds}` },
+        }
+      );
+      const data = await response.json();
+      if (data.status === "success") {
+        setIngestMessage(`✅ ${data.message}`);
+        // Refresh the temp files list
+        await fetchTempFiles(tempSessionId);
+      }
+    } catch (error: any) {
+      setIngestMessage(`❌ Error: ${error.message}`);
+    }
+  };
+
+  // Delete all temp files for session
+  const handleDeleteAllTempFiles = async () => {
+    if (!ingestGraphName || !tempSessionId) return;
+
+    try {
+      const creds = localStorage.getItem("creds");
+      const response = await fetch(
+        `/ui/${ingestGraphName}/ingestion_temp/delete?session_id=${tempSessionId}`,
+        {
+          method: "DELETE",
+          headers: { Authorization: `Basic ${creds}` },
+        }
+      );
+      const data = await response.json();
+      if (data.status === "success") {
+        setIngestMessage(`✅ ${data.message}`);
+        setTempFiles([]);
+        setShowTempFiles(false);
+        setTempSessionId(null);
+      }
+    } catch (error: any) {
+      setIngestMessage(`❌ Error: ${error.message}`);
+    }
+  };
+
+  // Delete temp files matching original filename (client convenience)
+  const handleDeleteTempFilesForOriginal = async (originalFilename: string) => {
+    console.log("handleDeleteTempFilesForOriginal called with:", originalFilename);
+
+    if (!ingestGraphName || !tempSessionId) {
+      console.log("No graph name or session ID, returning");
+      return;
+    }
+
+    try {
+      // Extract base name without extension (e.g., "document.pdf" -> "document")
+      const baseName = originalFilename.replace(/\.[^/.]+$/, "");
+      console.log("Base name:", baseName);
+
+      const creds = localStorage.getItem("creds");
+
+      // Fetch temp files to find matches
+      const response = await fetch(`/ui/${ingestGraphName}/ingestion_temp/list?session_id=${tempSessionId}`, {
+        headers: { Authorization: `Basic ${creds}` },
+      });
+      const data = await response.json();
+      console.log("Temp files list response:", data);
+
+      if (data.status === "success" && data.sessions.length > 0) {
+        const files = data.sessions[0].files || [];
+        console.log("All temp files:", files.map((f: any) => f.filename));
+
+        // Find temp files whose name contains "_{baseName}" (e.g. doc_{idx}_{baseName}*.json)
+        const matchingFiles = files.filter((f: any) => f.filename.includes(`_${baseName}`));
+        console.log("Matching files to delete:", matchingFiles.map((f: any) => f.filename));
+
+        // Delete each matching file
+        for (const file of matchingFiles) {
+          console.log("Deleting temp file:", file.filename);
+          const deleteResponse = await fetch(
+            `/ui/${ingestGraphName}/ingestion_temp/delete?session_id=${tempSessionId}&filename=${encodeURIComponent(file.filename)}`,
+            {
+              method: "DELETE",
+              headers: { Authorization: `Basic ${creds}` },
+            }
+          );
+          const deleteData = await deleteResponse.json();
+          console.log("Delete response:", deleteData);
+        }
+
+        console.log(`Successfully deleted ${matchingFiles.length} temp file(s)`);
+      } else {
+        console.log("No temp files found or empty sessions");
+      }
+    } catch (error: any) {
+      console.error("Error deleting temp files:", error);
+    }
+  };
+
+  // -------------------------
+  // Ingest flows (create ingest, run ingest)
+  // -------------------------
+  const handleRunIngest = async () => {
+    if (!ingestJobData) {
+      setIngestMessage("❌ No ingest job data available");
+      return;
+    }
+
+    setIsIngesting(true);
+    setIngestMessage("Running final document ingest...");
+
+    try {
+      const creds = localStorage.getItem("creds");
+
+      const loadingInfo = {
+        load_job_id: ingestJobData.load_job_id,
+        data_source_id: ingestJobData.data_source_id,
+        file_path: ingestJobData.data_path,
+      };
+
+      const ingestResponse = await fetch(`/ui/${ingestGraphName}/ingest`, {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          Authorization: `Basic ${creds}`,
+        },
+        body: JSON.stringify(loadingInfo),
+      });
+
+      if (!ingestResponse.ok) {
+        const errorData = await ingestResponse.json();
+        throw new Error(errorData.detail || `Failed to run ingest: ${ingestResponse.statusText}`);
+      }
+
+      const ingestData = await ingestResponse.json();
+      console.log("Ingest response:", ingestData);
+
+      setIngestMessage(`✅ Data ingested successfully! Processed ${tempFiles.length} documents.`);
+
+      // Clear temp state
+      setTempFiles([]);
+      setShowTempFiles(false);
+      setTempSessionId(null);
+      setIngestJobData(null);
+    } catch (error: any) {
+      console.error("Error running ingest:", error);
+      setIngestMessage(`❌ Error: ${error.message}`);
+    } finally {
+      setIsIngesting(false);
+    }
+  };
+
   // Ingest files into knowledge graph (uploaded or downloaded)
+  // Creates ingest job and either shows temp session or runs ingestion directly
   const handleIngestDocuments = async (sourceType: "uploaded" | "downloaded" = "uploaded") => {
     if (!ingestGraphName) {
       setIngestMessage("Please select a graph");
       return;
     }
 
-    const folderPath = sourceType === "uploaded" 
-      ? `uploads/${ingestGraphName}`
-      : `downloaded_files_cloud/${ingestGraphName}`;
+    const folderPath = sourceType === "uploaded" ? `uploads/${ingestGraphName}` : `downloaded_files_cloud/${ingestGraphName}`;
 
     setIsIngesting(true);
     setIngestMessage("Step 1/2: Creating ingest job...");
@@ -437,10 +680,10 @@ const Setup = () => {
       const createIngestConfig = {
         data_source: "server",
         data_source_config: {
-          folder_path: folderPath
+          data_path: folderPath,
         },
         loader_config: {},
-        file_format: "multi"
+        file_format: "multi",
       };
 
       const createResponse = await fetch(`/ui/${ingestGraphName}/create_ingest`, {
@@ -458,44 +701,144 @@ const Setup = () => {
       }
 
       const createData = await createResponse.json();
-      //console.log("Create ingest response:", createData);
+      console.log("Create ingest response:", createData);
 
-      // Step 2: Run ingest
-      setIngestMessage("Step 2/2: Running document ingest...");
+      // Check if temp files were created (for server data source)
+      const sessionId = createData.data_source_id?.temp_session_id;
 
-      const loadingInfo = {
-        load_job_id: createData.load_job_id,
-        data_source_id: createData.data_source_id,
-        file_path: createData.data_path || createData.file_path, // Handle both field names
+      if (sessionId && !directIngestion) {
+        // Files are saved to temp storage - show them for review (only if not direct ingestion)
+        setTempSessionId(sessionId);
+        setIngestJobData({
+          load_job_id: createData.load_job_id,
+          data_source_id: createData.data_source_id,
+          data_path: createData.data_path || createData.file_path,
+        });
+        setIngestMessage(`✅ Processed ${createData.data_source_id.file_count} files. Review them below before ingesting.`);
+        await fetchTempFiles(sessionId);
+        setIsIngesting(false);
+      } else {
+        // No temp files (e.g., S3 Bedrock) OR direct ingestion enabled - proceed directly to ingest
+        setIngestMessage("Step 2/2: Running document ingest...");
+
+        const loadingInfo = {
+          load_job_id: createData.load_job_id,
+          data_source_id: createData.data_source_id,
+          file_path: createData.data_path || createData.file_path,
+        };
+
+        const ingestResponse = await fetch(`/ui/${ingestGraphName}/ingest`, {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            Authorization: `Basic ${creds}`,
+          },
+          body: JSON.stringify(loadingInfo),
+        });
+
+        if (!ingestResponse.ok) {
+          const errorData = await ingestResponse.json();
+          throw new Error(errorData.detail || `Failed to run ingest: ${ingestResponse.statusText}`);
+        }
+
+        const ingestData = await ingestResponse.json();
+        console.log("Ingest response:", ingestData);
+
+        setIngestMessage(`✅ Data ingested successfully! Processed documents from ${folderPath}/`);
+        setIsIngesting(false);
+      }
+    } catch (error: any) {
+      console.error("Error ingesting data:", error);
+      setIngestMessage(`❌ Error: ${error.message}`);
+      setIsIngesting(false);
+    }
+  };
+
+  // Called automatically after upload or cloud download finishes.
+  // Creates an ingest job that processes files into a temp session (JSONL), stores the session id and job data in component state, and runs the ingest immediately when direct ingestion is enabled.
+  const handleCreateIngestAfterUpload = async (sourceType: "uploaded" | "downloaded" = "uploaded") => {
+    console.log("handleCreateIngestAfterUpload called with sourceType:", sourceType);
+    console.log("ingestGraphName:", ingestGraphName);
+
+    if (!ingestGraphName) {
+      console.log("No graph name, returning early");
+      return;
+    }
+
+    const folderPath = sourceType === "uploaded" ? `uploads/${ingestGraphName}` : `downloaded_files_cloud/${ingestGraphName}`;
+
+    console.log("folderPath:", folderPath);
+
+    try {
+      const creds = localStorage.getItem("creds");
+
+      // Build the create_ingest request: the per-graph server folder is the data source, file format is "multi"
+      const createIngestConfig = {
+        data_source: "server",
+        data_source_config: {
+          data_path: folderPath,
+        },
+        loader_config: {},
+        file_format: "multi",
       };
 
-      const ingestResponse = await fetch(`/ui/${ingestGraphName}/ingest`, {
+      console.log("Calling create_ingest with config:", createIngestConfig);
+
+      const createResponse = await fetch(`/ui/${ingestGraphName}/create_ingest`, {
         method: "POST",
         headers: {
           "Content-Type": "application/json",
           Authorization: `Basic ${creds}`,
         },
-        body: JSON.stringify(loadingInfo),
+        body: JSON.stringify(createIngestConfig),
       });
 
-      if (!ingestResponse.ok) {
-        const errorData = await ingestResponse.json();
-        throw new Error(errorData.detail || `Failed to run ingest: ${ingestResponse.statusText}`);
+      console.log("create_ingest response status:", createResponse.status);
+
+      if (!createResponse.ok) {
+        const errorData = await createResponse.json();
+        console.error("create_ingest error:", errorData);
+        throw new Error(errorData.detail || `Failed to create ingest job: ${createResponse.statusText}`);
       }
 
-      const ingestData = await ingestResponse.json();
-      //console.log("Ingest response:", ingestData);
+      const createData = await createResponse.json();
+      console.log("create_ingest response data:", createData);
+
+      const sessionId = createData.data_source_id?.temp_session_id;
+      console.log("Session ID:", sessionId);
+
+      if (sessionId) {
+        // Save the session ID and the job info in component state for the later ingest step
+        setTempSessionId(sessionId);
+        setIngestJobData({
+          load_job_id: createData.load_job_id,
+          data_source_id: createData.data_source_id,
+          data_path: createData.data_path || createData.file_path,
+        });
+
+        console.log("Direct ingestion enabled:", directIngestion);
 
-      setIngestMessage(`✅ Data ingested successfully! Processed documents from ${folderPath}/`);
+        if (directIngestion) {
+          // Direct ingestion - ingest immediately. NOTE(review): if handleRunIngest reads tempSessionId/ingestJobData from state, the values set just above are not visible to it until the next render - confirm.
+          setUploadMessage("Running direct ingestion...");
+          await handleRunIngest();
+        } else {
+          // Save for later - files ready for ingestion
+          setUploadMessage(`✅ Successfully processed ${createData.data_source_id.file_count} files. Ready for ingestion.`);
+        }
+      } else {
+        console.warn("No session ID returned from create_ingest");
+      }
     } catch (error: any) {
-      console.error("Error ingesting data:", error);
-      setIngestMessage(`❌ Error: ${error.message}`);
-    } finally {
-      setIsIngesting(false);
+      console.error("Error in create_ingest:", error);
+      setUploadMessage(`❌ Processing error: ${error.message}`);
     }
   };
 
-  // Ingest files from S3 with Amazon BDA
+  // -------------------------
+  // S3 / Bedrock ingest
+  // Consolidated handler: creates an "s3" ingest job ("multi" format via Bedrock BDA, or "json" from a data path) and then runs the ingest
+  // -------------------------
   const handleAmazonBDAIngest = async () => {
     if (!ingestGraphName) {
       setIngestMessage("Please select a graph");
@@ -508,112 +851,92 @@ const Setup = () => {
       return;
     }
 
-    if (skipBDAProcessing) {
-      // When skipping BDA, only output bucket and region are required
-      if (!outputBucket || !regionName) {
-        setIngestMessage("❌ Please provide Output Bucket and Region Name");
-        return;
-      }
-    } else {
-      // When using BDA, all fields are required
+    if (fileFormat === "multi") {
       if (!inputBucket || !outputBucket || !regionName) {
         setIngestMessage("❌ Please provide Input Bucket, Output Bucket, and Region Name");
         return;
       }
-    }
 
-    // Ask for confirmation
-    const confirmMessage = skipBDAProcessing
-      ? `You're skipping Amazon BDA processing and will ingest directly from the output bucket (${outputBucket}). Please confirm to proceed.`
-      : `You're using Amazon BDA for multimodal document processing. This will trigger Amazon BDA to process your documents from the input bucket (${inputBucket}) and store the results in the output bucket (${outputBucket}) and then ingest them into your knowledge graph. Please confirm to proceed.`;
-    
-    const shouldProceed = await confirm(confirmMessage);
-    if (!shouldProceed) {
-      setIngestMessage("Operation cancelled by user.");
-      return;
+      // Ask for confirmation if using Bedrock (multi format)
+      const shouldProceed = await confirm(
+        `You're using AWS Bedrock for multimodal document processing. This will trigger AWS Bedrock BDA to process your documents from the input bucket (${inputBucket}) and store the results in the output bucket (${outputBucket}). Please confirm to proceed.`
+      );
+      if (!shouldProceed) {
+        setIngestMessage("Operation cancelled by user.");
+        return;
+      }
+    } else if (fileFormat === "json") {
+      if (!dataPath) {
+        setIngestMessage("❌ Please provide Data Path (e.g., s3://bucket-name/path/to/data)");
+        return;
+      }
     }
 
     setIsIngesting(true);
+    setIngestMessage("Step 1/2: Creating ingest job...");
 
     try {
       const creds = localStorage.getItem("creds");
-      let loadingInfo: any = {};
 
-      if (skipBDAProcessing) {
-        // Skip BDA processing - create ingest job that reads directly from output bucket
-        const runIngestConfig: any = {
-          data_source: "bda",
+      // Step 1: Create ingest job
+      const createIngestConfig: any = {
+        data_source: "s3",
+        data_source_config: {
           aws_access_key: awsAccessKey,
           aws_secret_key: awsSecretKey,
-          output_bucket: outputBucket,
-          region_name: regionName,
-          bda_jobs:[],
-          loader_config: {
-            doc_id_field: "doc_id",
-            content_field: "content",
-            doc_type: "markdown",
-          },
-          file_format: "multi"
-        };
-
-        setIngestMessage("Step 1/2: Creating ingest job from output bucket...");
-
-        // Run ingest directly
-        loadingInfo = {
-          load_job_id: "load_documents_content_json",
-          data_source_id: runIngestConfig,
-          file_path: outputBucket,
-        };
-        setIngestMessage(`Step 2/2: Running document ingestion for all files in ${outputBucket}...`);
-      } else {
-        // Step 1: Create ingest job with BDA processing
-        const createIngestConfig: any = {
-          data_source: "bda",
-          data_source_config: {
-            aws_access_key: awsAccessKey,
-            aws_secret_key: awsSecretKey,
-            input_bucket: inputBucket,
-            output_bucket: outputBucket,
-            region_name: regionName,
-          },
-          loader_config: {
-            doc_id_field: "doc_id",
-            content_field: "content",
-            doc_type: "markdown",
-          },
-          file_format: "multi"
-        };
+        },
+        loader_config: {
+          doc_id_field: "doc_id",
+          content_field: "content",
+          doc_type: fileFormat === "multi" ? "markdown" : "",
+        },
+        file_format: fileFormat,
+      };
 
-        setIngestMessage("Step 1/2: Triggering Amazon BDA processing and creating ingest job...");
+      // Add format-specific configuration
+      if (fileFormat === "multi") {
+        createIngestConfig.data_source_config.input_bucket = inputBucket;
+        createIngestConfig.data_source_config.output_bucket = outputBucket;
+        createIngestConfig.data_source_config.region_name = regionName;
+        setIngestMessage("Step 1/2: Creating ingest job and triggering AWS Bedrock BDA processing...");
+      } else if (fileFormat === "json") {
+        createIngestConfig.loader_config.doc_id_field = "url";
+      }
 
-        const createResponse = await fetch(`/ui/${ingestGraphName}/create_ingest`, {
-          method: "POST",
-          headers: {
-            "Content-Type": "application/json",
-            Authorization: `Basic ${creds}`,
-          },
-          body: JSON.stringify(createIngestConfig),
-        });
+      const createResponse = await fetch(`/ui/${ingestGraphName}/create_ingest`, {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          Authorization: `Basic ${creds}`,
+        },
+        body: JSON.stringify(createIngestConfig),
+      });
 
-        if (!createResponse.ok) {
-          const errorData = await createResponse.json();
-          throw new Error(errorData.detail || `Failed to create ingest job: ${createResponse.statusText}`);
-        }
+      if (!createResponse.ok) {
+        const errorData = await createResponse.json();
+        throw new Error(errorData.detail || `Failed to create ingest job: ${createResponse.statusText}`);
+      }
 
-        const createData = await createResponse.json();
-        //console.log("Create ingest response:", createData);
+      const createData = await createResponse.json();
+      console.log("Create ingest response:", createData);
 
-        // Step 2: Run ingest
-        loadingInfo = {
-          load_job_id: createData.load_job_id,
-          data_source_id: createData.data_source_id,
-          file_path: outputBucket,
-        };
+      // Step 2: Run ingest
+      setIngestMessage("Step 2/2: Running document ingest...");
 
-        const filesToIngest = createData.data_source_id.bda_jobs.map((job: any) => job.jobId.split("/")[-1]);
-        setIngestMessage(`Step 2/2: Running document ingest for ${filesToIngest.length} files in ${outputBucket}...`);
+      // Determine file path based on format
+      let filePath = "";
+      if (fileFormat === "multi") {
+        filePath = outputBucket; // For multi format, use output bucket
+      } else if (fileFormat === "json") {
+        filePath = dataPath; // For json format, use the provided data path
       }
 
+      const loadingInfo = {
+        load_job_id: createData.load_job_id,
+        data_source_id: createData.data_source_id,
+        file_path: filePath,
+      };
+
       const ingestResponse = await fetch(`/ui/${ingestGraphName}/ingest`, {
         method: "POST",
         headers: {
@@ -629,20 +952,26 @@ const Setup = () => {
       }
 
       const ingestData = await ingestResponse.json();
-      //console.log("Ingest response:", ingestData);
-      const filesIngested = ingestData.summary.map((file: any) => file.file_path);
-
-      setIngestMessage(`✅ Document ingestion completed successfully! Ingested ${filesIngested.length} into your knowledge graph.`);
+      console.log("Ingest response:", ingestData);
 
+      if (fileFormat === "multi") {
+        setIngestMessage(
+          `✅ Data ingested successfully! AWS Bedrock BDA processed documents from ${inputBucket} and loaded results from ${outputBucket}.`
+        );
+      } else {
+        setIngestMessage(`✅ Data ingested successfully! Processed documents from ${dataPath}.`);
+      }
     } catch (error: any) {
-      console.error("Error ingesting files:", error);
+      console.error("Error ingesting S3 data:", error);
       setIngestMessage(`❌ Error: ${error.message}`);
     } finally {
       setIsIngesting(false);
     }
   };
 
-  // Check rebuild status
+  // -------------------------
+  // Rebuild / Refresh handlers
+  // -------------------------
   const checkRebuildStatus = async (graphName: string, showLoadingMessage: boolean = false) => {
     if (!graphName) return;
 
@@ -664,9 +993,9 @@ const Setup = () => {
         const statusData = await statusResponse.json();
         const wasRunning = isRebuildRunning;
         const isCurrentlyRunning = statusData.is_running || false;
-        
+
         setIsRebuildRunning(isCurrentlyRunning);
-        
+
         if (isCurrentlyRunning) {
           const startTime = statusData.started_at ? new Date(statusData.started_at * 1000).toLocaleString() : "unknown time";
           setRefreshMessage(`⚠️ A rebuild is already in progress for "${graphName}" (started at ${startTime}). Please wait for it to complete.`);
@@ -695,7 +1024,6 @@ const Setup = () => {
     }
   };
 
-  // Handle refresh knowledge graph
   const handleRefreshGraph = async () => {
     if (!refreshGraphName) {
       setRefreshMessage("Please select a graph");
@@ -722,7 +1050,7 @@ const Setup = () => {
 
     try {
       const creds = localStorage.getItem("creds");
-      
+
       const response = await fetch(`/ui/${refreshGraphName}/rebuild_graph`, {
         method: "POST",
         headers: {
@@ -748,22 +1076,9 @@ const Setup = () => {
     }
   };
 
-  // Check rebuild status when graph selection or dialog state changes
-  useEffect(() => {
-    if (refreshOpen && refreshGraphName) {
-      // Check status immediately when dialog opens
-      checkRebuildStatus(refreshGraphName, true);
-      
-      // Set up polling to check status every 5 seconds while dialog is open
-      const intervalId = setInterval(() => {
-        checkRebuildStatus(refreshGraphName, false);
-      }, 5000);
-      
-      return () => clearInterval(intervalId);
-    }
-  }, [refreshOpen, refreshGraphName]);
-
-  // Load available graphs from localStorage on mount
+  // -------------------------
+  // Init effects and helpers
+  // -------------------------
   useEffect(() => {
     const store = JSON.parse(localStorage.getItem("site") || "{}");
     if (store.graphs && Array.isArray(store.graphs)) {
@@ -823,9 +1138,7 @@ const Setup = () => {
       if (createData.status !== "success") {
         if (createData.message && createData.message.includes("already exists")) {
           // Ask user to confirm before proceeding with initialization
-          const shouldInitialize = await confirm(
-            `Graph "${graphName}" already exists. Do you want to initialize it with GraphRAG schema?`
-          );
+          const shouldInitialize = await confirm(`Graph "${graphName}" already exists. Do you want to initialize it with GraphRAG schema?`);
           if (!shouldInitialize) {
             setStatusMessage("Operation cancelled by user.");
             setStatusType("error");
@@ -858,13 +1171,13 @@ const Setup = () => {
         setIsInitializing(false);
         return;
       }
-      
+
       setStatusMessage(`✅ Graph "${graphName}" created and initialized successfully! You can now close this dialog.`);
       setStatusType("success");
-      
+
       // Add the new graph to the available graphs list
       const newGraph = graphName;
-      setAvailableGraphs(prev => {
+      setAvailableGraphs((prev) => {
         if (!prev.includes(newGraph)) {
           const updated = [...prev, newGraph];
           // Update localStorage as well
@@ -875,7 +1188,7 @@ const Setup = () => {
         }
         return prev;
       });
-      
+
       // Set the newly created graph as selected for ingestion
       setIngestGraphName(graphName);
       setRefreshGraphName(graphName);
@@ -889,47 +1202,49 @@ const Setup = () => {
     }
   };
 
+  // While the refresh dialog is open and a graph is selected, check rebuild status now and then poll it; re-runs when either value changes
+  useEffect(() => {
+    if (refreshOpen && refreshGraphName) {
+      // Check status immediately when dialog opens (second argument is showLoadingMessage)
+      checkRebuildStatus(refreshGraphName, true);
+
+      // Poll every 5 seconds while the dialog is open; the cleanup returned below clears the interval when the effect re-runs or the component unmounts
+      const intervalId = setInterval(() => {
+        checkRebuildStatus(refreshGraphName, false);
+      }, 5000);
+
+      return () => clearInterval(intervalId);
+    }
+  }, [refreshOpen, refreshGraphName]);
+
+
+  // -------------------------
+  // Render
+  // -------------------------
   return (
     <div className="h-[100vh] w-full bg-white dark:bg-background p-8">
       <div className="max-w-7xl mx-auto">
         <div className="mb-10">
-          <Button
-            variant="outline"
-            onClick={() => navigate("/chat")}
-            className="mb-4 dark:border-[#3D3D3D]"
-          >
+          <Button variant="outline" onClick={() => navigate("/chat")} className="mb-4 dark:border-[#3D3D3D]">
             <ArrowLeft className="h-4 w-4 mr-2" />
             Back to Chat
           </Button>
-          <h1 className="text-2xl font-bold mb-2 text-black dark:text-white">
-            Knowledge Graph Administration
-          </h1>
-          <p className="text-sm text-gray-600 dark:text-[#D9D9D9]">
-            Configure and manage your knowledge graphs
-          </p>
+          <h1 className="text-2xl font-bold mb-2 text-black dark:text-white">Knowledge Graph Administration</h1>
+          <p className="text-sm text-gray-600 dark:text-[#D9D9D9]">Configure and manage your knowledge graphs</p>
         </div>
 
         {/* Three cards displayed horizontally */}
         <div className="grid grid-cols-1 lg:grid-cols-3 gap-6">
-          
           {/* Section 1: Initialize Knowledge Graph */}
           <div className="border border-gray-300 dark:border-[#3D3D3D] rounded-lg p-6 bg-white dark:bg-shadeA flex flex-col h-full">
             <div className="mb-4">
               <div className="w-12 h-12 rounded-full bg-tigerOrange/10 flex items-center justify-center mb-4">
                 <Database className="h-6 w-6 text-tigerOrange" />
               </div>
-              <h2 className="text-lg font-semibold mb-2 text-black dark:text-white">
-                Initialize Knowledge Graph
-              </h2>
-              <p className="text-sm text-gray-600 dark:text-[#D9D9D9] mb-4">
-                Create the knowledge graph schema and queries for future document ingestion.
-              </p>
+              <h2 className="text-lg font-semibold mb-2 text-black dark:text-white">Initialize Knowledge Graph</h2>
+              <p className="text-sm text-gray-600 dark:text-[#D9D9D9] mb-4">Create the knowledge graph schema and queries for future document ingestion.</p>
             </div>
             <div className="mt-auto pt-4 border-t border-gray-300 dark:border-[#3D3D3D]">
-              <Button 
-                className="gradient w-full text-white"
-                onClick={() => setInitializeGraphOpen(true)}
-              >
+              <Button className="gradient w-full text-white" onClick={() => setInitializeGraphOpen(true)}>
                 <Database className="h-4 w-4 mr-2" />
                 Initialize Graph
               </Button>
@@ -942,18 +1257,11 @@ const Setup = () => {
               <div className="w-12 h-12 rounded-full bg-tigerOrange/10 flex items-center justify-center mb-4">
                 <Upload className="h-6 w-6 text-tigerOrange" />
               </div>
-              <h2 className="text-lg font-semibold mb-2 text-black dark:text-white">
-                Ingest to Knowledge Graph
-              </h2>
-              <p className="text-sm text-gray-600 dark:text-[#D9D9D9] mb-4">
-                Upload and ingest documents into your knowledge graph for future content processing.
-              </p>
+              <h2 className="text-lg font-semibold mb-2 text-black dark:text-white">Ingest to Knowledge Graph</h2>
+              <p className="text-sm text-gray-600 dark:text-[#D9D9D9] mb-4">Upload and ingest documents into your knowledge graph for future content processing.</p>
             </div>
             <div className="mt-auto pt-4 border-t border-gray-300 dark:border-[#3D3D3D]">
-              <Button 
-                className="gradient w-full text-white"
-                onClick={() => setIngestOpen(true)}
-              >
+              <Button className="gradient w-full text-white" onClick={() => setIngestOpen(true)}>
                 <Upload className="h-4 w-4 mr-2" />
                 Ingest Document
               </Button>
@@ -966,28 +1274,20 @@ const Setup = () => {
               <div className="w-12 h-12 rounded-full bg-tigerOrange/10 flex items-center justify-center mb-4">
                 <RefreshCw className="h-6 w-6 text-tigerOrange" />
               </div>
-              <h2 className="text-lg font-semibold mb-2 text-black dark:text-white">
-                Refresh Knowledge Graph
-              </h2>
-              <p className="text-sm text-gray-600 dark:text-[#D9D9D9] mb-4">
-                Process new documents in your knowledge graph to refresh its content.
-              </p>
+              <h2 className="text-lg font-semibold mb-2 text-black dark:text-white">Refresh Knowledge Graph</h2>
+              <p className="text-sm text-gray-600 dark:text-[#D9D9D9] mb-4">Process new documents in your knowledge graph to refresh its content.</p>
             </div>
             <div className="mt-auto pt-4 border-t border-gray-300 dark:border-[#3D3D3D]">
-              <Button 
-                className="gradient w-full text-white"
-                onClick={() => setRefreshOpen(true)}
-              >
+              <Button className="gradient w-full text-white" onClick={() => setRefreshOpen(true)}>
                 <RefreshCw className="h-4 w-4 mr-2" />
                 Refresh Graph
               </Button>
             </div>
           </div>
-
         </div>
 
         {/* Initialize Graph Dialog */}
-        <Dialog 
+        <Dialog
           open={initializeGraphOpen}
           onOpenChange={(open) => {
             // Prevent closing if confirm dialog is open
@@ -997,22 +1297,17 @@ const Setup = () => {
             setInitializeGraphOpen(open);
           }}
         >
-          <DialogContent 
-            className="sm:max-w-[500px] bg-white dark:bg-background border-gray-300 dark:border-[#3D3D3D]"
-            onInteractOutside={(e) => e.preventDefault()}
-          >
+          <DialogContent className="sm:max-w-[500px] bg-white dark:bg-background border-gray-300 dark:border-[#3D3D3D]" onInteractOutside={(e) => e.preventDefault()}>
             <DialogHeader>
               <DialogTitle className="text-black dark:text-white">Initialize Knowledge Graph</DialogTitle>
               <DialogDescription className="text-gray-600 dark:text-[#D9D9D9]">
                 Enter the name of your knowledge graph. The system will create it if necessary and initialize it with the GraphRAG schema.
               </DialogDescription>
             </DialogHeader>
-            
+
             <div className="py-4">
               <div className="mb-4">
-                <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                  Knowledge Graph Name
-                </label>
+                <label className="block text-sm font-medium mb-2 text-black dark:text-white">Knowledge Graph Name</label>
                 <Input
                   placeholder="e.g., MyKnowledgeGraph"
                   value={graphName}
@@ -1071,11 +1366,7 @@ const Setup = () => {
                   >
                     Cancel
                   </Button>
-                  <Button
-                    onClick={handleInitializeGraph}
-                    disabled={isInitializing || !graphName.trim()}
-                    className="gradient text-white"
-                  >
+                  <Button onClick={handleInitializeGraph} disabled={isInitializing || !graphName.trim()} className="gradient text-white">
                     {isInitializing ? (
                       <>
                         <Loader2 className="h-4 w-4 mr-2 animate-spin" />
@@ -1095,8 +1386,8 @@ const Setup = () => {
         </Dialog>
 
         {/* Data Ingest Dialog */}
-        <Dialog 
-          open={ingestOpen} 
+        <Dialog
+          open={ingestOpen}
           onOpenChange={(open) => {
             // Prevent closing if confirm dialog is open
             if (!open && isConfirmDialogOpen) {
@@ -1105,10 +1396,7 @@ const Setup = () => {
             setIngestOpen(open);
           }}
         >
-          <DialogContent 
-            className="sm:max-w-[700px] bg-white dark:bg-background border-gray-300 dark:border-[#3D3D3D] max-h-[80vh] overflow-y-auto"
-            onInteractOutside={(e) => e.preventDefault()}
-          >
+          <DialogContent className="sm:max-w-[700px] bg-white dark:bg-background border-gray-300 dark:border-[#3D3D3D] max-h-[80vh] overflow-y-auto" onInteractOutside={(e) => e.preventDefault()}>
             <DialogHeader>
               <DialogTitle className="text-black dark:text-white">Document Ingestion for Knowledge Graph</DialogTitle>
               <DialogDescription className="text-gray-600 dark:text-[#D9D9D9]">
@@ -1118,9 +1406,7 @@ const Setup = () => {
 
             {/* Graph Name Selection */}
             <div className="mb-4">
-              <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                Target Graph Name
-              </label>
+              <label className="block text-sm font-medium mb-2 text-black dark:text-white">Target Graph Name</label>
               <Select value={ingestGraphName} onValueChange={setIngestGraphName} disabled={isIngesting}>
                 <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA" disabled={isIngesting}>
                   <SelectValue placeholder="Select a graph" />
@@ -1142,7 +1428,6 @@ const Setup = () => {
             </div>
 
             <Tabs value={activeTab} onValueChange={(value) => {
-              // Block tab switching when ingesting
               if (!isIngesting) {
                 setActiveTab(value);
               }
@@ -1156,40 +1441,38 @@ const Setup = () => {
                   <CloudDownload className="h-4 w-4 mr-2" />
                   Download from Cloud
                 </TabsTrigger>
-                <TabsTrigger value="AmazonBDA" disabled={isIngesting}>
-                  <CloudLightning className="h-4 w-4 mr-2" />
-                  Use Amazon BDA
+                <TabsTrigger value="s3" disabled={isIngesting}>
+                  <CloudCog className="h-4 w-4 mr-2" />
+                  Amazon BDA
                 </TabsTrigger>
               </TabsList>
 
               {/* Upload Data Tab */}
               <TabsContent value="upload" className="space-y-4">
                 <div className="space-y-4">
-                  <p className="text-sm font-medium text-gray-500 dark:text-gray-400 mb-3">
-                    Upload local files to the server and ingest them into your knowledge graph.
-                  </p>
+                  <p className="text-sm font-medium text-gray-500 dark:text-gray-400 mb-3">Upload local files to the server and ingest them into your knowledge graph.</p>
                   <div>
-                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                      Select Files
-                    </label>
-                    <Input
-                      type="file"
-                      multiple
-                      onChange={(e) => setSelectedFiles(e.target.files)}
-                      disabled={isUploading}
-                      className="dark:border-[#3D3D3D] dark:bg-shadeA"
+                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">Select Files</label>
+                    <Input type="file" multiple onChange={(e) => setSelectedFiles(e.target.files)} disabled={isUploading} className="dark:border-[#3D3D3D] dark:bg-shadeA" />
+                    <p className="text-xs text-gray-500 dark:text-gray-400 mt-2">Maximum upload per request: {MAX_UPLOAD_SIZE_MB} MB. {ingestGraphName ? `Upload destination: uploads/${ingestGraphName}/` : ""}</p>
+                  </div>
+
+                  {/* Direct Ingestion Checkbox */}
+                  <div className="flex items-center mt-3 mb-2">
+                    <input
+                      type="checkbox"
+                      id="directIngestion"
+                      checked={directIngestion}
+                      onChange={(e) => setDirectIngestion(e.target.checked)}
+                      className="mr-2 h-4 w-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500"
                     />
-                    <p className="text-xs text-gray-500 dark:text-gray-400 mt-2">
-                      Maximum upload per request: {MAX_UPLOAD_SIZE_MB} MB. {ingestGraphName ? `Upload destination: uploads/${ingestGraphName}/` : ""}
-                    </p>
+                    <label htmlFor="directIngestion" className="text-sm text-gray-700 dark:text-gray-300">
+                      Direct Ingestion (upload + process + ingest all at once)
+                    </label>
                   </div>
 
                   <div className="flex gap-2">
-                    <Button
-                      onClick={handleUploadFiles}
-                      disabled={isUploading || !selectedFiles}
-                      className="gradient text-white"
-                    >
+                    <Button onClick={handleUploadFiles} disabled={isUploading || !selectedFiles} className="gradient text-white">
                       {isUploading ? (
                         <>
                           <Loader2 className="h-4 w-4 mr-2 animate-spin" />
@@ -1204,37 +1487,21 @@ const Setup = () => {
                     </Button>
 
                     {uploadedFiles.length > 0 && (
-                      <Button
-                        onClick={handleDeleteAllFiles}
-                        variant="outline"
-                        className="dark:border-[#3D3D3D]"
-                      >
+                      <Button onClick={handleDeleteAllFiles} variant="outline" className="dark:border-[#3D3D3D]">
                         <Trash2 className="h-4 w-4 mr-2" />
                         Delete All
                       </Button>
                     )}
                   </div>
 
-                  {uploadMessage && (
-                    <div className="p-3 rounded-lg text-sm bg-blue-50 dark:bg-blue-900/20 text-blue-700 dark:text-blue-300">
-                      {uploadMessage}
-                    </div>
-                  )}
+                  {uploadMessage && <div className="p-3 rounded-lg text-sm bg-blue-50 dark:bg-blue-900/20 text-blue-700 dark:text-blue-300">{uploadMessage}</div>}
 
                   {/* Ingest Data Section */}
                   {uploadedFiles.length > 0 && (
                     <div className="border-t border-gray-300 dark:border-[#3D3D3D] pt-4 mt-4">
-                      <h3 className="text-sm font-medium mb-2 text-black dark:text-white">
-                        Ingest Documents into Knowledge Graph
-                      </h3>
-                      <p className="text-xs text-gray-500 dark:text-gray-400 mb-3">
-                        Process uploaded files and add them to the knowledge graph
-                      </p>
-                      <Button
-                        onClick={() => handleIngestDocuments("uploaded")}
-                        disabled={isIngesting}
-                        className="gradient text-white w-full"
-                      >
+                      <h3 className="text-sm font-medium mb-2 text-black dark:text-white">Ingest Documents into Knowledge Graph</h3>
+                      <p className="text-xs text-gray-500 dark:text-gray-400 mb-3">Process uploaded files and add them to the knowledge graph</p>
+                      <Button onClick={handleRunIngest} disabled={isIngesting || !tempSessionId} className="gradient text-white w-full">
                         {isIngesting ? (
                           <>
                             <Loader2 className="h-4 w-4 mr-2 animate-spin" />
@@ -1254,9 +1521,7 @@ const Setup = () => {
                             : ingestMessage.includes("❌")
                             ? "bg-red-50 dark:bg-red-900/20 text-red-700 dark:text-red-300"
                             : "bg-blue-50 dark:bg-blue-900/20 text-blue-700 dark:text-blue-300"
-                        }`}>
-                          {ingestMessage}
-                        </div>
+                        }`}>{ingestMessage}</div>
                       )}
                     </div>
                   )}
@@ -1264,24 +1529,12 @@ const Setup = () => {
                   {/* Uploaded Files List */}
                   {uploadedFiles.length > 0 && (
                     <div className="border border-gray-300 dark:border-[#3D3D3D] rounded-lg p-4">
-                      <h3 className="text-sm font-medium mb-3 text-black dark:text-white">
-                        Uploaded Files ({uploadedFiles.length})
-                      </h3>
+                      <h3 className="text-sm font-medium mb-3 text-black dark:text-white">Uploaded Files ({uploadedFiles.length})</h3>
                       <div className="space-y-2 max-h-48 overflow-y-auto">
                         {uploadedFiles.map((file, index) => (
-                          <div
-                            key={index}
-                            className="flex items-center justify-between p-2 bg-gray-50 dark:bg-shadeA rounded"
-                          >
-                            <span className="text-sm text-black dark:text-white truncate flex-1">
-                              {file.filename}
-                            </span>
-                            <Button
-                              onClick={() => handleDeleteFile(file.filename)}
-                              variant="outline"
-                              size="sm"
-                              className="ml-2 dark:border-[#3D3D3D]"
-                            >
+                          <div key={index} className="flex items-center justify-between p-2 bg-gray-50 dark:bg-shadeA rounded">
+                            <span className="text-sm text-black dark:text-white truncate flex-1">{file.filename}</span>
+                            <Button onClick={() => handleDeleteFile(file.filename)} variant="outline" size="sm" className="ml-2 dark:border-[#3D3D3D]">
                               <Trash2 className="h-3 w-3" />
                             </Button>
                           </div>
@@ -1295,13 +1548,9 @@ const Setup = () => {
               {/* Download from Cloud Storage Tab */}
               <TabsContent value="cloudDownload" className="space-y-4">
                 <div className="space-y-4">
-                  <p className="text-sm font-medium text-gray-500 dark:text-gray-400 mb-3">
-                    Download files from cloud storage and ingest them into your knowledge graph.
-                  </p>
+                  <p className="text-sm font-medium text-gray-500 dark:text-gray-400 mb-3">Download files from cloud storage and ingest them into your knowledge graph.</p>
                   <div>
-                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                      Cloud Storage Provider
-                    </label>
+                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">Cloud Storage Provider</label>
                     <Select value={cloudProvider} onValueChange={(value: "s3" | "gcs" | "azure") => setCloudProvider(value)}>
                       <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA">
                         <SelectValue placeholder="Select cloud provider" />
@@ -1318,64 +1567,24 @@ const Setup = () => {
                   {cloudProvider === "s3" && (
                     <>
                       <div>
-                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                          AWS Access Key
-                        </label>
-                        <Input
-                          type="text"
-                          value={cloudAccessKey}
-                          onChange={(e) => setCloudAccessKey(e.target.value)}
-                          placeholder="Enter AWS access key"
-                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                        />
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">AWS Access Key</label>
+                        <Input type="text" value={cloudAccessKey} onChange={(e) => setCloudAccessKey(e.target.value)} placeholder="Enter AWS access key" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
                       </div>
                       <div>
-                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                          AWS Secret Key
-                        </label>
-                        <Input
-                          type="password"
-                          value={cloudSecretKey}
-                          onChange={(e) => setCloudSecretKey(e.target.value)}
-                          placeholder="Enter AWS secret key"
-                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                        />
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">AWS Secret Key</label>
+                        <Input type="password" value={cloudSecretKey} onChange={(e) => setCloudSecretKey(e.target.value)} placeholder="Enter AWS secret key" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
                       </div>
                       <div>
-                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                          S3 Bucket Name
-                        </label>
-                        <Input
-                          type="text"
-                          value={cloudBucket}
-                          onChange={(e) => setCloudBucket(e.target.value)}
-                          placeholder="my-bucket-name"
-                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                        />
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">S3 Bucket Name</label>
+                        <Input type="text" value={cloudBucket} onChange={(e) => setCloudBucket(e.target.value)} placeholder="my-bucket-name" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
                       </div>
                       <div>
-                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                          Region
-                        </label>
-                        <Input
-                          type="text"
-                          value={cloudRegion}
-                          onChange={(e) => setCloudRegion(e.target.value)}
-                          placeholder="us-east-1"
-                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                        />
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">Region</label>
+                        <Input type="text" value={cloudRegion} onChange={(e) => setCloudRegion(e.target.value)} placeholder="us-east-1" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
                       </div>
                       <div>
-                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                          Prefix/Path (Optional)
-                        </label>
-                        <Input
-                          type="text"
-                          value={cloudPrefix}
-                          onChange={(e) => setCloudPrefix(e.target.value)}
-                          placeholder="folder/subfolder/"
-                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                        />
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">Prefix/Path (Optional)</label>
+                        <Input type="text" value={cloudPrefix} onChange={(e) => setCloudPrefix(e.target.value)} placeholder="folder/subfolder/" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
                       </div>
                     </>
                   )}
@@ -1384,52 +1593,20 @@ const Setup = () => {
                   {cloudProvider === "gcs" && (
                     <>
                       <div>
-                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                          Project ID
-                        </label>
-                        <Input
-                          type="text"
-                          value={gcsProjectId}
-                          onChange={(e) => setGcsProjectId(e.target.value)}
-                          placeholder="my-project-id"
-                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                        />
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">Project ID</label>
+                        <Input type="text" value={gcsProjectId} onChange={(e) => setGcsProjectId(e.target.value)} placeholder="my-project-id" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
                       </div>
                       <div>
-                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                          Service Account JSON
-                        </label>
-                        <textarea
-                          value={gcsCredentials}
-                          onChange={(e) => setGcsCredentials(e.target.value)}
-                          placeholder='{"type": "service_account", ...}'
-                          rows={4}
-                          className="w-full p-2 rounded border dark:border-[#3D3D3D] dark:bg-shadeA text-sm"
-                        />
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">Service Account JSON</label>
+                        <textarea value={gcsCredentials} onChange={(e) => setGcsCredentials(e.target.value)} placeholder='{"type": "service_account", ...}' rows={4} className="w-full p-2 rounded border dark:border-[#3D3D3D] dark:bg-shadeA text-sm" />
                       </div>
                       <div>
-                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                          Bucket Name
-                        </label>
-                        <Input
-                          type="text"
-                          value={cloudBucket}
-                          onChange={(e) => setCloudBucket(e.target.value)}
-                          placeholder="my-gcs-bucket"
-                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                        />
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">Bucket Name</label>
+                        <Input type="text" value={cloudBucket} onChange={(e) => setCloudBucket(e.target.value)} placeholder="my-gcs-bucket" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
                       </div>
                       <div>
-                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                          Prefix/Path (Optional)
-                        </label>
-                        <Input
-                          type="text"
-                          value={cloudPrefix}
-                          onChange={(e) => setCloudPrefix(e.target.value)}
-                          placeholder="folder/subfolder/"
-                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                        />
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">Prefix/Path (Optional)</label>
+                        <Input type="text" value={cloudPrefix} onChange={(e) => setCloudPrefix(e.target.value)} placeholder="folder/subfolder/" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
                       </div>
                     </>
                   )}
@@ -1438,67 +1615,27 @@ const Setup = () => {
                   {cloudProvider === "azure" && (
                     <>
                       <div>
-                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                          Storage Account Name
-                        </label>
-                        <Input
-                          type="text"
-                          value={azureAccountName}
-                          onChange={(e) => setAzureAccountName(e.target.value)}
-                          placeholder="mystorageaccount"
-                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                        />
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">Storage Account Name</label>
+                        <Input type="text" value={azureAccountName} onChange={(e) => setAzureAccountName(e.target.value)} placeholder="mystorageaccount" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
                       </div>
                       <div>
-                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                          Account Key
-                        </label>
-                        <Input
-                          type="password"
-                          value={azureAccountKey}
-                          onChange={(e) => setAzureAccountKey(e.target.value)}
-                          placeholder="Enter account key"
-                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                        />
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">Account Key</label>
+                        <Input type="password" value={azureAccountKey} onChange={(e) => setAzureAccountKey(e.target.value)} placeholder="Enter account key" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
                       </div>
                       <div>
-                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                          Container Name
-                        </label>
-                        <Input
-                          type="text"
-                          value={azureContainer}
-                          onChange={(e) => setAzureContainer(e.target.value)}
-                          placeholder="my-container"
-                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                        />
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">Container Name</label>
+                        <Input type="text" value={azureContainer} onChange={(e) => setAzureContainer(e.target.value)} placeholder="my-container" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
                       </div>
                       <div>
-                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                          Prefix/Path (Optional)
-                        </label>
-                        <Input
-                          type="text"
-                          value={cloudPrefix}
-                          onChange={(e) => setCloudPrefix(e.target.value)}
-                          placeholder="folder/subfolder/"
-                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                        />
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">Prefix/Path (Optional)</label>
+                        <Input type="text" value={cloudPrefix} onChange={(e) => setCloudPrefix(e.target.value)} placeholder="folder/subfolder/" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
                       </div>
                     </>
                   )}
-                  {ingestGraphName && (
-                    <p className="text-xs text-gray-500 dark:text-gray-400 mb-2">
-                      Download destination: downloaded_files_cloud/{ingestGraphName}/
-                    </p>
-                  )}
 
                   <div className="pt-4 border-t border-gray-300 dark:border-[#3D3D3D]">
-                    <Button 
-                      onClick={handleCloudDownload}
-                      disabled={isDownloading}
-                      className="gradient text-white w-full"
-                    >
+                    <p className="text-xs text-gray-500 dark:text-gray-400 mb-2">Download destination: downloaded_files_cloud/{ingestGraphName}/</p>
+                    <Button onClick={handleCloudDownload} disabled={isDownloading} className="gradient text-white w-full">
                       {isDownloading ? (
                         <>
                           <Loader2 className="h-4 w-4 mr-2 animate-spin" />
@@ -1520,43 +1657,24 @@ const Setup = () => {
                         : downloadMessage.includes("❌")
                         ? "bg-red-50 dark:bg-red-900/20 text-red-700 dark:text-red-300"
                         : "bg-blue-50 dark:bg-blue-900/20 text-blue-700 dark:text-blue-300"
-                    }`}>
-                      {downloadMessage}
-                    </div>
+                    }`}>{downloadMessage}</div>
                   )}
 
                   {/* Downloaded Files List */}
                   {downloadedFiles.length > 0 && (
                     <div className="mt-4 border-t border-gray-300 dark:border-[#3D3D3D] pt-4">
                       <div className="flex justify-between items-center mb-2">
-                        <h4 className="text-sm font-medium text-black dark:text-white">
-                          Downloaded Files ({downloadedFiles.length})
-                        </h4>
-                        <Button
-                          onClick={handleDeleteAllDownloadedFiles}
-                          variant="outline"
-                          size="sm"
-                          className="dark:border-[#3D3D3D]"
-                        >
+                        <h4 className="text-sm font-medium text-black dark:text-white">Downloaded Files ({downloadedFiles.length})</h4>
+                        <Button onClick={handleDeleteAllDownloadedFiles} variant="outline" size="sm" className="dark:border-[#3D3D3D]">
                           <Trash2 className="h-3 w-3 mr-1" />
                           Delete All
                         </Button>
                       </div>
                       <ul className="space-y-2 max-h-40 overflow-y-auto">
                         {downloadedFiles.map((file, index) => (
-                          <li
-                            key={index}
-                            className="flex justify-between items-center p-2 bg-gray-50 dark:bg-shadeA rounded text-sm"
-                          >
-                            <span className="text-black dark:text-white truncate flex-1">
-                              {file.name}
-                            </span>
-                            <Button
-                              onClick={() => handleDeleteDownloadedFile(file.name)}
-                              variant="ghost"
-                              size="sm"
-                              className="ml-2 text-red-500 hover:text-red-700 dark:hover:text-red-400"
-                            >
+                          <li key={index} className="flex justify-between items-center p-2 bg-gray-50 dark:bg-shadeA rounded text-sm">
+                            <span className="text-black dark:text-white truncate flex-1">{file.name}</span>
+                            <Button onClick={() => handleDeleteDownloadedFile(file.name)} variant="ghost" size="sm" className="ml-2 text-red-500 hover:text-red-700 dark:hover:text-red-400">
                               <Trash2 className="h-3 w-3" />
                             </Button>
                           </li>
@@ -1568,17 +1686,10 @@ const Setup = () => {
                   {/* Ingest Downloaded Data Section */}
                   {downloadedFiles.length > 0 && (
                     <div className="border-t border-gray-300 dark:border-[#3D3D3D] pt-4 mt-4">
-                      <h3 className="text-sm font-medium mb-2 text-black dark:text-white">
-                        Ingest Documents into Knowledge Graph
-                      </h3>
-                      <p className="text-xs text-gray-500 dark:text-gray-400 mb-3">
-                        Process downloaded files and add them to the knowledge graph
-                      </p>
-                      <Button
-                        onClick={() => handleIngestDocuments("downloaded")}
-                        disabled={isIngesting}
-                        className="gradient text-white w-full"
-                      >
+                      <h3 className="text-sm font-medium mb-2 text-black dark:text-white">Ingest Documents into Knowledge Graph</h3>
+                      <p className="text-xs text-gray-500 dark:text-gray-400 mb-3">Process downloaded files and add them to the knowledge graph</p>
+
+                      <Button onClick={handleRunIngest} disabled={isIngesting || !tempSessionId} className="gradient text-white w-full">
                         {isIngesting ? (
                           <>
                             <Loader2 className="h-4 w-4 mr-2 animate-spin" />
@@ -1598,118 +1709,70 @@ const Setup = () => {
                             : ingestMessage.includes("❌")
                             ? "bg-red-50 dark:bg-red-900/20 text-red-700 dark:text-red-300"
                             : "bg-blue-50 dark:bg-blue-900/20 text-blue-700 dark:text-blue-300"
-                        }`}>
-                          {ingestMessage}
-                        </div>
+                        }`}>{ingestMessage}</div>
                       )}
                     </div>
                   )}
                 </div>
               </TabsContent>
 
-              {/* Amazon BDA Configuration Tab */}
-              <TabsContent value="AmazonBDA" className="space-y-4">
-                <div className="space-y-4">              
-                  <p className="text-sm font-medium text-gray-500 dark:text-gray-400 mb-3">
-                    Process multimodal documents stored in S3 with Amazon Bedrock Data Automation and ingest them into your knowledge graph.
-                  </p>
-
-                  {/* Common fields */}
+              {/* S3 / Bedrock Tab */}
+              <TabsContent value="s3" className="space-y-4">
+                <div className="space-y-4">
                   <div>
-                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                      AWS Access Key
-                    </label>
-                    <Input
-                      type="text"
-                      value={awsAccessKey}
-                      onChange={(e) => setAwsAccessKey(e.target.value)}
-                      placeholder="Enter AWS access key"
-                      className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                      disabled={isIngesting}
-                    />
+                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">File Format</label>
+                    <Select value={fileFormat} onValueChange={(value: "json" | "multi") => setFileFormat(value)}>
+                      <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA">
+                        <SelectValue placeholder="Select file format" />
+                      </SelectTrigger>
+                      <SelectContent>
+                        <SelectItem value="json">JSON</SelectItem>
+                        <SelectItem value="multi">Multi</SelectItem>
+                      </SelectContent>
+                    </Select>
                   </div>
 
+                  {/* Common fields */}
                   <div>
-                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                      AWS Secret Key
-                    </label>
-                    <Input
-                      type="password"
-                      value={awsSecretKey}
-                      onChange={(e) => setAwsSecretKey(e.target.value)}
-                      placeholder="Enter AWS secret key"
-                      className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                      disabled={isIngesting}
-                    />
+                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">AWS Access Key</label>
+                    <Input type="text" value={awsAccessKey} onChange={(e) => setAwsAccessKey(e.target.value)} placeholder="Enter AWS access key" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
                   </div>
 
                   <div>
-                    <div className="flex items-center justify-between mb-2">
-                      <label className="block text-sm font-medium text-black dark:text-white">
-                        Input Bucket
-                      </label>
-                      <label className="flex items-center gap-2 text-sm text-gray-600 dark:text-gray-400 cursor-pointer">
-                        <input
-                          type="checkbox"
-                          checked={skipBDAProcessing}
-                          onChange={(e) => setSkipBDAProcessing(e.target.checked)}
-                          disabled={isIngesting}
-                          className="h-4 w-4 rounded border-gray-300 dark:border-gray-600"
-                        />
-                        <span>Skip BDA (ingest existing BDA output bucket directly)</span>
-                      </label>
-                    </div>
-                    <Input
-                      type="text"
-                      value={inputBucket}
-                      onChange={(e) => setInputBucket(e.target.value)}
-                      placeholder="Enter input bucket name"
-                      className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                      disabled={isIngesting || skipBDAProcessing}
-                    />
+                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">AWS Secret Key</label>
+                    <Input type="password" value={awsSecretKey} onChange={(e) => setAwsSecretKey(e.target.value)} placeholder="Enter AWS secret key" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
                   </div>
 
-                  <div>
-                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                      Output Bucket
-                    </label>
-                    <Input
-                      type="text"
-                      value={outputBucket}
-                      onChange={(e) => setOutputBucket(e.target.value)}
-                      placeholder="Enter output bucket name"
-                      className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                      disabled={isIngesting}
-                    />
-                  </div>
+                  {/* Conditional fields based on file format */}
+                  {fileFormat === "json" ? (
+                    <div>
+                      <label className="block text-sm font-medium mb-2 text-black dark:text-white">Data Path</label>
+                      <Input type="text" value={dataPath} onChange={(e) => setDataPath(e.target.value)} placeholder="s3://bucket-name/path/to/data" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
+                    </div>
+                  ) : (
+                    <>
+                      <div>
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">Input Bucket</label>
+                        <Input type="text" value={inputBucket} onChange={(e) => setInputBucket(e.target.value)} placeholder="Enter input bucket name" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
+                      </div>
 
-                  <div>
-                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                      Region Name
-                    </label>
-                    <Input
-                      type="text"
-                      value={regionName}
-                      onChange={(e) => setRegionName(e.target.value)}
-                      placeholder="e.g., us-east-1"
-                      className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                      disabled={isIngesting}
-                    />
-                  </div>
+                      <div>
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">Output Bucket</label>
+                        <Input type="text" value={outputBucket} onChange={(e) => setOutputBucket(e.target.value)} placeholder="Enter output bucket name" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
+                      </div>
 
-                  {ingestGraphName && (
-                    <p className="text-xs text-gray-500 dark:text-gray-400 mb-2">
-                      Processing destination: Input bucket ({inputBucket || "not specified"}) → Output bucket ({outputBucket || "not specified"}) → Knowledge graph ({ingestGraphName})
-                    </p>
+                      <div>
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">Region Name</label>
+                        <Input type="text" value={regionName} onChange={(e) => setRegionName(e.target.value)} placeholder="e.g., us-east-1" className="dark:border-[#3D3D3D] dark:bg-shadeA" />
+                      </div>
+                    </>
                   )}
 
-                  {/* Ingest S3 Files with Amazon BDA Section */}
+                  {/* Ingest S3 Bedrock Data Section */}
                   <div className="border-t border-gray-300 dark:border-[#3D3D3D] pt-4 mt-4">
-                    <Button
-                      onClick={handleAmazonBDAIngest}
-                      disabled={isIngesting}
-                      className="gradient text-white w-full"
-                    >
+                    <h3 className="text-sm font-medium mb-2 text-black dark:text-white">Ingest S3 Data into Knowledge Graph</h3>
+                    <p className="text-xs text-gray-500 dark:text-gray-400 mb-3">Process S3 data and add it to the knowledge graph using AWS Bedrock BDA for multimodal documents</p>
+                    <Button onClick={handleAmazonBDAIngest} disabled={isIngesting} className="gradient text-white w-full">
                       {isIngesting ? (
                         <>
                           <Loader2 className="h-4 w-4 mr-2 animate-spin" />
@@ -1718,7 +1781,7 @@ const Setup = () => {
                       ) : (
                         <>
                           <Database className="h-4 w-4 mr-2" />
-                          Ingest from S3 Bucket into {ingestGraphName}
+                          Ingest from S3 into {ingestGraphName}
                         </>
                       )}
                     </Button>
@@ -1729,9 +1792,7 @@ const Setup = () => {
                           : ingestMessage.includes("❌")
                           ? "bg-red-50 dark:bg-red-900/20 text-red-700 dark:text-red-300"
                           : "bg-blue-50 dark:bg-blue-900/20 text-blue-700 dark:text-blue-300"
-                      }`}>
-                        {ingestMessage}
-                      </div>
+                      }`}>{ingestMessage}</div>
                     )}
                   </div>
                 </div>
@@ -1754,8 +1815,8 @@ const Setup = () => {
         </Dialog>
 
         {/* Refresh Graph Dialog */}
-        <Dialog 
-          open={refreshOpen} 
+        <Dialog
+          open={refreshOpen}
           onOpenChange={(open) => {
             // Prevent closing if confirm dialog is open
             if (!open && isConfirmDialogOpen) {
@@ -1764,24 +1825,17 @@ const Setup = () => {
             setRefreshOpen(open);
           }}
         >
-          <DialogContent 
-            className="sm:max-w-[500px] bg-white dark:bg-background border-gray-300 dark:border-[#3D3D3D]"
-            onInteractOutside={(e) => e.preventDefault()}
-          >
+          <DialogContent className="sm:max-w-[500px] bg-white dark:bg-background border-gray-300 dark:border-[#3D3D3D]" onInteractOutside={(e) => e.preventDefault()}>
             <DialogHeader>
               <DialogTitle className="text-black dark:text-white">Refresh Knowledge Graph</DialogTitle>
-              <DialogDescription className="text-gray-600 dark:text-[#D9D9D9]">
-                Rebuild the graph content and rerun community detection for your knowledge graph
-              </DialogDescription>
+              <DialogDescription className="text-gray-600 dark:text-[#D9D9D9]">Rebuild the graph content of your knowledge graph</DialogDescription>
             </DialogHeader>
 
             <div className="py-4 space-y-4">
               <div>
-                <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                  Select Graph to Refresh
-                </label>
-                <Select value={refreshGraphName} onValueChange={setRefreshGraphName} disabled={isRefreshing || isRebuildRunning || isCheckingStatus}>
-                  <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA" disabled={isRefreshing || isRebuildRunning || isCheckingStatus}>
+                <label className="block text-sm font-medium mb-2 text-black dark:text-white">Select Graph to Refresh</label>
+                <Select value={refreshGraphName} onValueChange={setRefreshGraphName}>
+                  <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA">
                     <SelectValue placeholder="Select a graph" />
                   </SelectTrigger>
                   <SelectContent>
@@ -1801,13 +1855,8 @@ const Setup = () => {
               </div>
 
               <div className="bg-yellow-50 dark:bg-yellow-900/20 border border-yellow-200 dark:border-yellow-800 rounded-lg p-4">
-                <p className="text-sm text-yellow-800 dark:text-yellow-200 font-medium">
-                  ⚠️ Warning
-                </p>
-                <p className="text-sm text-yellow-700 dark:text-yellow-300 mt-1">
-                  This operation will process new documents and rerun community detection that will interrupt related queries.
-                  Please confirm to proceed.
-                </p>
+                <p className="text-sm text-yellow-800 dark:text-yellow-200 font-medium">⚠️ Warning</p>
+                <p className="text-sm text-yellow-700 dark:text-yellow-300 mt-1">This operation will rebuild the graph content that will interrupt related queries. Please confirm to proceed.</p>
               </div>
 
               {refreshMessage && (
@@ -1817,28 +1866,13 @@ const Setup = () => {
                     : refreshMessage.includes("❌")
                     ? "bg-red-50 dark:bg-red-900/20 text-red-700 dark:text-red-300"
                     : "bg-blue-50 dark:bg-blue-900/20 text-blue-700 dark:text-blue-300"
-                }`}>
-                  {refreshMessage}
-                </div>
+                }`}>{refreshMessage}</div>
               )}
             </div>
 
             <DialogFooter>
-              <Button
-                variant="outline"
-                onClick={() => {
-                  setRefreshOpen(false);
-                }}
-                disabled={isRefreshing}
-                className="dark:border-[#3D3D3D]"
-              >
-                Close
-              </Button>
-              <Button
-                onClick={handleRefreshGraph}
-                disabled={isRefreshing || !refreshGraphName || isRebuildRunning || isCheckingStatus}
-                className="gradient text-white"
-              >
+              <Button variant="outline" onClick={() => { setRefreshOpen(false); }} disabled={isRefreshing} className="dark:border-[#3D3D3D]">Close</Button>
+              <Button onClick={handleRefreshGraph} disabled={isRefreshing || !refreshGraphName || isRebuildRunning || isCheckingStatus} className="gradient text-white">
                 {isRefreshing ? (
                   <>
                     <Loader2 className="h-4 w-4 mr-2 animate-spin" />
@@ -1873,4 +1907,3 @@ const Setup = () => {
 };
 
 export default Setup;
-
diff --git a/graphrag/app/common b/graphrag/app/common
deleted file mode 120000
index dc879ab..0000000
--- a/graphrag/app/common
+++ /dev/null
@@ -1 +0,0 @@
-../../common
\ No newline at end of file
diff --git a/graphrag/app/configs b/graphrag/app/configs
deleted file mode 120000
index 5992d10..0000000
--- a/graphrag/app/configs
+++ /dev/null
@@ -1 +0,0 @@
-../../configs
\ No newline at end of file
diff --git a/graphrag/app/routers/ui.py b/graphrag/app/routers/ui.py
index 9637347..154faf5 100644
--- a/graphrag/app/routers/ui.py
+++ b/graphrag/app/routers/ui.py
@@ -61,7 +61,7 @@
     ResponseType,
     Role,
 )
-
+from common.utils.text_extractors import TextExtractor
 logger = logging.getLogger(__name__)
 
 use_cypher = os.getenv("USE_CYPHER", "false").lower() == "true"
@@ -989,6 +989,7 @@ async def clear_uploaded_files(
     graphname: str,
     creds: Annotated[tuple[list[str], HTTPBasicCredentials], Depends(ui_basic_auth)],
     filename: str | None = None,
+    session_id: str | None = None,
 ):
     """
     Clear uploaded files for a specific graphname.
@@ -996,6 +997,7 @@ async def clear_uploaded_files(
     Parameters:
     - graphname: The graph name whose files to clear
     - filename: If provided, only delete this specific file. Otherwise, delete all files.
+    - session_id: Optional session ID to delete processed content from temp folder
     """
     try:
         upload_dir = os.path.join("uploads", graphname)
@@ -1008,9 +1010,21 @@ async def clear_uploaded_files(
             }
         
         deleted_files = []
+        text_extractor = TextExtractor()
         
         if filename:
-            # Delete specific file
+            # Delete processed content from JSONL FIRST if session_id provided
+            if session_id:
+                temp_folder = os.path.join("uploads", "ingestion_temp", graphname, session_id)
+                if os.path.exists(temp_folder):
+                    logger.info(f"Deleting processed content for {filename} from temp folder")
+                    result = text_extractor.delete_file_from_jsonl(temp_folder, filename)
+                    if result.get('success'):
+                        logger.info(f"Removed {result.get('removed_count', 0)} processed documents for {filename}")
+                    else:
+                        logger.warning(f"Failed to remove processed content: {result.get('error', 'Unknown error')}")
+            
+            # Then delete the original file
             file_path = os.path.join(upload_dir, filename)
             if os.path.exists(file_path) and os.path.isfile(file_path):
                 os.remove(file_path)
@@ -1321,6 +1335,7 @@ async def delete_cloud_downloads(
     graphname: str,
     credentials: Annotated[HTTPBase, Depends(security)],
     filename: str = None,
+    session_id: str = None,
 ):
     """
     Delete downloaded cloud files for a specific graph.
@@ -1328,6 +1343,7 @@ async def delete_cloud_downloads(
     Parameters:
     - graphname: The graph name whose downloaded files to clear
     - filename: If provided, only delete this specific file. Otherwise, delete all files.
+    - session_id: Optional session ID to delete processed content from temp folder
     """
     try:
         download_dir = os.path.join("downloaded_files_cloud", graphname)
@@ -1340,9 +1356,21 @@ async def delete_cloud_downloads(
             }
         
         deleted_files = []
+        text_extractor = TextExtractor()
         
         if filename:
-            # Delete specific file
+            # Delete processed content from JSONL FIRST if session_id provided
+            if session_id:
+                temp_folder = os.path.join("uploads", "ingestion_temp", graphname, session_id)
+                if os.path.exists(temp_folder):
+                    logger.info(f"Deleting processed content for {filename} from temp folder")
+                    result = text_extractor.delete_file_from_jsonl(temp_folder, filename)
+                    if result.get('success'):
+                        logger.info(f"Removed {result.get('removed_count', 0)} processed documents for {filename}")
+                    else:
+                        logger.warning(f"Failed to remove processed content: {result.get('error', 'Unknown error')}")
+            
+            # Then delete the original file
             file_path = os.path.join(download_dir, filename)
             if os.path.exists(file_path) and os.path.isfile(file_path):
                 os.remove(file_path)
@@ -1379,3 +1407,4 @@ async def delete_cloud_downloads(
         logger.debug_pii(f"Delete error trace:\n{exc}")
         raise HTTPException(status_code=500, detail=f"Error deleting files: {str(e)}")
 
+
diff --git a/graphrag/app/supportai/supportai.py b/graphrag/app/supportai/supportai.py
index 70e03b4..58fdbbc 100644
--- a/graphrag/app/supportai/supportai.py
+++ b/graphrag/app/supportai/supportai.py
@@ -337,9 +337,9 @@ def create_ingest(
     conn: TigerGraphConnection,
 ):
     # Check for invalid combination of multi format and non-s3 data source
-    if ingest_config.data_source.lower() in ["bda", "server"] and ingest_config.get("file_format", "").lower() != "multi":
-        logger.warning(f"File format {ingest_config.get('file_format', '').lower()} is not supported for data source {ingest_config.data_source.lower()}")
-        ingest_config["file_format"] = "multi"
+    if ingest_config.data_source.lower() in ["bda", "server"] and ingest_config.file_format.lower() != "multi":
+        logger.warning(f"File format {ingest_config.file_format.lower()} is not supported for data source {ingest_config.data_source.lower()}")
+        ingest_config.file_format = "multi"
 
     res_ingest_config = {"data_source": ingest_config.data_source.lower()}
     res_ingest_config["file_format"] = ingest_config.file_format.lower()
@@ -485,18 +485,29 @@ def create_ingest(
         if data_path is None:
             raise Exception("Data path not provided for server processing")
         try:
+            # Create temp folder BEFORE processing so extractor can save directly
+            temp_session_id = str(uuid.uuid4())
+            temp_folder = os.path.join("uploads", "ingestion_temp", graphname, temp_session_id)
+            
+            # Process files and save immediately to temp folder (memory efficient)
             extractor = TextExtractor()
-            server_processing_result = extractor.process_folder(data_path, graphname=graphname)
+            server_processing_result = extractor.process_folder(
+                data_path, 
+                graphname=graphname,
+                temp_folder=temp_folder  # Extractor saves files as it processes
+            )
             if server_processing_result.get("statusCode") != 200:
                 raise Exception(f"Server folder processing failed: {server_processing_result}")
-            else:
-                logger.info(f"Server folder processing completed successfully: {server_processing_result}")
+            
+            doc_count = server_processing_result.get("num_documents", 0)
+            logger.info(f"Server folder processing completed: {server_processing_result.get('message')}")
 
-            res_ingest_config["server_jobs"] = server_processing_result.get("documents", [])
+            res_ingest_config["temp_session_id"] = temp_session_id
+            res_ingest_config["temp_folder"] = temp_folder
+            res_ingest_config["file_count"] = doc_count
             res_ingest_config["data_source_id"] = "DocumentContent"
-            # Use a placeholder path that doesn't start with "/" to avoid pyTigerGraph treating it as a file
-            # The actual folder path is stored in server_jobs, this is just for the API call
-            res["data_path"] = "in_response"
+            # Use a placeholder path to indicate temp storage
+            res["data_path"] = "in_temp_storage"
             res["data_source_id"] = res_ingest_config
         except Exception as e:
             raise Exception(f"Error during server folder processing: {e}")

From bf441e889f0903d9dc384187827f39c04d04a9fd Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Mon, 8 Dec 2025 19:06:20 +0530
Subject: [PATCH 3/4] updated code with latest main: added code for temp folder
 support, delete logic

---
 configs/nginx.conf         | 1 +
 configs/server_config.json | 1 +
 ecc/app/common             | 1 +
 ecc/app/configs            | 1 +
 graphrag/app/common        | 1 +
 graphrag/app/configs       | 1 +
 6 files changed, 6 insertions(+)
 create mode 100644 configs/nginx.conf
 create mode 100644 configs/server_config.json
 create mode 100644 ecc/app/common
 create mode 100644 ecc/app/configs
 create mode 100644 graphrag/app/common
 create mode 100644 graphrag/app/configs

diff --git a/configs/nginx.conf b/configs/nginx.conf
new file mode 100644
index 0000000..cffce04
--- /dev/null
+++ b/configs/nginx.conf
@@ -0,0 +1 @@
+../docs/tutorials/configs/nginx.conf
\ No newline at end of file
diff --git a/configs/server_config.json b/configs/server_config.json
new file mode 100644
index 0000000..04e4259
--- /dev/null
+++ b/configs/server_config.json
@@ -0,0 +1 @@
+../docs/tutorials/configs/server_config.json
\ No newline at end of file
diff --git a/ecc/app/common b/ecc/app/common
new file mode 100644
index 0000000..dc879ab
--- /dev/null
+++ b/ecc/app/common
@@ -0,0 +1 @@
+../../common
\ No newline at end of file
diff --git a/ecc/app/configs b/ecc/app/configs
new file mode 100644
index 0000000..5992d10
--- /dev/null
+++ b/ecc/app/configs
@@ -0,0 +1 @@
+../../configs
\ No newline at end of file
diff --git a/graphrag/app/common b/graphrag/app/common
new file mode 120000
index 0000000..dc879ab
--- /dev/null
+++ b/graphrag/app/common
@@ -0,0 +1 @@
+../../common
\ No newline at end of file
diff --git a/graphrag/app/configs b/graphrag/app/configs
new file mode 120000
index 0000000..5992d10
--- /dev/null
+++ b/graphrag/app/configs
@@ -0,0 +1 @@
+../../configs
\ No newline at end of file

From ffcd3e6b7b50c973b91af233bc6334177181949e Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Mon, 8 Dec 2025 20:51:23 +0530
Subject: [PATCH 4/4] restore the original server_file

---
 common/utils/text_extractors.py | 58 +++++++++++++++++++++++++++--
 graphrag/app/routers/ui.py      | 65 +++++++++++++++++++++++++++++++++
 2 files changed, 119 insertions(+), 4 deletions(-)

diff --git a/common/utils/text_extractors.py b/common/utils/text_extractors.py
index 9b5b652..a0766ac 100644
--- a/common/utils/text_extractors.py
+++ b/common/utils/text_extractors.py
@@ -8,6 +8,7 @@
 import uuid
 import base64
 import io
+import re
 import threading
 from pathlib import Path
 import shutil
@@ -20,6 +21,56 @@
 _pymupdf4llm_lock = threading.Lock()
 
 
+# regex for markdown images ![alt](path): group 1 = alt text (no ']'), group 2 = path (no ')' or whitespace)
+_md_pattern = re.compile(r'!\[([^\]]*)\]\(([^)\s]+)\)')
+
+def extract_images(md_text):
+    """
+    Collect every markdown image reference found in md_text, in document order.
+
+    Each entry is {"path": <link target>, "image_id": <file name without its extension>}.
+    """
+    def describe(match):
+        target = match.group(2)
+        stem, _ext = os.path.splitext(os.path.basename(target))
+        return {"path": target, "image_id": stem}
+
+    return [describe(match) for match in _md_pattern.finditer(md_text)]
+
+
+def insert_description_by_id(md_text, image_id, description):
+    """
+    Replace the description (alt text) of each image whose basename == image_id.
+    '[' and ']' in description become '(' and ')': a ']' inside the alt text would
+    stop _md_pattern from matching that image again (e.g. for the later tg:// rewrite).
+    """
+    safe_description = str(description).replace('[', '(').replace(']', ')')
+    def repl(m):
+        old_path = m.group(2)
+        if os.path.splitext(os.path.basename(old_path))[0] == image_id:
+            return f'![{safe_description}]({old_path})'
+        return m.group(0)
+
+    return _md_pattern.sub(repl, md_text)
+
+
+def replace_path_with_tg_protocol(md_text, image_id, tg_reference):
+    """
+    Point every image whose file name (extension stripped) equals image_id at
+    tg://<tg_reference>, keeping its alt text. Other images are left as they are.
+
+    tg_reference is expected to look like 'Graphs_image_1'.
+    """
+    def rewrite(match):
+        alt, target = match.group(1), match.group(2)
+        stem = os.path.splitext(os.path.basename(target))[0]
+        if stem != image_id:
+            # Not the image we were asked about: emit the original text untouched
+            return match.group(0)
+        return f'![{alt}](tg://{tg_reference})'
+
+    return _md_pattern.sub(rewrite, md_text)
+
 class TextExtractor:
     """Class for handling text extraction from various file formats and cleanup."""
 
@@ -325,7 +376,6 @@ def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None):
         import pymupdf4llm
         from PIL import Image as PILImage
         from common.utils.image_data_extractor import describe_image_with_llm
-        from common.utils.markdown_parsing import MarkdownProcessor
 
         # Ensure clean slate - remove folder if it exists from failed previous run
         if image_output_folder.exists():
@@ -370,7 +420,7 @@ def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None):
             logger.warning(f"No content extracted from PDF: {file_path}")
 
         # Extract image references from markdown
-        image_refs = MarkdownProcessor.extract_images(markdown_content)
+        image_refs = extract_images(markdown_content)
 
         if not image_refs:
             # cleanup folder anyway
@@ -395,7 +445,7 @@ def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None):
                 # Image description
                 description = describe_image_with_llm(str(img_path))
 
-                markdown_content = MarkdownProcessor.insert_description_by_id(
+                markdown_content = insert_description_by_id(
                     markdown_content,
                     image_id,
                     description
@@ -415,7 +465,7 @@ def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None):
                 image_doc_id = f"{base_doc_id}_image_{image_counter}"
 
                 # Replace file path with tg:// protocol reference in markdown
-                markdown_content = MarkdownProcessor.replace_path_with_tg_protocol(
+                markdown_content = replace_path_with_tg_protocol(
                     markdown_content,
                     image_id,
                     image_doc_id
diff --git a/graphrag/app/routers/ui.py b/graphrag/app/routers/ui.py
index 154faf5..79207cf 100644
--- a/graphrag/app/routers/ui.py
+++ b/graphrag/app/routers/ui.py
@@ -1408,3 +1408,68 @@ async def delete_cloud_downloads(
         raise HTTPException(status_code=500, detail=f"Error deleting files: {str(e)}")
 
 
+@router.delete(route_prefix + "/{graphname}/ingestion_temp/delete")
+async def delete_ingestion_temp_files(
+    graphname: str,
+    credentials: Annotated[HTTPBase, Depends(security)],
+    session_id: str = None,
+    filename: str = None,
+):
+    """
+    Delete files from ingestion temp folder.
+    - session_id: Required. Names the session folder under uploads/ingestion_temp/<graphname>
+    - filename: If provided, only delete this specific file. Otherwise, delete the whole session folder.
+    Raises HTTPException: 400 (missing session_id or unsafe name), 404 (file not found), 500 (other errors).
+    """
+    try:
+        if not session_id:
+            raise HTTPException(status_code=400, detail="session_id is required")
+
+        # Request-controlled names are joined into paths that get removed: allow single components only
+        for label, part in (("graphname", graphname), ("session_id", session_id), ("filename", filename)):
+            if part and (part in (".", "..") or part != os.path.basename(part)):
+                raise HTTPException(status_code=400, detail=f"Invalid {label}")
+
+        session_dir = os.path.join("uploads", "ingestion_temp", graphname, session_id)
+        if not os.path.exists(session_dir):
+            return {
+                "status": "success",
+                "message": f"No temp files found for session {session_id}",
+                "deleted_files": [],
+            }
+
+        if filename:
+            # Delete specific file
+            file_path = os.path.join(session_dir, filename)
+            if not os.path.isfile(file_path):
+                raise HTTPException(status_code=404, detail=f"File {filename} not found")
+            os.remove(file_path)
+            deleted_files = [filename]
+            logger.info(f"Deleted temp file {filename} from session {session_id}")
+
+            # If session folder is now empty, remove it
+            if not os.listdir(session_dir):
+                os.rmdir(session_dir)
+                logger.info(f"Removed empty session folder {session_id}")
+        else:
+            # Delete entire session folder; report the top-level files it held
+            import shutil
+            entries = os.listdir(session_dir)
+            deleted_files = [name for name in entries if os.path.isfile(os.path.join(session_dir, name))]
+            shutil.rmtree(session_dir)
+            logger.info(f"Deleted session folder {session_id} for graph {graphname}")
+
+        return {
+            "status": "success",
+            "message": f"Successfully deleted {len(deleted_files)} file(s)",
+            "deleted_files": deleted_files,
+            "session_id": session_id,
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        exc = traceback.format_exc()
+        logger.error(f"Error deleting ingestion temp files for graph {graphname}: {e}")
+        logger.debug_pii(f"Delete error trace:\n{exc}")
+        raise HTTPException(status_code=500, detail=f"Error deleting temp files: {str(e)}") from e
\ No newline at end of file