From 439b1baff62591b10010ad5d5064ea80b03fea72 Mon Sep 17 00:00:00 2001
From: Aviraj <100823015+avirajsingh7@users.noreply.github.com>
Date: Wed, 23 Jul 2025 13:21:18 +0530
Subject: [PATCH 1/9] Add script for onboarding organizations in Glific migration

---
 .../organization_onboarding/client.py         | 47 ++++++++++
 .../organization_onboarding/processor.py      | 89 +++++++++++++++++++
 .../organization_onboarding/run_onboarding.py | 20 +++++
 .../organization_onboarding/sample_input.csv  |  4 +
 .../organization_onboarding/validator.py      | 33 +++++++
 5 files changed, 193 insertions(+)
 create mode 100644 backend/glific_migration/organization_onboarding/client.py
 create mode 100644 backend/glific_migration/organization_onboarding/processor.py
 create mode 100644 backend/glific_migration/organization_onboarding/run_onboarding.py
 create mode 100644 backend/glific_migration/organization_onboarding/sample_input.csv
 create mode 100644 backend/glific_migration/organization_onboarding/validator.py

diff --git a/backend/glific_migration/organization_onboarding/client.py b/backend/glific_migration/organization_onboarding/client.py
new file mode 100644
index 000000000..78389ab4b
--- /dev/null
+++ b/backend/glific_migration/organization_onboarding/client.py
@@ -0,0 +1,47 @@
+import json
+import logging
+import requests
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
+
+logger = logging.getLogger(__name__)
+
+
+class OnboardingClient:
+    def __init__(self, api_url, api_key):
+        self.api_url = api_url
+        self.headers = {
+            'accept': 'application/json',
+            'Content-Type': 'application/json',
+            'X-API-KEY': api_key,
+        }
+        self.session = requests.Session()
+        retries = Retry(total=3, backoff_factor=1, status_forcelist=[429])
+        self.session.mount('https://', HTTPAdapter(max_retries=retries))
+
+    def send(self, row):
+        payload = {
+            "organization_name": row['organization_name'],
+            "project_name": row['project_name'],
+            "email": row['email'],
+            "password": row['password'],
+            "user_name": row['user_name'],
+        }
+
+        try:
+            response = self.session.post(self.api_url, headers=self.headers, json=payload, timeout=10)
+            response_json = response.json()
+            success = (
+                response.status_code == 200 and
+                all(k in response_json for k in ['organization_id', 'project_id', 'user_id', 'api_key'])
+            )
+            return success, {k: v for k, v in response_json.items() if k != 'password'}
+        except requests.exceptions.Timeout:
+            return False, {"error": "Request timed out"}
+        except requests.exceptions.RequestException as e:
+            return False, {"error": str(e)}
+        except json.JSONDecodeError as e:
+            return False, {"error": f"Invalid JSON response: {str(e)}"}
+        except Exception as e:
+            logger.exception("Unexpected error during API call")
+            return False, {"error": f"Unexpected error: {str(e)}"}
diff --git a/backend/glific_migration/organization_onboarding/processor.py b/backend/glific_migration/organization_onboarding/processor.py
new file mode 100644
index 000000000..e10bfca7d
--- /dev/null
+++ b/backend/glific_migration/organization_onboarding/processor.py
@@ -0,0 +1,89 @@
+import csv
+import json
+import logging
+
+from .client import OnboardingClient
+from .validator import CSVValidator
+
+logger = logging.getLogger(__name__)
+
+
+class OnboardingProcessor:
+    def __init__(self, input_filename, output_filename, api_url, api_key):
+        self.input_filename = input_filename
+        self.output_filename = output_filename
+        self.client = OnboardingClient(api_url, api_key)
+        self.csv_validator = CSVValidator(['organization_name', 'project_name', 'email', 'password', 'user_name'])
+        self.output_headers = [
+            'organization_name', 'organization_id',
+            'project_name', 'project_id',
+            'user_name', 'user_id',
+            'api_key',
+            'success', 'response_from_endpoint'
+        ]
+
+    def create_error_row(self, row, error_message):
+        return {
+            'organization_name': row.get('organization_name', ''),
+            'organization_id': '',
+            'project_name': row.get('project_name', ''),
+            'project_id': '',
+            'user_name': row.get('user_name', ''),
+            'user_id': '',
+            'api_key': '',
+            'success': 'no',
+            'response_from_endpoint': error_message
+        }
+
+    def run(self):
+        try:
+            with open(self.input_filename, 'r', newline='', encoding='utf-8') as infile:
+                reader = list(csv.DictReader(infile))
+
+                if not reader:
+                    logger.error("CSV file is empty.")
+                    return
+
+                is_valid, errors = self.csv_validator.validate_rows(reader)
+                if not is_valid:
+                    logger.error("CSV validation failed:")
+                    for e in errors:
+                        logger.error(f"  - {e}")
+                    print("Validation failed. Check onboarding.log for details.")
+                    return
+
+                with open(self.output_filename, 'w', newline='', encoding='utf-8') as outfile:
+                    writer = csv.DictWriter(outfile, fieldnames=self.output_headers)
+                    writer.writeheader()
+
+                    for row in reader:
+                        logger.info(f"Processing: Org='{row.get('organization_name')}', Project='{row.get('project_name')}'")
+                        success, response_data = self.client.send(row)
+
+                        if success:
+                            logger.info(f"Success: Org='{row['organization_name']}', Project='{row['project_name']}'")
+                        else:
+                            logger.warning(f"Failed: Org='{row['organization_name']}' - {response_data.get('error')}")
+
+                        writer.writerow({
+                            'organization_name': row['organization_name'],
+                            'organization_id': response_data.get('organization_id', ''),
+                            'project_name': row['project_name'],
+                            'project_id': response_data.get('project_id', ''),
+                            'user_name': row['user_name'],
+                            'user_id': response_data.get('user_id', ''),
+                            'api_key': response_data.get('api_key', ''),
+                            'success': 'yes' if success else 'no',
+                            'response_from_endpoint': json.dumps(response_data)
+                        })
+
+        except FileNotFoundError:
+            logger.error(f"Input file '{self.input_filename}' not found.")
+        except PermissionError:
+            logger.error(f"Permission denied to access file '{self.input_filename}'.")
+        except csv.Error as e:
+            logger.error(f"CSV parsing error: {str(e)}")
+        except Exception as e:
+            logger.exception(f"Unhandled error in processor: {str(e)}")
+
+        logger.info(f"Onboarding completed. See {self.output_filename} for results.")
diff --git a/backend/glific_migration/organization_onboarding/run_onboarding.py b/backend/glific_migration/organization_onboarding/run_onboarding.py
new file mode 100644
index 000000000..6fff64227
--- /dev/null
+++ b/backend/glific_migration/organization_onboarding/run_onboarding.py
@@ -0,0 +1,20 @@
+import logging
+from .processor import OnboardingProcessor
+
+logging.basicConfig(
+    filename='onboarding.log',
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+)
+
+def main():
+    input_filename = 'sample_input.csv'
+    output_filename = 'output_onboarding.csv'
+    api_url = 'http://localhost:8000/api/v1/onboard'
+    api_key = 'test_api_key'
+
+    processor = OnboardingProcessor(input_filename, output_filename, api_url, api_key)
+    processor.run()
+
+if __name__ == "__main__":
+    main()
diff --git a/backend/glific_migration/organization_onboarding/sample_input.csv b/backend/glific_migration/organization_onboarding/sample_input.csv
new file mode 100644
index 000000000..38a8974db
--- /dev/null
+++ b/backend/glific_migration/organization_onboarding/sample_input.csv
@@ -0,0 +1,4 @@
+organization_name,project_name,project_description,email,password,user_name
+Glific Foundation,Chatbot Project,Automated WhatsApp Chatbot,onboard1@glific.org,TestPass123,admin1
+TestOrg,EducationBot,AI for Education,onboard2@test.org,TestPass456,edubot
+Acme NGO,SurveyBot,Field Data Collection AI,onboard3@acme.org,TestPass789,survey_lead
diff --git a/backend/glific_migration/organization_onboarding/validator.py b/backend/glific_migration/organization_onboarding/validator.py
new file mode 100644
index 000000000..04aae53bd
--- /dev/null
+++ b/backend/glific_migration/organization_onboarding/validator.py
@@ -0,0 +1,33 @@
+from email_validator import validate_email, EmailNotValidError
+
+
+class CSVValidator:
+    def __init__(self, required_fields):
+        self.required_fields = required_fields
+
+    def validate_rows(self, rows):
+        errors = []
+        seen_projects = set()
+
+        for i, row in enumerate(rows, start=2):
+            for field in self.required_fields:
+                if field not in row or not row[field].strip():
+                    errors.append(f"Row {i}: Missing or empty value for '{field}'")
+
+            project_name = row.get('project_name', '').strip()
+            if project_name in seen_projects:
+                errors.append(f"Row {i}: Duplicate project name '{project_name}'")
+            else:
+                seen_projects.add(project_name)
+
+            email = row.get('email', '').strip()
+            try:
+                validate_email(email, check_deliverability=False)
+            except EmailNotValidError as e:
+                errors.append(f"Row {i}: Invalid email '{email}' - {str(e)}")
+
+            password = row.get('password', '')
+            if len(password) < 8:
+                errors.append(f"Row {i}: Password must be at least 8 characters")
+
+        return len(errors) == 0, errors

From 4b539ab4c93f1dcd381a81cbfad4090eaff599cc Mon Sep 17 00:00:00 2001
From: Aviraj <100823015+avirajsingh7@users.noreply.github.com>
Date: Wed, 23 Jul 2025 14:39:40 +0530
Subject: [PATCH 2/9] Refactor Glific migration onboarding onto shared base processor, API client, and validators

---
 backend/.gitignore                            |   2 +
 backend/glific_migration/__init__.py          |   0
 backend/glific_migration/base_processor.py    |  20 +++
 backend/glific_migration/client.py            |  37 ++++
 .../organization_onboarding/client.py         |  47 -----
 .../organization_onboarding/processor.py      | 169 ++++++++++--------
 .../organization_onboarding/run_onboarding.py |  28 ++-
 .../organization_onboarding/sample_input.csv  |   8 +-
 .../organization_onboarding/validator.py      |  33 ----
 backend/glific_migration/validator.py         |  15 ++
 10 files changed, 188 insertions(+), 171 deletions(-)
 create mode 100644 backend/glific_migration/__init__.py
 create mode 100644 backend/glific_migration/base_processor.py
 create mode 100644 backend/glific_migration/client.py
 delete mode 100644 backend/glific_migration/organization_onboarding/client.py
 delete mode 100644 backend/glific_migration/organization_onboarding/validator.py
 create mode 100644 backend/glific_migration/validator.py

diff --git a/backend/.gitignore b/backend/.gitignore
index 63f67bcd2..1f3151334 100644
--- a/backend/.gitignore
+++ b/backend/.gitignore
@@ -6,3 +6,5 @@ app.egg-info
 htmlcov
 .cache
 .venv
+*.logs
+*.log
\ No newline at end of file
diff --git a/backend/glific_migration/__init__.py b/backend/glific_migration/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/glific_migration/base_processor.py b/backend/glific_migration/base_processor.py
new file mode 100644
index 000000000..995330ae5
--- /dev/null
+++ b/backend/glific_migration/base_processor.py
@@ -0,0 +1,20 @@
+import csv
+import logging
+
+logger = logging.getLogger(__name__)
+
+class BaseCSVProcessor:
+    def __init__(self, input_file, output_file):
+        self.input_file = input_file
+        self.output_file = output_file
+
+    def load_csv(self):
+        with open(self.input_file, newline='', encoding='utf-8') as f:
+            return list(csv.DictReader(f))
+
+    def save_output(self, headers, rows):
+        with open(self.output_file, 'w', newline='', encoding='utf-8') as f:
+            writer = csv.DictWriter(f, fieldnames=headers)
+            writer.writeheader()
+            for row in rows:
+                writer.writerow(row)
diff --git a/backend/glific_migration/client.py b/backend/glific_migration/client.py
new file mode 100644
index 000000000..3b9a89469
--- /dev/null
+++ b/backend/glific_migration/client.py
@@ -0,0 +1,37 @@
+import logging
+import requests
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
+
+logger = logging.getLogger(__name__)
+
+class APIClient:
+    def __init__(self, api_key: str):
+        self.headers = {
+            'accept': 'application/json',
+            'Content-Type': 'application/json',
+            'X-API-KEY': api_key
+        }
+        self.session = requests.Session()
+        retries = Retry(total=3, backoff_factor=1, status_forcelist=[429])
+        self.session.mount('https://', HTTPAdapter(max_retries=retries))
+        self.session.mount('http://', HTTPAdapter(max_retries=retries))
+
+    def post(self, url: str, data: dict = None):
+        try:
+            response = self.session.post(url, headers=self.headers, json=data, timeout=10)
+            response.raise_for_status()
+            return True, response.json()
+        except requests.exceptions.HTTPError as http_err:
+            try:
+                error_detail = response.json().get("error", "No error detail provided.")
+            except Exception:
+                error_detail = "Unable to parse error response."
+            logger.error(
+                "HTTP error while posting to %s: %s | Response error: %s",
+                url, str(http_err), error_detail, exc_info=True
+            )
+            return False, {"error": error_detail}
+        except requests.exceptions.RequestException as e:
+            logger.error("Request to %s failed: %s", url, str(e), exc_info=True)
+            return False, {"error": str(e)}
diff --git a/backend/glific_migration/organization_onboarding/client.py b/backend/glific_migration/organization_onboarding/client.py
deleted file mode 100644
index 78389ab4b..000000000
--- a/backend/glific_migration/organization_onboarding/client.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import json
-import logging
-import requests
-from requests.adapters import HTTPAdapter
-from urllib3.util.retry import Retry
-
-logger = logging.getLogger(__name__)
-
-
-class OnboardingClient:
-    def __init__(self, api_url, api_key):
-        self.api_url = api_url
-        self.headers = {
-            'accept': 'application/json',
-            'Content-Type': 'application/json',
-            'X-API-KEY': api_key,
-        }
-        self.session = requests.Session()
-        retries = Retry(total=3, backoff_factor=1, status_forcelist=[429])
-        self.session.mount('https://', HTTPAdapter(max_retries=retries))
-
-    def send(self, row):
-        payload = {
-            "organization_name": row['organization_name'],
-            "project_name": row['project_name'],
-            "email": row['email'],
-            "password": row['password'],
-            "user_name": row['user_name'],
-        }
-
-        try:
-            response = self.session.post(self.api_url, headers=self.headers, json=payload, timeout=10)
-            response_json = response.json()
-            success = (
-                response.status_code == 200 and
-                all(k in response_json for k in ['organization_id', 'project_id', 'user_id', 'api_key'])
-            )
-            return success, {k: v for k, v in response_json.items() if k != 'password'}
-        except requests.exceptions.Timeout:
-            return False, {"error": "Request timed out"}
-        except requests.exceptions.RequestException as e:
-            return False, {"error": str(e)}
-        except json.JSONDecodeError as e:
-            return False, {"error": f"Invalid JSON response: {str(e)}"}
-        except Exception as e:
-            logger.exception("Unexpected error during API call")
-            return False, {"error": f"Unexpected error: {str(e)}"}
diff --git a/backend/glific_migration/organization_onboarding/processor.py b/backend/glific_migration/organization_onboarding/processor.py
index e10bfca7d..4a1c5e725 100644
--- a/backend/glific_migration/organization_onboarding/processor.py
+++ b/backend/glific_migration/organization_onboarding/processor.py
@@ -1,89 +1,100 @@
-import csv
-import json
 import logging
-
-from .client import OnboardingClient
-from .validator import CSVValidator
+import json
+import csv
+from glific_migration.base_processor import BaseCSVProcessor
+from glific_migration.validator import validate_required_fields, validate_email_format, validate_password
+from glific_migration.client import APIClient
 
 logger = logging.getLogger(__name__)
 
-
-class OnboardingProcessor:
-    def __init__(self, input_filename, output_filename, api_url, api_key):
-        self.input_filename = input_filename
-        self.output_filename = output_filename
-        self.client = OnboardingClient(api_url, api_key)
-        self.csv_validator = CSVValidator(['organization_name', 'project_name', 'email', 'password', 'user_name'])
-        self.output_headers = [
+class OnboardProcessor(BaseCSVProcessor):
+    def __init__(self, input_file, output_file, api_url, api_key):
+        super().__init__(input_file, output_file)
+        self.client = APIClient(api_key)
+        self.api_url = api_url
+        self.headers = [
             'organization_name', 'organization_id',
             'project_name', 'project_id',
-            'user_name', 'user_id',
-            'api_key',
+            'user_name', 'user_id', 'api_key',
             'success', 'response_from_endpoint'
         ]
 
-    def create_error_row(self, row, error_message):
-        return {
-            'organization_name': row.get('organization_name', ''),
-            'organization_id': '',
-            'project_name': row.get('project_name', ''),
-            'project_id': '',
-            'user_name': row.get('user_name', ''),
-            'user_id': '',
-            'api_key': '',
-            'success': 'no',
-            'response_from_endpoint': error_message
-        }
-
     def run(self):
-        try:
-            with open(self.input_filename, 'r', newline='', encoding='utf-8') as infile:
-                reader = list(csv.DictReader(infile))
-
-                if not reader:
-                    logger.error("CSV file is empty.")
-                    return
-
-                is_valid, errors = self.csv_validator.validate_rows(reader)
-                if not is_valid:
-                    logger.error("CSV validation failed:")
-                    for e in errors:
-                        logger.error(f"  - {e}")
-                    print("Validation failed. Check onboarding.log for details.")
-                    return
-
-                with open(self.output_filename, 'w', newline='', encoding='utf-8') as outfile:
-                    writer = csv.DictWriter(outfile, fieldnames=self.output_headers)
-                    writer.writeheader()
-
-                    for row in reader:
-                        logger.info(f"Processing: Org='{row.get('organization_name')}', Project='{row.get('project_name')}'")
-                        success, response_data = self.client.send(row)
-
-                        if success:
-                            logger.info(f"Success: Org='{row['organization_name']}', Project='{row['project_name']}'")
-                        else:
-                            logger.warning(f"Failed: Org='{row['organization_name']}' - {response_data.get('error')}")
-
-                        writer.writerow({
-                            'organization_name': row['organization_name'],
-                            'organization_id': response_data.get('organization_id', ''),
-                            'project_name': row['project_name'],
-                            'project_id': response_data.get('project_id', ''),
-                            'user_name': row['user_name'],
-                            'user_id': response_data.get('user_id', ''),
-                            'api_key': response_data.get('api_key', ''),
-                            'success': 'yes' if success else 'no',
-                            'response_from_endpoint': json.dumps(response_data)
-                        })
-
-        except FileNotFoundError:
-            logger.error(f"Input file '{self.input_filename}' not found.")
-        except PermissionError:
-            logger.error(f"Permission denied to access file '{self.input_filename}'.")
-        except csv.Error as e:
-            logger.error(f"CSV parsing error: {str(e)}")
-        except Exception as e:
-            logger.exception(f"Unhandled error in processor: {str(e)}")
-
-        logger.info(f"Onboarding completed. See {self.output_filename} for results.")
+        logger.info("Loading CSV input...")
+        rows = self.load_csv()
+
+        logger.info("Validating CSV rows...")
+        if not self.validate_csv(rows):
+            logger.error("Validation failed. Aborting processing.")
+            return
+
+        logger.info("Creating output CSV and writing headers...")
+        self.init_output_csv()
+
+        logger.info("Processing rows and writing results...")
+        self.process_rows(rows)
+
+        logger.info("Processing complete. Output written to %s", self.output_file)
+
+
+    def validate_csv(self, rows: list[dict]) -> bool:
+        seen_projects = set()
+        validation_errors = []
+
+        for i, row in enumerate(rows, start=2):
+            row_errors = []
+
+            missing = validate_required_fields(row, ['organization_name', 'project_name', 'email', 'password', 'user_name'])
+            if missing:
+                row_errors.append(f"Row {i}: Missing fields: {', '.join(missing)}")
+
+            project_name = row.get('project_name', '')
+            if project_name in seen_projects:
+                row_errors.append(f"Row {i}: Duplicate project name '{project_name}'")
+            else:
+                seen_projects.add(project_name)
+
+            ok, msg = validate_email_format(row.get('email', ''))
+            if not ok:
+                row_errors.append(f"Row {i}: Invalid email: {msg}")
+
+            if not validate_password(row.get('password', '')):
+                row_errors.append(f"Row {i}: Password must be at least 8 characters")
+
+            if row_errors:
+                validation_errors.extend(row_errors)
+
+        if validation_errors:
+            logger.error("CSV validation failed with the following issues:")
+            for error in validation_errors:
+                logger.error(" - %s", error)
+            return False
+
+        logger.info("CSV validation passed.")
+        return True
+
+    def init_output_csv(self):
+        """Initialize CSV file with headers (overwrite if already exists)."""
+        with open(self.output_file, 'w', newline='', encoding='utf-8') as f:
+            writer = csv.DictWriter(f, fieldnames=self.headers)
+            writer.writeheader()
+
+    def process_rows(self, rows: list[dict]):
+        with open(self.output_file, 'a', newline='', encoding='utf-8') as f:
+            writer = csv.DictWriter(f, fieldnames=self.headers)
+
+            for idx, row in enumerate(rows, start=1):
+                logger.info("Sending API request for row %d (project: %s)...", idx, row.get('project_name', ''))
+                success, resp = self.client.post(self.api_url, data=row)
+                logger.info("Row %d processed. Success: %s", idx, success)
+
+                row_result = {
+                    **row,
+                    'success': 'yes' if success else 'no',
+                    'response_from_endpoint': str(resp)
+                }
+                row_result.update(resp if success else {})
+
+                filtered = {k: row_result.get(k, '') for k in self.headers}
+                writer.writerow(filtered)
+                
\ No newline at end of file
diff --git a/backend/glific_migration/organization_onboarding/run_onboarding.py b/backend/glific_migration/organization_onboarding/run_onboarding.py
index 6fff64227..39a918786 100644
--- a/backend/glific_migration/organization_onboarding/run_onboarding.py
+++ b/backend/glific_migration/organization_onboarding/run_onboarding.py
@@ -1,20 +1,32 @@
 import logging
-from .processor import OnboardingProcessor
+from pathlib import Path
+from glific_migration.organization_onboarding.processor import OnboardProcessor
 
+base_dir = Path(__file__).parent.resolve()
+
+log_file = base_dir / "onboarding.logs"
 logging.basicConfig(
-    filename='onboarding.log',
+    filename=str(log_file),
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s',
 )
 
+logger = logging.getLogger(__name__)
+
 def main():
-    input_filename = 'sample_input.csv'
-    output_filename = 'output_onboarding.csv'
-    api_url = 'http://localhost:8000/api/v1/onboard'
-    api_key = 'test_api_key'
+    logger.info("Starting onboarding process...")
+
+    input_file = base_dir / "sample_input.csv"
+    output_file = base_dir / "orgs_output.csv"
+
+    OnboardProcessor(
+        input_file=str(input_file),
+        output_file=str(output_file),
+        api_url="http://localhost:8000/api/v1/onboard",
+        api_key="ApiKey No3x47A5qoIGhm0kVKjQ77dhCqEdWRIQZlEPzzzh7i8"
+    ).run()
 
-    processor = OnboardingProcessor(input_filename, output_filename, api_url, api_key)
-    processor.run()
+    logger.info("Onboarding process completed successfully.")
 
 if __name__ == "__main__":
     main()
diff --git a/backend/glific_migration/organization_onboarding/sample_input.csv b/backend/glific_migration/organization_onboarding/sample_input.csv
index 38a8974db..a46c5339c 100644
--- a/backend/glific_migration/organization_onboarding/sample_input.csv
+++ b/backend/glific_migration/organization_onboarding/sample_input.csv
@@ -1,4 +1,4 @@
-organization_name,project_name,project_description,email,password,user_name
-Glific Foundation,Chatbot Project,Automated WhatsApp Chatbot,onboard1@glific.org,TestPass123,admin1
-TestOrg,EducationBot,AI for Education,onboard2@test.org,TestPass456,edubot
-Acme NGO,SurveyBot,Field Data Collection AI,onboard3@acme.org,TestPass789,survey_lead
+organization_name,project_name,email,password,user_name
+Glific Foundation,Chatbot Project,onboard1@glific.org,TestPass123,admin1
+TestOrg,EducationBot,onboard2@test.org,TestPass456,edubot
+Acme NGO,SurveyBot,onboard3@acme.org,TestPass789,survey_lead
diff --git a/backend/glific_migration/organization_onboarding/validator.py b/backend/glific_migration/organization_onboarding/validator.py
deleted file mode 100644
index 04aae53bd..000000000
--- a/backend/glific_migration/organization_onboarding/validator.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from email_validator import validate_email, EmailNotValidError
-
-
-class CSVValidator:
-    def __init__(self, required_fields):
-        self.required_fields = required_fields
-
-    def validate_rows(self, rows):
-        errors = []
-        seen_projects = set()
-
-        for i, row in enumerate(rows, start=2):
-            for field in self.required_fields:
-                if field not in row or not row[field].strip():
-                    errors.append(f"Row {i}: Missing or empty value for '{field}'")
-
-            project_name = row.get('project_name', '').strip()
-            if project_name in seen_projects:
-                errors.append(f"Row {i}: Duplicate project name '{project_name}'")
-            else:
-                seen_projects.add(project_name)
-
-            email = row.get('email', '').strip()
-            try:
-                validate_email(email, check_deliverability=False)
-            except EmailNotValidError as e:
-                errors.append(f"Row {i}: Invalid email '{email}' - {str(e)}")
-
-            password = row.get('password', '')
-            if len(password) < 8:
-                errors.append(f"Row {i}: Password must be at least 8 characters")
-
-        return len(errors) == 0, errors
diff --git a/backend/glific_migration/validator.py b/backend/glific_migration/validator.py
new file mode 100644
index 000000000..0df4233d8
--- /dev/null
+++ b/backend/glific_migration/validator.py
@@ -0,0 +1,15 @@
+from email_validator import validate_email, EmailNotValidError
+
+def validate_required_fields(row, fields):
+    missing = [f for f in fields if f not in row or not row[f].strip()]
+    return missing
+
+def validate_email_format(email: str):
+    try:
+        validate_email(email, check_deliverability=False)
+        return True, None
+    except EmailNotValidError as e:
+        return False, str(e)
+
+def validate_password(password: str):
+    return len(password) >= 8

From 2e2213c12f09d7c9de669c8fac8ed3617a11f0cf Mon Sep 17 00:00:00 2001
From: Aviraj <100823015+avirajsingh7@users.noreply.github.com>
Date: Wed, 23 Jul 2025 15:01:14 +0530
Subject: [PATCH 3/9] Add processor for syncing credentials

---
 .../organization_onboarding/run_onboarding.py |  2 +-
 .../sync_credentials/processor.py             | 86 +++++++++++++++++++
 .../sync_credentials/run_credentials.py       | 34 ++++++++
 .../sync_credentials/sample_input.csv         |  4 +
 4 files changed, 125 insertions(+), 1 deletion(-)
 create mode 100644 backend/glific_migration/sync_credentials/processor.py
 create mode 100644 backend/glific_migration/sync_credentials/run_credentials.py
 create mode 100644 backend/glific_migration/sync_credentials/sample_input.csv

diff --git a/backend/glific_migration/organization_onboarding/run_onboarding.py b/backend/glific_migration/organization_onboarding/run_onboarding.py
index 39a918786..b1e6c02e0 100644
--- a/backend/glific_migration/organization_onboarding/run_onboarding.py
+++ b/backend/glific_migration/organization_onboarding/run_onboarding.py
@@ -23,7 +23,7 @@ def main():
         input_file=str(input_file),
         output_file=str(output_file),
         api_url="http://localhost:8000/api/v1/onboard",
-        api_key="ApiKey No3x47A5qoIGhm0kVKjQ77dhCqEdWRIQZlEPzzzh7i8"
+        api_key="api_key"
     ).run()
 
     logger.info("Onboarding process completed successfully.")
diff --git a/backend/glific_migration/sync_credentials/processor.py b/backend/glific_migration/sync_credentials/processor.py
new file mode 100644
index 000000000..5367d3f8f
--- /dev/null
+++ b/backend/glific_migration/sync_credentials/processor.py
@@ -0,0 +1,86 @@
+import logging
+import csv
+from glific_migration.base_processor import BaseCSVProcessor
+from glific_migration.client import APIClient
+
+logger = logging.getLogger(__name__)
+
+class CredentialProcessor(BaseCSVProcessor):
+    def __init__(self, input_file, output_file, api_url, api_key, openai_key):
+        super().__init__(input_file, output_file)
+        self.client = APIClient(api_key)
+        self.api_url = api_url
+        self.openai_key = openai_key
+        self.headers = [
+            'organization_id', 'project_id',
+            'success', 'response_from_endpoint'
+        ]
+
+    def run(self):
+        logger.info("Loading CSV input...")
+        rows = self.load_csv()
+
+        logger.info("Validating input data...")
+        self.validate_csv(rows)
+
+        logger.info("Initializing output file if needed...")
+        self.init_output_csv()
+
+        logger.info("Processing rows for credential creation...")
+        self.process_rows(rows)
+
+        logger.info("Credential processing complete.")
+
+    def validate_csv(self, rows: list[dict]):
+        required_fields = {"organization_id", "project_id"}
+
+        for idx, row in enumerate(rows, start=1):
+            missing = required_fields - row.keys()
+            if missing:
+                logger.error(f"Row {idx} is missing required fields: {missing}")
+                raise ValueError(f"Row {idx} is missing required fields: {missing}")
+
+            try:
+                int(row['organization_id'])
+                int(row['project_id'])
+            except ValueError:
+                logger.error(f"Row {idx} has non-integer organization_id or project_id: {row}")
+                raise ValueError(f"Row {idx} has non-integer organization_id or project_id")
+
+    def init_output_csv(self):
+        """Initialize CSV file with headers (overwrite if already exists)."""
+        with open(self.output_file, 'w', newline='', encoding='utf-8') as f:
+            writer = csv.DictWriter(f, fieldnames=self.headers)
+            writer.writeheader()
+
+    def process_rows(self, rows: list[dict]):
+        with open(self.output_file, 'a', newline='', encoding='utf-8') as f:
+            writer = csv.DictWriter(f, fieldnames=self.headers)
+
+            for idx, row in enumerate(rows, start=1):
+                org_id = int(row['organization_id'])
+                proj_id = int(row['project_id'])
+
+                payload = {
+                    "organization_id": org_id,
+                    "project_id": proj_id,
+                    "is_active": True,
+                    "credential": {
+                        "openai": {
+                            "api_key": self.openai_key
+                        }
+                    }
+                }
+
+                logger.info("Sending credential request for row %d (org: %s, project: %s)...", idx, org_id, proj_id)
+                success, resp = self.client.post(self.api_url, data=payload)
+                logger.info("Row %d processed. Success: %s", idx, success)
+
+                result = {
+                    "organization_id": org_id,
+                    "project_id": proj_id,
+                    "success": 'yes' if success else 'no',
+                    "response_from_endpoint": str(resp)
+                }
+
+                writer.writerow(result)
diff --git a/backend/glific_migration/sync_credentials/run_credentials.py b/backend/glific_migration/sync_credentials/run_credentials.py
new file mode 100644
index 000000000..efc43d0b4
--- /dev/null
+++ b/backend/glific_migration/sync_credentials/run_credentials.py
@@ -0,0 +1,34 @@
+import logging
+from pathlib import Path
+from glific_migration.sync_credentials.processor import CredentialProcessor
+
+# Resolve script's base directory
+base_dir = Path(__file__).parent.resolve()
+
+# Log file inside same folder
+log_file = base_dir / "credentials.logs"
+logging.basicConfig(
+    filename=str(log_file),
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+)
+
+logger = logging.getLogger(__name__)
+
+if __name__ == "__main__":
+
+    input_csv = base_dir / "sample_input.csv"
+    output_csv = base_dir / "credentials_output.csv"
+
+    api_url = "http://localhost:8000/api/v1/credentials/"
+    api_key = "api_key"
+    openai_key = "adfgdasfds"
+
+    processor = CredentialProcessor(
+        input_file=str(input_csv),
+        output_file=str(output_csv),
+        api_url=api_url,
+        api_key=api_key,
+        openai_key=openai_key
+    )
+    processor.run()
diff --git a/backend/glific_migration/sync_credentials/sample_input.csv b/backend/glific_migration/sync_credentials/sample_input.csv
new file mode 100644
index 000000000..4ae16e7dd
--- /dev/null
+++ b/backend/glific_migration/sync_credentials/sample_input.csv
@@ -0,0 +1,4 @@
+organization_id,project_id
+2,3
+3,4
+4,5

From a4d157bedee4c9084977a5a15b99913f90b34503 Mon Sep 17 00:00:00 2001
From: Aviraj <100823015+avirajsingh7@users.noreply.github.com>
Date: Wed, 23 Jul 2025 15:23:13 +0530
Subject: [PATCH 4/9] add script to sync assistants

---
 .../sync_assistant/processor.py               | 73 +++++++++++++++++++
 .../sync_assistant/sample_input.csv           |  4 +
 .../sync_assistant/sync_assistant.py          | 25 +++++++
 .../sync_credentials/run_credentials.py       |  4 +-
 4 files changed, 104 insertions(+), 2 deletions(-)
 create mode 100644 backend/glific_migration/sync_assistant/processor.py
 create mode 100644 backend/glific_migration/sync_assistant/sample_input.csv
 create mode 100644 backend/glific_migration/sync_assistant/sync_assistant.py

diff --git a/backend/glific_migration/sync_assistant/processor.py b/backend/glific_migration/sync_assistant/processor.py
new file mode 100644
index 000000000..3a83b5c8b
--- /dev/null
+++ b/backend/glific_migration/sync_assistant/processor.py
@@ -0,0 +1,73 @@
+import logging
+import csv
+from glific_migration.base_processor import BaseCSVProcessor
+from glific_migration.client import APIClient
+
+logger = logging.getLogger(__name__)
+
+class AssistantIngestProcessor(BaseCSVProcessor):
+    def __init__(self, input_file, output_file, base_url):
+        super().__init__(input_file, output_file)
+        self.base_url = base_url
+        self.headers = [
+            'assistant_id', 'api_key',
+            'success', 'response_from_endpoint'
+        ]
+
+    def run(self):
+        logger.info("Loading assistant ingest CSV input...")
+        rows = self.load_csv()
+
+        logger.info("Validating CSV rows...")
+        self.validate_csv(rows)
+
+        logger.info("Initializing output file...")
+        self.init_output_csv()
+
+        logger.info("Processing rows for assistant ingestion...")
+        self.process_rows(rows)
+
+        logger.info("Assistant ingestion processing complete.")
+
+    def validate_csv(self, rows: list[dict]):
+        required_fields = {"assistant_id", "api_key"}
+
+        for idx, row in enumerate(rows, start=1):
+            missing = required_fields - row.keys()
+            if missing:
+                logger.error(f"Row {idx} missing required fields: {missing}")
+                raise ValueError(f"Row {idx} missing required fields: {missing}")
+
+            if not row['assistant_id'].strip() or not row['api_key'].strip():
+                logger.error(f"Row {idx} has empty assistant_id or api_key")
+                raise ValueError(f"Row {idx} has empty assistant_id or api_key")
+
+    def init_output_csv(self):
+        with open(self.output_file, 'w', newline='', encoding='utf-8') as f:
+            writer = csv.DictWriter(f, fieldnames=self.headers)
+            writer.writeheader()
+
+    def process_rows(self, rows: list[dict]):
+        with open(self.output_file, 'a', newline='', encoding='utf-8') as f:
+            writer = csv.DictWriter(f, fieldnames=self.headers)
+
+            for idx, row in enumerate(rows, start=1):
+                assistant_id = row['assistant_id']
+                api_key = row['api_key']
+
+                logger.info("Ingesting assistant for row %d (assistant_id: %s)", idx, assistant_id)
+
+                url = f"{self.base_url.rstrip('/')}/assistant/{assistant_id}/ingest"
+                client = APIClient(api_key=api_key)
+
+                success, resp = client.post(url)
+                logger.info("Row %d processed. Success: %s", idx, success)
+
+                result = {
+                    "assistant_id": assistant_id,
+                    "api_key": api_key,
+                    "success": 'yes' if success else 'no',
+                    "response_from_endpoint": str(resp)
+                }
+
+                writer.writerow(result)
diff --git a/backend/glific_migration/sync_assistant/sample_input.csv b/backend/glific_migration/sync_assistant/sample_input.csv
new file mode 100644
index 000000000..1de470df7
--- /dev/null
+++ b/backend/glific_migration/sync_assistant/sample_input.csv
@@ -0,0 +1,4 @@
+assistant_id,api_key
+asst_1,ApiKey abc123
+asst_2,ApiKey def456
+asst_3,ApiKey ghi789
diff --git a/backend/glific_migration/sync_assistant/sync_assistant.py b/backend/glific_migration/sync_assistant/sync_assistant.py
new file mode 100644
index 000000000..c76872af8
--- /dev/null
+++ b/backend/glific_migration/sync_assistant/sync_assistant.py
@@ -0,0 +1,25 @@
+import logging
+from pathlib import Path
+from glific_migration.sync_assistant.processor import AssistantIngestProcessor
+
+base_dir = Path(__file__).parent.resolve()
+log_file = base_dir / "sync_assistant.logs"
+logging.basicConfig(
+    filename=str(log_file),
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+)
+
+logger = logging.getLogger(__name__)
+
+if __name__ == "__main__":
+    input_csv = base_dir / "sample_input.csv"
+    output_csv = base_dir / "assistants_output.csv"
+    api_base_url = "http://localhost:8000/api/v1"
+
+    processor = AssistantIngestProcessor(
+        input_file=input_csv,
+        output_file=output_csv,
+        base_url=api_base_url
+    )
+    processor.run()
diff --git a/backend/glific_migration/sync_credentials/run_credentials.py b/backend/glific_migration/sync_credentials/run_credentials.py
index efc43d0b4..8cb82b215 100644
--- a/backend/glific_migration/sync_credentials/run_credentials.py
+++ b/backend/glific_migration/sync_credentials/run_credentials.py
@@ -21,8 +21,8 @@
     output_csv = base_dir / "credentials_output.csv"
 
     api_url = "http://localhost:8000/api/v1/credentials/"
-    api_key = "api_key"
-    openai_key = "adfgdasfds"
+    api_key = "test_api_key"
+    openai_key = "openai_api_key"
 
     processor = CredentialProcessor(
         input_file=str(input_csv),

From b8bdc8133358e7160a4abb78e8cbad85cc6627d1 Mon Sep 17 00:00:00 2001
From: Aviraj <100823015+avirajsingh7@users.noreply.github.com>
Date: Wed, 23 Jul 2025 15:24:40 +0530
Subject: [PATCH 5/9] apply pre-commit formatting fixes

---
 backend/.gitignore                            |  2 +-
 backend/glific_migration/base_processor.py    |  5 +-
 backend/glific_migration/client.py            | 20 +++++---
 .../organization_onboarding/processor.py      | 49 ++++++++++++-------
 .../organization_onboarding/run_onboarding.py |  6 ++-
 .../sync_assistant/processor.py               | 26 +++++-----
 .../sync_assistant/sync_assistant.py          |  6 +--
 .../sync_credentials/processor.py             | 44 ++++++++++-------
 .../sync_credentials/run_credentials.py       |  5 +-
 backend/glific_migration/validator.py         |  3 ++
 10 files changed, 100 insertions(+), 66 deletions(-)

diff --git a/backend/.gitignore b/backend/.gitignore
index 1f3151334..395de54ed 100644
--- a/backend/.gitignore
+++ b/backend/.gitignore
@@ -7,4 +7,4 @@ htmlcov
 .cache
 .venv
 *.logs
-*.log
\ No newline at end of file
+*.log
diff --git a/backend/glific_migration/base_processor.py b/backend/glific_migration/base_processor.py
index 995330ae5..9c44375b4 100644
--- a/backend/glific_migration/base_processor.py
+++ b/backend/glific_migration/base_processor.py
@@ -3,17 +3,18 @@
 
 logger = logging.getLogger(__name__)
 
+
 class BaseCSVProcessor:
     def __init__(self, input_file, output_file):
         self.input_file = input_file
         self.output_file = output_file
 
     def load_csv(self):
-        with open(self.input_file, newline='', encoding='utf-8') as f:
+        with open(self.input_file, newline="", encoding="utf-8") as f:
             return list(csv.DictReader(f))
 
     def save_output(self, headers, rows):
-        with open(self.output_file, 'w', newline='', encoding='utf-8') as f:
+        with open(self.output_file, "w", newline="", encoding="utf-8") as f:
             writer = csv.DictWriter(f, fieldnames=headers)
             writer.writeheader()
             for row in rows:
diff --git a/backend/glific_migration/client.py b/backend/glific_migration/client.py
index 3b9a89469..10de7f1e5 100644
--- a/backend/glific_migration/client.py
+++ b/backend/glific_migration/client.py
@@ -5,21 +5,24 @@
 
 logger = logging.getLogger(__name__)
 
+
 class APIClient:
     def __init__(self, api_key: str):
         self.headers = {
-            'accept': 'application/json',
-            'Content-Type': 'application/json',
-            'X-API-KEY': api_key
+            "accept": "application/json",
+            "Content-Type": "application/json",
+            "X-API-KEY": api_key,
         }
         self.session = requests.Session()
         retries = Retry(total=3, backoff_factor=1, status_forcelist=[429])
-        self.session.mount('https://', HTTPAdapter(max_retries=retries))
-        self.session.mount('http://', HTTPAdapter(max_retries=retries))
+        self.session.mount("https://", HTTPAdapter(max_retries=retries))
+        self.session.mount("http://", HTTPAdapter(max_retries=retries))
 
     def post(self, url: str, data: dict = None):
         try:
-            response = self.session.post(url, headers=self.headers, json=data, timeout=10)
+            response = self.session.post(
+                url, headers=self.headers, json=data, timeout=10
+            )
             response.raise_for_status()
             return True, response.json()
         except requests.exceptions.HTTPError as http_err:
@@ -29,7 +32,10 @@ def post(self, url: str, data: dict = None):
                 error_detail = "Unable to parse error response."
             logger.error(
                 "HTTP error while posting to %s: %s | Response error: %s",
-                url, str(http_err), error_detail, exc_info=True
+                url,
+                str(http_err),
+                error_detail,
+                exc_info=True,
             )
             return False, {"error": error_detail}
         except requests.exceptions.RequestException as e:
diff --git a/backend/glific_migration/organization_onboarding/processor.py b/backend/glific_migration/organization_onboarding/processor.py
index 4a1c5e725..884129c61 100644
--- a/backend/glific_migration/organization_onboarding/processor.py
+++ b/backend/glific_migration/organization_onboarding/processor.py
@@ -2,21 +2,31 @@
 import json
 import csv
 from glific_migration.base_processor import BaseCSVProcessor
-from glific_migration.validator import validate_required_fields, validate_email_format, validate_password
+from glific_migration.validator import (
+    validate_required_fields,
+    validate_email_format,
+    validate_password,
+)
 from glific_migration.client import APIClient
 
 logger = logging.getLogger(__name__)
 
+
 class OnboardProcessor(BaseCSVProcessor):
     def __init__(self, input_file, output_file, api_url, api_key):
         super().__init__(input_file, output_file)
         self.client = APIClient(api_key)
         self.api_url = api_url
         self.headers = [
-            'organization_name', 'organization_id',
-            'project_name', 'project_id',
-            'user_name', 'user_id', 'api_key',
-            'success', 'response_from_endpoint'
+            "organization_name",
+            "organization_id",
+            "project_name",
+            "project_id",
+            "user_name",
+            "user_id",
+            "api_key",
+            "success",
+            "response_from_endpoint",
         ]
 
     def run(self):
@@ -36,7 +46,6 @@ def run(self):
 
         logger.info("Processing complete. Output written to %s", self.output_file)
 
-
     def validate_csv(self, rows: list[dict]) -> bool:
         seen_projects = set()
         validation_errors = []
@@ -44,21 +53,24 @@ def validate_csv(self, rows: list[dict]) -> bool:
         for i, row in enumerate(rows, start=2):
             row_errors = []
 
-            missing = validate_required_fields(row, ['organization_name', 'project_name', 'email', 'password', 'user_name'])
+            missing = validate_required_fields(
+                row,
+                ["organization_name", "project_name", "email", "password", "user_name"],
+            )
             if missing:
                 row_errors.append(f"Row {i}: Missing fields: {', '.join(missing)}")
 
-            project_name = row.get('project_name', '')
+            project_name = row.get("project_name", "")
             if project_name in seen_projects:
                 row_errors.append(f"Row {i}: Duplicate project name '{project_name}'")
             else:
                 seen_projects.add(project_name)
 
-            ok, msg = validate_email_format(row.get('email', ''))
+            ok, msg = validate_email_format(row.get("email", ""))
             if not ok:
                 row_errors.append(f"Row {i}: Invalid email: {msg}")
 
-            if not validate_password(row.get('password', '')):
+            if not validate_password(row.get("password", "")):
                 row_errors.append(f"Row {i}: Password must be at least 8 characters")
 
             if row_errors:
@@ -75,26 +87,29 @@ def validate_csv(self, rows: list[dict]) -> bool:
 
     def init_output_csv(self):
         """Initialize CSV file with headers (overwrite if already exists)."""
-        with open(self.output_file, 'w', newline='', encoding='utf-8') as f:
+        with open(self.output_file, "w", newline="", encoding="utf-8") as f:
             writer = csv.DictWriter(f, fieldnames=self.headers)
             writer.writeheader()
 
     def process_rows(self, rows: list[dict]):
-        with open(self.output_file, 'a', newline='', encoding='utf-8') as f:
+        with open(self.output_file, "a", newline="", encoding="utf-8") as f:
             writer = csv.DictWriter(f, fieldnames=self.headers)
 
             for idx, row in enumerate(rows, start=1):
-                logger.info("Sending API request for row %d (project: %s)...", idx, row.get('project_name', ''))
+                logger.info(
+                    "Sending API request for row %d (project: %s)...",
+                    idx,
+                    row.get("project_name", ""),
+                )
                 success, resp = self.client.post(self.api_url, data=row)
                 logger.info("Row %d processed. Success: %s", idx, success)
 
                 row_result = {
                     **row,
-                    'success': 'yes' if success else 'no',
-                    'response_from_endpoint': str(resp)
+                    "success": "yes" if success else "no",
+                    "response_from_endpoint": str(resp),
                 }
                 row_result.update(resp if success else {})
 
-                filtered = {k: row_result.get(k, '') for k in self.headers}
+                filtered = {k: row_result.get(k, "") for k in self.headers}
                 writer.writerow(filtered)
-                
\ No newline at end of file
diff --git a/backend/glific_migration/organization_onboarding/run_onboarding.py b/backend/glific_migration/organization_onboarding/run_onboarding.py
index b1e6c02e0..bb7855409 100644
--- a/backend/glific_migration/organization_onboarding/run_onboarding.py
+++ b/backend/glific_migration/organization_onboarding/run_onboarding.py
@@ -8,11 +8,12 @@
 logging.basicConfig(
     filename=str(log_file),
     level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s',
+    format="%(asctime)s - %(levelname)s - %(message)s",
 )
 
 logger = logging.getLogger(__name__)
 
+
 def main():
     logger.info("Starting onboarding process...")
 
@@ -23,10 +24,11 @@ def main():
         input_file=str(input_file),
         output_file=str(output_file),
         api_url="http://localhost:8000/api/v1/onboard",
-        api_key="api_key"
+        api_key="api_key",
     ).run()
 
     logger.info("Onboarding process completed successfully.")
 
+
 if __name__ == "__main__":
     main()
diff --git a/backend/glific_migration/sync_assistant/processor.py b/backend/glific_migration/sync_assistant/processor.py
index 3a83b5c8b..c93c426a7 100644
--- a/backend/glific_migration/sync_assistant/processor.py
+++ b/backend/glific_migration/sync_assistant/processor.py
@@ -5,14 +5,12 @@
 
 logger = logging.getLogger(__name__)
 
+
 class AssistantIngestProcessor(BaseCSVProcessor):
     def __init__(self, input_file, output_file, base_url):
         super().__init__(input_file, output_file)
         self.base_url = base_url
-        self.headers = [
-            'assistant_id', 'api_key',
-            'success', 'response_from_endpoint'
-        ]
+        self.headers = ["assistant_id", "api_key", "success", "response_from_endpoint"]
 
     def run(self):
         logger.info("Loading assistant ingest CSV input...")
@@ -38,24 +36,28 @@ def validate_csv(self, rows: list[dict]):
                 logger.error(f"Row {idx} missing required fields: {missing}")
                 raise ValueError(f"Row {idx} missing required fields: {missing}")
 
-            if not row['assistant_id'].strip() or not row['api_key'].strip():
+            if not row["assistant_id"].strip() or not row["api_key"].strip():
                 logger.error(f"Row {idx} has empty assistant_id or api_key")
                 raise ValueError(f"Row {idx} has empty assistant_id or api_key")
 
     def init_output_csv(self):
-        with open(self.output_file, 'w', newline='', encoding='utf-8') as f:
+        with open(self.output_file, "w", newline="", encoding="utf-8") as f:
             writer = csv.DictWriter(f, fieldnames=self.headers)
             writer.writeheader()
 
     def process_rows(self, rows: list[dict]):
-        with open(self.output_file, 'a', newline='', encoding='utf-8') as f:
+        with open(self.output_file, "a", newline="", encoding="utf-8") as f:
             writer = csv.DictWriter(f, fieldnames=self.headers)
 
             for idx, row in enumerate(rows, start=1):
-                assistant_id = row['assistant_id']
-                api_key = row['api_key']
+                assistant_id = row["assistant_id"]
+                api_key = row["api_key"]
 
-                logger.info("Ingesting assistant for row %d (assistant_id: %s)", idx, assistant_id)
+                logger.info(
+                    "Ingesting assistant for row %d (assistant_id: %s)",
+                    idx,
+                    assistant_id,
+                )
 
                 url = f"{self.base_url.rstrip('/')}/assistant/{assistant_id}/ingest"
                 client = APIClient(api_key=api_key)
@@ -66,8 +68,8 @@ def process_rows(self, rows: list[dict]):
                 result = {
                     "assistant_id": assistant_id,
                     "api_key": api_key,
-                    "success": 'yes' if success else 'no',
-                    "response_from_endpoint": str(resp)
+                    "success": "yes" if success else "no",
+                    "response_from_endpoint": str(resp),
                 }
 
                 writer.writerow(result)
diff --git a/backend/glific_migration/sync_assistant/sync_assistant.py b/backend/glific_migration/sync_assistant/sync_assistant.py
index c76872af8..037562b23 100644
--- a/backend/glific_migration/sync_assistant/sync_assistant.py
+++ b/backend/glific_migration/sync_assistant/sync_assistant.py
@@ -7,7 +7,7 @@
 logging.basicConfig(
     filename=str(log_file),
     level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s',
+    format="%(asctime)s - %(levelname)s - %(message)s",
 )
 
 logger = logging.getLogger(__name__)
@@ -18,8 +18,6 @@
     api_base_url = "http://localhost:8000/api/v1"
 
     processor = AssistantIngestProcessor(
-        input_file=input_csv,
-        output_file=output_csv,
-        base_url=api_base_url
+        input_file=input_csv, output_file=output_csv, base_url=api_base_url
     )
     processor.run()
diff --git a/backend/glific_migration/sync_credentials/processor.py b/backend/glific_migration/sync_credentials/processor.py
index 5367d3f8f..d2ccf4990 100644
--- a/backend/glific_migration/sync_credentials/processor.py
+++ b/backend/glific_migration/sync_credentials/processor.py
@@ -5,6 +5,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class CredentialProcessor(BaseCSVProcessor):
     def __init__(self, input_file, output_file, api_url, api_key, openai_key):
         super().__init__(input_file, output_file)
@@ -12,8 +13,10 @@ def __init__(self, input_file, output_file, api_url, api_key, openai_key):
         self.api_url = api_url
         self.openai_key = openai_key
         self.headers = [
-            'organization_id', 'project_id',
-            'success', 'response_from_endpoint'
+            "organization_id",
+            "project_id",
+            "success",
+            "response_from_endpoint",
         ]
 
     def run(self):
@@ -41,46 +44,51 @@ def validate_csv(self, rows: list[dict]):
                 raise ValueError(f"Row {idx} is missing required fields: {missing}")
 
             try:
-                int(row['organization_id'])
-                int(row['project_id'])
+                int(row["organization_id"])
+                int(row["project_id"])
             except ValueError:
-                logger.error(f"Row {idx} has non-integer organization_id or project_id: {row}")
-                raise ValueError(f"Row {idx} has non-integer organization_id or project_id")
+                logger.error(
+                    f"Row {idx} has non-integer organization_id or project_id: {row}"
+                )
+                raise ValueError(
+                    f"Row {idx} has non-integer organization_id or project_id"
+                )
 
     def init_output_csv(self):
         """Initialize CSV file with headers (overwrite if already exists)."""
-        with open(self.output_file, 'w', newline='', encoding='utf-8') as f:
+        with open(self.output_file, "w", newline="", encoding="utf-8") as f:
             writer = csv.DictWriter(f, fieldnames=self.headers)
             writer.writeheader()
 
     def process_rows(self, rows: list[dict]):
-        with open(self.output_file, 'a', newline='', encoding='utf-8') as f:
+        with open(self.output_file, "a", newline="", encoding="utf-8") as f:
             writer = csv.DictWriter(f, fieldnames=self.headers)
 
             for idx, row in enumerate(rows, start=1):
-                org_id = int(row['organization_id'])
-                proj_id = int(row['project_id'])
+                org_id = int(row["organization_id"])
+                proj_id = int(row["project_id"])
 
                 payload = {
                     "organization_id": org_id,
                     "project_id": proj_id,
                     "is_active": True,
-                    "credential": {
-                        "openai": {
-                            "api_key": self.openai_key
-                        }
-                    }
+                    "credential": {"openai": {"api_key": self.openai_key}},
                 }
 
-                logger.info("Sending credential request for row %d (org: %s, project: %s)...", idx, org_id, proj_id)
+                logger.info(
+                    "Sending credential request for row %d (org: %s, project: %s)...",
+                    idx,
+                    org_id,
+                    proj_id,
+                )
                 success, resp = self.client.post(self.api_url, data=payload)
                 logger.info("Row %d processed. Success: %s", idx, success)
 
                 result = {
                     "organization_id": org_id,
                     "project_id": proj_id,
-                    "success": 'yes' if success else 'no',
-                    "response_from_endpoint": str(resp)
+                    "success": "yes" if success else "no",
+                    "response_from_endpoint": str(resp),
                 }
 
                 writer.writerow(result)
diff --git a/backend/glific_migration/sync_credentials/run_credentials.py b/backend/glific_migration/sync_credentials/run_credentials.py
index 8cb82b215..feaca5b1e 100644
--- a/backend/glific_migration/sync_credentials/run_credentials.py
+++ b/backend/glific_migration/sync_credentials/run_credentials.py
@@ -10,13 +10,12 @@
 logging.basicConfig(
     filename=str(log_file),
     level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s',
+    format="%(asctime)s - %(levelname)s - %(message)s",
 )
 
 logger = logging.getLogger(__name__)
 
 if __name__ == "__main__":
-
     input_csv = base_dir / "sample_input.csv"
     output_csv = base_dir / "credentials_output.csv"
 
@@ -29,6 +28,6 @@
         output_file=str(output_csv),
         api_url=api_url,
         api_key=api_key,
-        openai_key=openai_key
+        openai_key=openai_key,
     )
     processor.run()
diff --git a/backend/glific_migration/validator.py b/backend/glific_migration/validator.py
index 0df4233d8..2864378b3 100644
--- a/backend/glific_migration/validator.py
+++ b/backend/glific_migration/validator.py
@@ -1,9 +1,11 @@
 from email_validator import validate_email, EmailNotValidError
 
+
 def validate_required_fields(row, fields):
     missing = [f for f in fields if f not in row or not row[f].strip()]
     return missing
 
+
 def validate_email_format(email: str):
     try:
         validate_email(email, check_deliverability=False)
@@ -11,5 +13,6 @@ def validate_email_format(email: str):
     except EmailNotValidError as e:
         return False, str(e)
 
+
 def validate_password(password: str):
     return len(password) >= 8

From 18fa5eb4da85822a7880d246d42457f5792870c5 Mon Sep 17 00:00:00 2001
From: Aviraj <100823015+avirajsingh7@users.noreply.github.com>
Date: Wed, 23 Jul 2025 18:39:32 +0530
Subject: [PATCH 6/9] improved code structure by enhancing class structure

---
 backend/glific_migration/base_processor.py    |  93 ++++++++++--
 backend/glific_migration/client.py            |  15 +-
 .../organization_onboarding/processor.py      | 124 +++++++---------
 .../organization_onboarding/run_onboarding.py |  25 +---
 .../sync_assistant/processor.py               | 125 ++++++++--------
 .../sync_assistant/sync_assistant.py          |  18 +--
 .../sync_credentials/processor.py             | 138 +++++++++---------
 .../sync_credentials/run_credentials.py       |  22 +--
 backend/glific_migration/validator.py         |  33 ++++-
 9 files changed, 324 insertions(+), 269 deletions(-)

diff --git a/backend/glific_migration/base_processor.py b/backend/glific_migration/base_processor.py
index 9c44375b4..07b6be14c 100644
--- a/backend/glific_migration/base_processor.py
+++ b/backend/glific_migration/base_processor.py
@@ -1,21 +1,90 @@
 import csv
 import logging
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import List, Dict
 
 logger = logging.getLogger(__name__)
 
 
-class BaseCSVProcessor:
-    def __init__(self, input_file, output_file):
-        self.input_file = input_file
-        self.output_file = output_file
+class BaseCSVProcessor(ABC):
+    """Base class for CSV processing with common functionality."""
 
-    def load_csv(self):
-        with open(self.input_file, newline="", encoding="utf-8") as f:
-            return list(csv.DictReader(f))
+    def __init__(self, input_file: str, output_file: str, headers: List[str]):
+        self.input_file = Path(input_file)
+        self.output_file = Path(output_file)
+        self.headers = headers
+        self._setup_logging()
+        self._init_output_csv()
 
-    def save_output(self, headers, rows):
-        with open(self.output_file, "w", newline="", encoding="utf-8") as f:
-            writer = csv.DictWriter(f, fieldnames=headers)
-            writer.writeheader()
-            for row in rows:
+    def _setup_logging(self) -> None:
+        """Configure file and console logging on the root logger.
+
+        basicConfig is a no-op once the root logger has handlers, so only add the
+        console handler if none exists (exact type: FileHandler is a subclass).
+        """
+        fmt = "%(asctime)s - %(levelname)s - %(message)s"
+        log_file = self.output_file.parent / f"{self.__class__.__name__.lower()}.logs"
+        logging.basicConfig(filename=str(log_file), level=logging.INFO, format=fmt)
+        root = logging.getLogger()
+        if not any(type(h) is logging.StreamHandler for h in root.handlers):
+            console_handler = logging.StreamHandler()
+            console_handler.setLevel(logging.INFO)
+            console_handler.setFormatter(logging.Formatter(fmt))
+            root.addHandler(console_handler)
+
+    def _init_output_csv(self) -> None:
+        """Initialize CSV file with headers."""
+        try:
+            with open(self.output_file, "w", newline="", encoding="utf-8") as f:
+                writer = csv.DictWriter(f, fieldnames=self.headers)
+                writer.writeheader()
+        except Exception as e:
+            logger.error(f"Error initializing output file {self.output_file}: {str(e)}")
+            raise
+
+    def load_csv(self) -> List[Dict[str, str]]:
+        """Load input CSV rows as dicts; short rows are padded with empty strings."""
+        try:
+            with open(self.input_file, newline="", encoding="utf-8") as f:
+                return list(csv.DictReader(f, restval=""))  # default restval is None
+        except FileNotFoundError:
+            logger.error(f"Input file not found: {self.input_file}")
+            raise
+        except Exception as e:
+            logger.error(f"Error reading CSV file {self.input_file}: {str(e)}")
+            raise
+
+    def append_to_csv(self, row: Dict[str, str]) -> None:
+        """Append a single row to the output CSV."""
+        try:
+            with open(self.output_file, "a", newline="", encoding="utf-8") as f:
+                writer = csv.DictWriter(f, fieldnames=self.headers)
                 writer.writerow(row)
+        except Exception as e:
+            logger.error(f"Error appending to output file {self.output_file}: {str(e)}")
+            raise
+
+    @abstractmethod
+    def validate_csv(self, rows: List[Dict[str, str]]) -> bool:
+        """Validate CSV data before processing."""
+        pass
+
+    @abstractmethod
+    def process_rows(self, rows: List[Dict[str, str]]) -> None:
+        """Process CSV rows and write results incrementally."""
+        pass
+
+    def run(self) -> None:
+        """Execute the complete processing pipeline."""
+        logger.info(f"Starting {self.__class__.__name__}...")
+        try:
+            rows = self.load_csv()
+            if not self.validate_csv(rows):
+                logger.error("Validation failed. Aborting processing.")
+                return
+            self.process_rows(rows)
+            logger.info(f"{self.__class__.__name__} completed successfully.")
+        except Exception as e:
+            logger.error(f"Processing failed: {str(e)}", exc_info=True)
+            raise
diff --git a/backend/glific_migration/client.py b/backend/glific_migration/client.py
index 10de7f1e5..8644ecc04 100644
--- a/backend/glific_migration/client.py
+++ b/backend/glific_migration/client.py
@@ -2,11 +2,14 @@
 import requests
 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
+from typing import Tuple, Dict, Optional
 
 logger = logging.getLogger(__name__)
 
 
 class APIClient:
+    """Client for making API requests with retry and error handling."""
+
     def __init__(self, api_key: str):
         self.headers = {
             "accept": "application/json",
@@ -14,11 +17,15 @@ def __init__(self, api_key: str):
             "X-API-KEY": api_key,
         }
         self.session = requests.Session()
-        retries = Retry(total=3, backoff_factor=1, status_forcelist=[429])
-        self.session.mount("https://", HTTPAdapter(max_retries=retries))
-        self.session.mount("http://", HTTPAdapter(max_retries=retries))
+        retries = Retry(
+            total=3, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]
+        )
+        adapter = HTTPAdapter(max_retries=retries)
+        self.session.mount("https://", adapter)
+        self.session.mount("http://", adapter)
 
-    def post(self, url: str, data: dict = None):
+    def post(self, url: str, data: Optional[Dict] = None) -> Tuple[bool, Dict]:
+        """Make a POST request to the specified URL."""
         try:
             response = self.session.post(
                 url, headers=self.headers, json=data, timeout=10
diff --git a/backend/glific_migration/organization_onboarding/processor.py b/backend/glific_migration/organization_onboarding/processor.py
index 884129c61..7d97a2af0 100644
--- a/backend/glific_migration/organization_onboarding/processor.py
+++ b/backend/glific_migration/organization_onboarding/processor.py
@@ -1,80 +1,70 @@
 import logging
-import json
-import csv
+from typing import List, Dict, Set
 from glific_migration.base_processor import BaseCSVProcessor
+from glific_migration.client import APIClient
 from glific_migration.validator import (
     validate_required_fields,
     validate_email_format,
     validate_password,
 )
-from glific_migration.client import APIClient
 
 logger = logging.getLogger(__name__)
 
 
 class OnboardProcessor(BaseCSVProcessor):
-    def __init__(self, input_file, output_file, api_url, api_key):
-        super().__init__(input_file, output_file)
+    """Processor for handling organization onboarding."""
+
+    HEADERS = [
+        "organization_name",
+        "organization_id",
+        "project_name",
+        "project_id",
+        "user_name",
+        "user_id",
+        "api_key",
+        "success",
+        "response_from_endpoint",
+    ]
+    REQUIRED_FIELDS = {
+        "organization_name",
+        "project_name",
+        "email",
+        "password",
+        "user_name",
+    }
+
+    def __init__(self, input_file: str, output_file: str, api_url: str, api_key: str):
+        super().__init__(input_file, output_file, self.HEADERS)
         self.client = APIClient(api_key)
         self.api_url = api_url
-        self.headers = [
-            "organization_name",
-            "organization_id",
-            "project_name",
-            "project_id",
-            "user_name",
-            "user_id",
-            "api_key",
-            "success",
-            "response_from_endpoint",
-        ]
-
-    def run(self):
-        logger.info("Loading CSV input...")
-        rows = self.load_csv()
-
-        logger.info("Validating CSV rows...")
-        if not self.validate_csv(rows):
-            logger.error("Validation failed. Aborting processing.")
-            return
-
-        logger.info("Creating output CSV and writing headers...")
-        self.init_output_csv()
-
-        logger.info("Processing rows and writing results...")
-        self.process_rows(rows)
-
-        logger.info("Processing complete. Output written to %s", self.output_file)
-
-    def validate_csv(self, rows: list[dict]) -> bool:
+
+    def validate_csv(self, rows: List[Dict[str, str]]) -> bool:
+        """Validate CSV data for organization onboarding."""
         seen_projects = set()
         validation_errors = []
 
-        for i, row in enumerate(rows, start=2):
+        for idx, row in enumerate(rows, start=1):
             row_errors = []
 
-            missing = validate_required_fields(
-                row,
-                ["organization_name", "project_name", "email", "password", "user_name"],
-            )
+            missing = validate_required_fields(row, self.REQUIRED_FIELDS)
             if missing:
-                row_errors.append(f"Row {i}: Missing fields: {', '.join(missing)}")
+                row_errors.append(f"Missing fields: {', '.join(missing)}")
 
             project_name = row.get("project_name", "")
             if project_name in seen_projects:
-                row_errors.append(f"Row {i}: Duplicate project name '{project_name}'")
+                row_errors.append(f"Duplicate project name '{project_name}'")
             else:
                 seen_projects.add(project_name)
 
             ok, msg = validate_email_format(row.get("email", ""))
             if not ok:
-                row_errors.append(f"Row {i}: Invalid email: {msg}")
+                row_errors.append(f"Invalid email: {msg}")
 
             if not validate_password(row.get("password", "")):
-                row_errors.append(f"Row {i}: Password must be at least 8 characters")
+                row_errors.append("Password must be at least 8 characters")
 
             if row_errors:
-                validation_errors.extend(row_errors)
+                validation_errors.extend(f"Row {idx}: {error}" for error in row_errors)
 
         if validation_errors:
             logger.error("CSV validation failed with the following issues:")
@@ -85,31 +75,19 @@ def validate_csv(self, rows: list[dict]) -> bool:
         logger.info("CSV validation passed.")
         return True
 
-    def init_output_csv(self):
-        """Initialize CSV file with headers (overwrite if already exists)."""
-        with open(self.output_file, "w", newline="", encoding="utf-8") as f:
-            writer = csv.DictWriter(f, fieldnames=self.headers)
-            writer.writeheader()
-
-    def process_rows(self, rows: list[dict]):
-        with open(self.output_file, "a", newline="", encoding="utf-8") as f:
-            writer = csv.DictWriter(f, fieldnames=self.headers)
-
-            for idx, row in enumerate(rows, start=1):
-                logger.info(
-                    "Sending API request for row %d (project: %s)...",
-                    idx,
-                    row.get("project_name", ""),
-                )
-                success, resp = self.client.post(self.api_url, data=row)
-                logger.info("Row %d processed. Success: %s", idx, success)
-
-                row_result = {
-                    **row,
-                    "success": "yes" if success else "no",
-                    "response_from_endpoint": str(resp),
-                }
-                row_result.update(resp if success else {})
-
-                filtered = {k: row_result.get(k, "") for k in self.headers}
-                writer.writerow(filtered)
+    def process_rows(self, rows: List[Dict[str, str]]) -> None:
+        """Process rows for organization onboarding and write to CSV after each request."""
+        for idx, row in enumerate(rows, start=1):
+            logger.info(
+                f"Sending API request for row {idx} (project: {row.get('project_name', '')})..."
+            )
+            success, resp = self.client.post(self.api_url, data=row)
+            logger.info(f"Row {idx} processed. Success: {success}")
+
+            row_result = {
+                **row,
+                "success": "yes" if success else "no",
+                "response_from_endpoint": str(resp),
+            }
+            row_result.update(resp if success else {})
+            self.append_to_csv({k: row_result.get(k, "") for k in self.HEADERS})
diff --git a/backend/glific_migration/organization_onboarding/run_onboarding.py b/backend/glific_migration/organization_onboarding/run_onboarding.py
index bb7855409..789d3bb06 100644
--- a/backend/glific_migration/organization_onboarding/run_onboarding.py
+++ b/backend/glific_migration/organization_onboarding/run_onboarding.py
@@ -1,33 +1,22 @@
-import logging
 from pathlib import Path
 from glific_migration.organization_onboarding.processor import OnboardProcessor
 
 base_dir = Path(__file__).parent.resolve()
 
-log_file = base_dir / "onboarding.logs"
-logging.basicConfig(
-    filename=str(log_file),
-    level=logging.INFO,
-    format="%(asctime)s - %(levelname)s - %(message)s",
-)
-
-logger = logging.getLogger(__name__)
-
 
 def main():
-    logger.info("Starting onboarding process...")
-
     input_file = base_dir / "sample_input.csv"
     output_file = base_dir / "orgs_output.csv"
+    api_url = "http://localhost:8000/api/v1/onboard"
+    api_key = "SuperUserApiKey"
 
-    OnboardProcessor(
+    processor = OnboardProcessor(
         input_file=str(input_file),
         output_file=str(output_file),
-        api_url="http://localhost:8000/api/v1/onboard",
-        api_key="api_key",
-    ).run()
-
-    logger.info("Onboarding process completed successfully.")
+        api_url=api_url,
+        api_key=api_key,
+    )
+    processor.run()
 
 
 if __name__ == "__main__":
diff --git a/backend/glific_migration/sync_assistant/processor.py b/backend/glific_migration/sync_assistant/processor.py
index c93c426a7..3f3797072 100644
--- a/backend/glific_migration/sync_assistant/processor.py
+++ b/backend/glific_migration/sync_assistant/processor.py
@@ -1,75 +1,82 @@
 import logging
-import csv
+from typing import List, Dict, Set
 from glific_migration.base_processor import BaseCSVProcessor
 from glific_migration.client import APIClient
+from glific_migration.validator import (
+    validate_required_fields,
+    is_valid_api_key,
+    is_valid_assistant_id,
+)
+
 
 logger = logging.getLogger(__name__)
 
 
 class AssistantIngestProcessor(BaseCSVProcessor):
-    def __init__(self, input_file, output_file, base_url):
-        super().__init__(input_file, output_file)
-        self.base_url = base_url
-        self.headers = ["assistant_id", "api_key", "success", "response_from_endpoint"]
+    """Processor for handling assistant ingestion."""
+
+    HEADERS = ["assistant_id", "api_key", "success", "response_from_endpoint"]
+    REQUIRED_FIELDS = {"assistant_id", "api_key"}
+
+    def __init__(self, input_file: str, output_file: str, base_url: str):
+        super().__init__(input_file, output_file, self.HEADERS)
+        self.base_url = base_url.rstrip("/")
+
+    def validate_csv(self, rows: List[Dict[str, str]]) -> bool:
+        """Validate CSV data for assistant ingestion."""
+        validation_errors = []
+
+        for idx, row in enumerate(rows, start=1):
+            row_errors = []
+
+            missing = validate_required_fields(row, self.REQUIRED_FIELDS)
+            if missing:
+                row_errors.append(f"Missing fields: {', '.join(missing)}")
 
-    def run(self):
-        logger.info("Loading assistant ingest CSV input...")
-        rows = self.load_csv()
+            if not row.get("assistant_id", "").strip():
+                row_errors.append("Empty assistant_id")
 
-        logger.info("Validating CSV rows...")
-        self.validate_csv(rows)
+            if not row.get("api_key", "").strip():
+                row_errors.append("Empty api_key")
 
-        logger.info("Initializing output file...")
-        self.init_output_csv()
+            if row.get("assistant_id") and not is_valid_assistant_id(
+                row["assistant_id"]
+            ):
+                row_errors.append(f"Invalid assistant_id format: {row['assistant_id']}")
 
-        logger.info("Processing rows for assistant ingestion...")
-        self.process_rows(rows)
+            if row.get("api_key") and not is_valid_api_key(row["api_key"]):
+                row_errors.append(f"Invalid api_key format: {row['api_key']}")
 
-        logger.info("Assistant ingestion processing complete.")
+            if row_errors:
+                validation_errors.extend(f"Row {idx}: {err}" for err in row_errors)
 
-    def validate_csv(self, rows: list[dict]):
-        required_fields = {"assistant_id", "api_key"}
+        if validation_errors:
+            logger.error("CSV validation failed with the following issues:")
+            for error in validation_errors:
+                logger.error(" - %s", error)
+            return False
 
+        logger.info("CSV validation passed.")
+        return True
+
+    def process_rows(self, rows: List[Dict[str, str]]) -> None:
+        """Process rows for assistant ingestion and write to CSV after each request."""
         for idx, row in enumerate(rows, start=1):
-            missing = required_fields - row.keys()
-            if missing:
-                logger.error(f"Row {idx} missing required fields: {missing}")
-                raise ValueError(f"Row {idx} missing required fields: {missing}")
-
-            if not row["assistant_id"].strip() or not row["api_key"].strip():
-                logger.error(f"Row {idx} has empty assistant_id or api_key")
-                raise ValueError(f"Row {idx} has empty assistant_id or api_key")
-
-    def init_output_csv(self):
-        with open(self.output_file, "w", newline="", encoding="utf-8") as f:
-            writer = csv.DictWriter(f, fieldnames=self.headers)
-            writer.writeheader()
-
-    def process_rows(self, rows: list[dict]):
-        with open(self.output_file, "a", newline="", encoding="utf-8") as f:
-            writer = csv.DictWriter(f, fieldnames=self.headers)
-
-            for idx, row in enumerate(rows, start=1):
-                assistant_id = row["assistant_id"]
-                api_key = row["api_key"]
-
-                logger.info(
-                    "Ingesting assistant for row %d (assistant_id: %s)",
-                    idx,
-                    assistant_id,
-                )
-
-                url = f"{self.base_url.rstrip('/')}/assistant/{assistant_id}/ingest"
-                client = APIClient(api_key=api_key)
-
-                success, resp = client.post(url)
-                logger.info("Row %d processed. Success: %s", idx, success)
-
-                result = {
-                    "assistant_id": assistant_id,
-                    "api_key": api_key,
-                    "success": "yes" if success else "no",
-                    "response_from_endpoint": str(resp),
-                }
-
-                writer.writerow(result)
+            assistant_id = row["assistant_id"]
+            api_key = row["api_key"]
+
+            logger.info(
+                f"Ingesting assistant for row {idx} (assistant_id: {assistant_id})"
+            )
+            url = f"{self.base_url}/assistant/{assistant_id}/ingest"
+            client = APIClient(api_key=api_key)
+            success, resp = client.post(url)
+            logger.info(f"Row {idx} processed. Success: {success}")
+
+            result = {
+                "assistant_id": assistant_id,
+                "api_key": api_key,
+                "success": "yes" if success else "no",
+                "response_from_endpoint": str(resp),
+            }
+            self.append_to_csv(result)
diff --git a/backend/glific_migration/sync_assistant/sync_assistant.py b/backend/glific_migration/sync_assistant/sync_assistant.py
index 037562b23..7c37a7b7a 100644
--- a/backend/glific_migration/sync_assistant/sync_assistant.py
+++ b/backend/glific_migration/sync_assistant/sync_assistant.py
@@ -1,23 +1,21 @@
-import logging
 from pathlib import Path
 from glific_migration.sync_assistant.processor import AssistantIngestProcessor
 
 base_dir = Path(__file__).parent.resolve()
-log_file = base_dir / "sync_assistant.logs"
-logging.basicConfig(
-    filename=str(log_file),
-    level=logging.INFO,
-    format="%(asctime)s - %(levelname)s - %(message)s",
-)
 
-logger = logging.getLogger(__name__)
 
-if __name__ == "__main__":
+def main():
     input_csv = base_dir / "sample_input.csv"
     output_csv = base_dir / "assistants_output.csv"
     api_base_url = "http://localhost:8000/api/v1"
 
     processor = AssistantIngestProcessor(
-        input_file=input_csv, output_file=output_csv, base_url=api_base_url
+        input_file=str(input_csv),
+        output_file=str(output_csv),
+        base_url=api_base_url,
     )
     processor.run()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/backend/glific_migration/sync_credentials/processor.py b/backend/glific_migration/sync_credentials/processor.py
index d2ccf4990..5402df14b 100644
--- a/backend/glific_migration/sync_credentials/processor.py
+++ b/backend/glific_migration/sync_credentials/processor.py
@@ -1,94 +1,88 @@
 import logging
-import csv
+from typing import List, Dict, Set
 from glific_migration.base_processor import BaseCSVProcessor
 from glific_migration.client import APIClient
+from glific_migration.validator import validate_required_fields, is_valid_api_key
 
 logger = logging.getLogger(__name__)
 
 
 class CredentialProcessor(BaseCSVProcessor):
-    def __init__(self, input_file, output_file, api_url, api_key, openai_key):
-        super().__init__(input_file, output_file)
+    """Processor for handling credential migration."""
+
+    HEADERS = ["organization_id", "project_id", "success", "response_from_endpoint"]
+    REQUIRED_FIELDS = {"organization_id", "project_id"}
+
+    def __init__(
+        self,
+        input_file: str,
+        output_file: str,
+        api_url: str,
+        api_key: str,
+        openai_key: str,
+    ):
+        super().__init__(input_file, output_file, self.HEADERS)
         self.client = APIClient(api_key)
         self.api_url = api_url
         self.openai_key = openai_key
-        self.headers = [
-            "organization_id",
-            "project_id",
-            "success",
-            "response_from_endpoint",
-        ]
 
-    def run(self):
-        logger.info("Loading CSV input...")
-        rows = self.load_csv()
-
-        logger.info("Validating input data...")
-        self.validate_csv(rows)
-
-        logger.info("Initializing output file if needed...")
-        self.init_output_csv()
-
-        logger.info("Processing rows for credential creation...")
-        self.process_rows(rows)
-
-        logger.info("Credential processing complete.")
-
-    def validate_csv(self, rows: list[dict]):
-        required_fields = {"organization_id", "project_id"}
+    def validate_csv(self, rows: List[Dict[str, str]]) -> bool:
+        """Validate CSV data for credential processing."""
+        validation_errors = []
 
         for idx, row in enumerate(rows, start=1):
-            missing = required_fields - row.keys()
+            row_errors = []
+
+            missing = validate_required_fields(row, self.REQUIRED_FIELDS)
             if missing:
-                logger.error(f"Row {idx} is missing required fields: {missing}")
-                raise ValueError(f"Row {idx} is missing required fields: {missing}")
+                row_errors.append(f"Missing fields: {', '.join(missing)}")
 
             try:
-                int(row["organization_id"])
-                int(row["project_id"])
+                int(row.get("organization_id", ""))
+                int(row.get("project_id", ""))
             except ValueError:
-                logger.error(
-                    f"Row {idx} has non-integer organization_id or project_id: {row}"
-                )
-                raise ValueError(
-                    f"Row {idx} has non-integer organization_id or project_id"
+                row_errors.append(
+                    f"organization_id or project_id is not an integer: org_id='{row.get('organization_id')}', proj_id='{row.get('project_id')}'"
                 )
 
-    def init_output_csv(self):
-        """Initialize CSV file with headers (overwrite if already exists)."""
-        with open(self.output_file, "w", newline="", encoding="utf-8") as f:
-            writer = csv.DictWriter(f, fieldnames=self.headers)
-            writer.writeheader()
-
-    def process_rows(self, rows: list[dict]):
-        with open(self.output_file, "a", newline="", encoding="utf-8") as f:
-            writer = csv.DictWriter(f, fieldnames=self.headers)
-
-            for idx, row in enumerate(rows, start=1):
-                org_id = int(row["organization_id"])
-                proj_id = int(row["project_id"])
-
-                payload = {
-                    "organization_id": org_id,
-                    "project_id": proj_id,
-                    "is_active": True,
-                    "credential": {"openai": {"api_key": self.openai_key}},
-                }
-
-                logger.info(
-                    "Sending credential request for row %d (org: %s, project: %s)...",
-                    idx,
-                    org_id,
-                    proj_id,
-                )
-                success, resp = self.client.post(self.api_url, data=payload)
-                logger.info("Row %d processed. Success: %s", idx, success)
+            if row.get("api_key") and not is_valid_api_key(row["api_key"]):
+                row_errors.append(f"Invalid api_key format: {row['api_key']}")
+
+            if row_errors:
+                validation_errors.extend(f"Row {idx}: {err}" for err in row_errors)
 
-                result = {
-                    "organization_id": org_id,
-                    "project_id": proj_id,
-                    "success": "yes" if success else "no",
-                    "response_from_endpoint": str(resp),
-                }
+        if validation_errors:
+            logger.error("CSV validation failed with the following issues:")
+            for error in validation_errors:
+                logger.error(" - %s", error)
+            return False
 
-                writer.writerow(result)
+        logger.info("CSV validation passed.")
+        return True
+
+    def process_rows(self, rows: List[Dict[str, str]]) -> None:
+        """Process rows for credential creation and write to CSV after each request."""
+        for idx, row in enumerate(rows, start=1):
+            org_id = int(row["organization_id"])
+            proj_id = int(row["project_id"])
+
+            payload = {
+                "organization_id": org_id,
+                "project_id": proj_id,
+                "is_active": True,
+                "credential": {"openai": {"api_key": self.openai_key}},
+            }
+
+            logger.info(
+                f"Sending credential request for row {idx} (org: {org_id}, project: {proj_id})..."
+            )
+            success, resp = self.client.post(self.api_url, data=payload)
+            logger.info(f"Row {idx} processed. Success: {success}")
+
+            result = {
+                "organization_id": org_id,
+                "project_id": proj_id,
+                "success": "yes" if success else "no",
+                "response_from_endpoint": str(resp),
+            }
+            self.append_to_csv(result)
diff --git a/backend/glific_migration/sync_credentials/run_credentials.py b/backend/glific_migration/sync_credentials/run_credentials.py
index feaca5b1e..304833862 100644
--- a/backend/glific_migration/sync_credentials/run_credentials.py
+++ b/backend/glific_migration/sync_credentials/run_credentials.py
@@ -1,27 +1,15 @@
-import logging
 from pathlib import Path
 from glific_migration.sync_credentials.processor import CredentialProcessor
 
-# Resolve script's base directory
 base_dir = Path(__file__).parent.resolve()
 
-# Log file inside same folder
-log_file = base_dir / "credentials.logs"
-logging.basicConfig(
-    filename=str(log_file),
-    level=logging.INFO,
-    format="%(asctime)s - %(levelname)s - %(message)s",
-)
 
-logger = logging.getLogger(__name__)
-
-if __name__ == "__main__":
+def main():
     input_csv = base_dir / "sample_input.csv"
     output_csv = base_dir / "credentials_output.csv"
-
     api_url = "http://localhost:8000/api/v1/credentials/"
-    api_key = "test_api_key"
-    openai_key = "openai_api_key"
+    api_key = "SuperUserApiKey"
+    openai_key = "openai_api_key_example"
 
     processor = CredentialProcessor(
         input_file=str(input_csv),
@@ -31,3 +19,7 @@
         openai_key=openai_key,
     )
     processor.run()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/backend/glific_migration/validator.py b/backend/glific_migration/validator.py
index 2864378b3..526c10dcf 100644
--- a/backend/glific_migration/validator.py
+++ b/backend/glific_migration/validator.py
@@ -1,18 +1,39 @@
 from email_validator import validate_email, EmailNotValidError
+from typing import List, Dict, Tuple, Set
+import re
 
 
-def validate_required_fields(row, fields):
-    missing = [f for f in fields if f not in row or not row[f].strip()]
-    return missing
+def validate_required_fields(row: Dict[str, str], fields: Set[str]) -> List[str]:
+    """Return the required fields that are absent, None, or blank, sorted by name."""
+    return [f for f in sorted(fields) if not (row.get(f) or "").strip()]
 
 
-def validate_email_format(email: str):
+def validate_email_format(email: str) -> Tuple[bool, str]:
+    """Validate email format."""
     try:
         validate_email(email, check_deliverability=False)
-        return True, None
+        return True, ""
     except EmailNotValidError as e:
         return False, str(e)
 
 
-def validate_password(password: str):
+def validate_password(password: str) -> bool:
+    """Validate password length."""
     return len(password) >= 8
+
+
+def is_valid_api_key(api_key: str) -> bool:
+    """
+    Validates that the API key is in the format:
+    'ApiKey <43-character base64url-like token>'
+    """
+    pattern = r"^ApiKey [A-Za-z0-9_-]{43}$"
+    return bool(re.fullmatch(pattern, api_key))
+
+
+def is_valid_assistant_id(assistant_id: str) -> bool:
+    """
+    Validates OpenAI assistant ID. Should start with 'asst_' followed by 15+ alphanumeric chars.
+    """
+    pattern = r"^asst_[a-zA-Z0-9]{15,}$"
+    return bool(re.fullmatch(pattern, assistant_id))

From 87e9d098bc3cf1ee407325eb3466184db108c31c Mon Sep 17 00:00:00 2001
From: Aviraj <100823015+avirajsingh7@users.noreply.github.com>
Date: Wed, 23 Jul 2025 19:20:56 +0530
Subject: [PATCH 7/9] use config file

---
 backend/glific_migration/config.json             | 16 ++++++++++++++++
 .../organization_onboarding/run_onboarding.py    | 12 ++++++++----
 .../sync_assistant/sync_assistant.py             | 10 +++++++---
 .../sync_credentials/run_credentials.py          | 14 +++++++++-----
 4 files changed, 40 insertions(+), 12 deletions(-)
 create mode 100644 backend/glific_migration/config.json

diff --git a/backend/glific_migration/config.json b/backend/glific_migration/config.json
new file mode 100644
index 000000000..d19959a73
--- /dev/null
+++ b/backend/glific_migration/config.json
@@ -0,0 +1,16 @@
+{
+  "base_url": "http://localhost:8000/api/v1",
+  "openai_key": "openai_api_key_example",
+  "assistant_ingest": {
+    "input_csv": "sample_input.csv",
+    "output_csv": "assistants_output.csv"
+  },
+  "organization_onboarding": {
+    "input_csv": "sample_input.csv",
+    "output_csv": "orgs_output.csv"
+  },
+  "sync_credentials": {
+    "input_csv": "sample_input.csv",
+    "output_csv": "credentials_output.csv"
+  }
+}
diff --git a/backend/glific_migration/organization_onboarding/run_onboarding.py b/backend/glific_migration/organization_onboarding/run_onboarding.py
index 789d3bb06..943018c58 100644
--- a/backend/glific_migration/organization_onboarding/run_onboarding.py
+++ b/backend/glific_migration/organization_onboarding/run_onboarding.py
@@ -1,14 +1,18 @@
 from pathlib import Path
+import json
 from glific_migration.organization_onboarding.processor import OnboardProcessor
 
 base_dir = Path(__file__).parent.resolve()
 
 
 def main():
-    input_file = base_dir / "sample_input.csv"
-    output_file = base_dir / "orgs_output.csv"
-    api_url = "http://localhost:8000/api/v1/onboard"
-    api_key = "SuperUserApiKey"
+    with open(base_dir / "../config.json", "r") as file:
+        config = json.load(file)
+
+    input_file = base_dir / config["organization_onboarding"]["input_csv"]
+    output_file = base_dir / config["organization_onboarding"]["output_csv"]
+    api_url = config["base_url"] + "/onboard"
+    api_key = config["api_key"]
 
     processor = OnboardProcessor(
         input_file=str(input_file),
diff --git a/backend/glific_migration/sync_assistant/sync_assistant.py b/backend/glific_migration/sync_assistant/sync_assistant.py
index 7c37a7b7a..6e7e517e3 100644
--- a/backend/glific_migration/sync_assistant/sync_assistant.py
+++ b/backend/glific_migration/sync_assistant/sync_assistant.py
@@ -1,13 +1,17 @@
 from pathlib import Path
+import json
 from glific_migration.sync_assistant.processor import AssistantIngestProcessor
 
 base_dir = Path(__file__).parent.resolve()
 
 
 def main():
-    input_csv = base_dir / "sample_input.csv"
-    output_csv = base_dir / "assistants_output.csv"
-    api_base_url = "http://localhost:8000/api/v1"
+    with open(base_dir / "../config.json", "r") as file:
+        config = json.load(file)
+
+    input_csv = base_dir / config["assistant_ingest"]["input_csv"]
+    output_csv = base_dir / config["assistant_ingest"]["output_csv"]
+    api_base_url = config["base_url"]
 
     processor = AssistantIngestProcessor(
         input_file=str(input_csv),
diff --git a/backend/glific_migration/sync_credentials/run_credentials.py b/backend/glific_migration/sync_credentials/run_credentials.py
index 304833862..846819b41 100644
--- a/backend/glific_migration/sync_credentials/run_credentials.py
+++ b/backend/glific_migration/sync_credentials/run_credentials.py
@@ -1,15 +1,19 @@
 from pathlib import Path
+import json
 from glific_migration.sync_credentials.processor import CredentialProcessor
 
 base_dir = Path(__file__).parent.resolve()
 
 
 def main():
-    input_csv = base_dir / "sample_input.csv"
-    output_csv = base_dir / "credentials_output.csv"
-    api_url = "http://localhost:8000/api/v1/credentials/"
-    api_key = "SuperUserApiKey"
-    openai_key = "openai_api_key_example"
+    with open(base_dir / "../config.json", "r") as file:
+        config = json.load(file)
+
+    input_csv = base_dir / config["sync_credentials"]["input_csv"]
+    output_csv = base_dir / config["sync_credentials"]["output_csv"]
+    api_url = config["base_url"] + "/credentials/"
+    api_key = config["api_key"]
+    openai_key = config["openai_key"]
 
     processor = CredentialProcessor(
         input_file=str(input_csv),

From 74437bc9e1e980935963f8fd0667d190bdbccd9d Mon Sep 17 00:00:00 2001
From: Aviraj <100823015+avirajsingh7@users.noreply.github.com>
Date: Wed, 23 Jul 2025 19:21:20 +0530
Subject: [PATCH 8/9] add api_key to config

---
 backend/glific_migration/config.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/backend/glific_migration/config.json b/backend/glific_migration/config.json
index d19959a73..d48277f20 100644
--- a/backend/glific_migration/config.json
+++ b/backend/glific_migration/config.json
@@ -1,5 +1,6 @@
 {
   "base_url": "http://localhost:8000/api/v1",
+  "api_key": "SuperUserApiKey",
   "openai_key": "openai_api_key_example",
   "assistant_ingest": {
     "input_csv": "sample_input.csv",
@@ -13,4 +14,4 @@
     "input_csv": "sample_input.csv",
     "output_csv": "credentials_output.csv"
   }
-}
+}
\ No newline at end of file

From 48a06adc5a0ef5c5c764f61f9f352abbd2ace08d Mon Sep 17 00:00:00 2001
From: Aviraj <100823015+avirajsingh7@users.noreply.github.com>
Date: Wed, 23 Jul 2025 19:25:52 +0530
Subject: [PATCH 9/9] pre commit

---
 backend/glific_migration/config.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/glific_migration/config.json b/backend/glific_migration/config.json
index d48277f20..c5f702dc7 100644
--- a/backend/glific_migration/config.json
+++ b/backend/glific_migration/config.json
@@ -14,4 +14,4 @@
     "input_csv": "sample_input.csv",
     "output_csv": "credentials_output.csv"
   }
-}
\ No newline at end of file
+}