Geetha82 · Geetha82 · Apr 23, 2026 · Apr 23, 2026 · May 22, 2026 · May 29, 2026
diff --git a/assignment10/advanced_sql.py b/assignment10/advanced_sql.py
@@ -0,0 +1,178 @@
+import sqlite3
+import os
+
+
+# ===== Database location: resolved from this script's folder, not the working directory =====
+
+SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
+DB_PATH = os.path.normpath(os.path.join(SCRIPT_DIR, os.pardir, "db", "lesson.db"))
+
+def main():
+    """Run the four advanced-SQL tasks against DB_PATH and print each result.
+
+    Tasks 1, 2 and 4 are read-only reports; Task 3 inserts one order with five
+    line items in a single transaction. On a SQLite error any open transaction
+    is rolled back; once opened, the connection is closed on every exit path.
+    """
+    if not os.path.exists(DB_PATH):
+        print(f" Error: Database file not found at: {DB_PATH}")
+        return
+
+    conn = None
+    try:
+        conn = sqlite3.connect(DB_PATH)
+        # SQLite only enforces foreign keys on connections that enable them
+        conn.execute("PRAGMA foreign_keys = 1;")
+        cursor = conn.cursor()
+
+        # ===== Task 1: Complex JOINs with Aggregation =====
+        # Order total = sum of price * quantity over the order's line items
+        query_task1 = """
+        SELECT 
+            o.order_id, 
+            SUM(p.price * l.quantity) AS total_price
+        FROM orders o
+        INNER JOIN line_items l ON o.order_id = l.order_id
+        INNER JOIN products p ON l.product_id = p.product_id
+        GROUP BY o.order_id
+        ORDER BY o.order_id ASC
+        LIMIT 5;
+        """
+
+        print("\n===== Task 1: Total Price of the First 5 Orders =====\n")
+        print(f"{'Order ID':<10} | {'Total Price':<12}")
+        print("-" * 27)
+
+        cursor.execute(query_task1)
+        for order_id, total_price in cursor.fetchall():
+            print(f"{order_id:<10} | ${total_price:,.2f}")
+
+        # ===== Task 2: Understanding Subqueries =====
+        query_task2 = """
+        SELECT 
+            c.customer_name,
+            AVG(sub.total_price) AS average_total_price
+        FROM customers c
+        LEFT JOIN (
+            SELECT 
+                o.customer_id AS customer_id_b, 
+                SUM(p.price * l.quantity) AS total_price
+            FROM orders o
+            INNER JOIN line_items l ON o.order_id = l.order_id
+            INNER JOIN products p ON l.product_id = p.product_id
+            GROUP BY o.order_id
+        ) sub ON c.customer_id = sub.customer_id_b
+        GROUP BY c.customer_id
+        ORDER BY average_total_price DESC;
+        """
+
+        print("\n===== Task 2: Average Order Price Per Customer =====\n")
+        print(f"{'Customer Name':<35} | {'Avg Order Price':<15}")
+        print("-" * 53)
+
+        cursor.execute(query_task2)
+        for customer_name, avg_price in cursor.fetchall():
+            # The LEFT JOIN keeps customers without orders; their average is NULL
+            display_price = f"${avg_price:,.2f}" if avg_price is not None else "$0.00"
+            print(f"{customer_name:<35} | {display_price:<15}")
+
+        # ===== Task 3: An Insert Transaction Based on Data =====
+        print("\n===== Task 3: Transactional Order Processing =====\n")
+
+        # 1. Look up the ids the new order needs; a missing row leaves None
+        cursor.execute("SELECT customer_id FROM customers WHERE customer_name = 'Perez and Sons';")
+        customer_id_row = cursor.fetchone()
+        customer_id = customer_id_row[0] if customer_id_row else None
+
+        cursor.execute("SELECT employee_id FROM employees WHERE first_name = 'Miranda' AND last_name = 'Harris';")
+        employee_id_row = cursor.fetchone()
+        employee_id = employee_id_row[0] if employee_id_row else None
+
+        # Find the product_ids of the 5 least expensive products
+        cursor.execute("SELECT product_id FROM products ORDER BY price ASC LIMIT 5;")
+        product_ids = [row[0] for row in cursor.fetchall()]
+
+        # Safety check to ensure lookups succeeded before starting the transaction
+        if customer_id is None or employee_id is None or len(product_ids) < 5:
+            print("Setup error: Could not locate necessary customer, employee, or product records.")
+            return
+
+        # 2. Open one explicit transaction covering the order and its line items
+        conn.execute("BEGIN TRANSACTION;")
+
+        # Create the order; RETURNING hands back the auto-assigned order_id
+        insert_order_query = """
+        INSERT INTO orders (customer_id, employee_id) 
+        VALUES (?, ?) 
+        RETURNING order_id;
+        """
+        cursor.execute(insert_order_query, (customer_id, employee_id))
+        new_order_id = cursor.fetchone()[0]
+
+        # Insert the 5 line items (quantity 10 each) for the new order
+        insert_item_query = """
+        INSERT INTO line_items (order_id, product_id, quantity) 
+        VALUES (?, ?, 10);
+        """
+        cursor.executemany(insert_item_query, [(new_order_id, prod_id) for prod_id in product_ids])
+
+        # Commit all changes permanently to disk
+        conn.commit()
+        print(f" Transaction successful! Created Order ID: {new_order_id}")
+
+        # 3. Read the new line items back, joined to products for their names
+        display_order_query = """
+        SELECT l.line_item_id, l.quantity, p.product_name
+        FROM line_items l
+        INNER JOIN products p ON l.product_id = p.product_id
+        WHERE l.order_id = ?
+        ORDER BY l.line_item_id ASC;
+        """
+        cursor.execute(display_order_query, (new_order_id,))
+        order_details = cursor.fetchall()
+
+        print(f"\nManifest for Order #{new_order_id}:")
+        print(f"{'Line Item ID':<12} | {'Quantity':<8} | {'Product Name':<25}")
+        print("-" * 51)
+        # One manifest row per inserted line item
+        for li_id, qty, prod_name in order_details:
+            print(f"{li_id:<12} | {qty:<8} | {prod_name:<25}")
+
+        # ===== Task 4: Aggregation with HAVING =====
+        # INNER JOIN drops employees with no orders; HAVING then keeps counts above 5
+        query_task4 = """
+        SELECT 
+            e.employee_id, 
+            e.first_name, 
+            e.last_name, 
+            COUNT(o.order_id) AS order_count
+        FROM employees e
+        INNER JOIN orders o ON e.employee_id = o.employee_id
+        GROUP BY e.employee_id
+        HAVING COUNT(o.order_id) > 5
+        ORDER BY order_count DESC;
+        """
+
+        print("\n===== Task 4: Employees with More Than 5 Orders =====\n")
+        print(f"{'Emp ID':<8} | {'First Name':<12} | {'Last Name':<15} | {'Order Count':<12}")
+        print("-" * 55)
+
+        cursor.execute(query_task4)
+        for emp_id, f_name, l_name, order_count in cursor.fetchall():
+            print(f"{emp_id:<8} | {f_name:<12} | {l_name:<15} | {order_count:<12}")
+
+    except sqlite3.Error as e:
+        # Roll back only when a transaction (the Task 3 insert) is actually open
+        if conn is not None and conn.in_transaction:
+            try:
+                conn.rollback()
+                print("🔄 Transaction successfully rolled back.")
+            except sqlite3.OperationalError as rollback_error:
+                print(f"❌ Rollback failed: {rollback_error}")
+        print(f"❌ SQLite Error Encountered: {e}")
+    finally:
+        if conn is not None:
+            conn.close()
+
+if __name__ == "__main__":
+    main()
diff --git a/assignment10/assignment10.py b/assignment10/assignment10.py
diff --git a/assignment4/additional_employees.json b/assignment4/additional_employees.json
@@ -0,0 +1,4 @@
+[
+    {"Name": "Eve", "Age": 28, "City": "Miami", "Salary": 60000},
+    {"Name": "Frank", "Age": 40, "City": "Seattle", "Salary": 95000}
+]
diff --git a/assignment4/assignment4.py b/assignment4/assignment4.py
@@ -0,0 +1,186 @@
+import pandas as pd
+
+# Task 1: Create a DataFrame from a dictionary
+# Column-oriented source data: one equal-length list per column
+data = {
+    'Name': ['Alice', 'Bob', 'Charlie'],
+    'Age': [25, 30, 35],
+    'City': ['New York', 'Los Angeles', 'Chicago'],
+}
+
+# Build the base DataFrame directly from the dictionary
+task1_data_frame = pd.DataFrame(data=data)
+
+# Show it to confirm the three rows and three columns
+print(task1_data_frame)
+
+
+# Task 1: Add a new column
+# Work on a copy so task1_data_frame itself stays unchanged
+task1_with_salary = task1_data_frame.copy()
+
+# Append Salary as the last column
+task1_with_salary.insert(len(task1_with_salary.columns), 'Salary', [70000, 80000, 90000])
+
+# Show the DataFrame with the extra Salary column
+print(task1_with_salary)
+
+# Task 1: Modify an existing column
+# Work on another copy so task1_with_salary keeps the original ages
+task1_older = task1_with_salary.copy()
+
+# Everyone gets one year older
+task1_older['Age'] += 1
+
+# Show the updated ages
+print(task1_older)
+
+# Task 1: Save the DataFrame as a CSV file
+# index=False keeps the row index out of employees.csv
+task1_older.to_csv('employees.csv', index=False)
+
+# Confirm the file was written
+print("CSV file created!")
+
+
+# Task 2: Loading Data from CSV and JSON
+# Task 2: Read data from a CSV file
+# Read back the employees.csv written in Task 1 (path is relative to the working directory)
+task2_employees = pd.read_csv('employees.csv')
+
+# Show the loaded rows so they can be checked against the tests
+print(task2_employees)
+
+# Task 2: Read data from a JSON file
+
+# Create a JSON file (additional_employees.json) next to this script
+# containing a list of records:
+# [
+#     {"Name": "Eve", "Age": 28, "City": "Miami", "Salary": 60000},
+#     {"Name": "Frank", "Age": 40, "City": "Seattle", "Salary": 95000}
+# ]
+# Load that file into json_employees. The file name carries no folder
+# prefix, so the script has to be run from inside the assignment4
+# directory for the relative path to resolve.
+json_employees = pd.read_json(path_or_buf='additional_employees.json')
+
+# Show the JSON rows so they can be checked against the tests
+print(json_employees)
+
+# Task 2: Combine DataFrames
+# Stack the CSV rows on top of the JSON rows; ignore_index=True renumbers
+# the combined rows from 0 instead of repeating each frame's own index.
+more_employees = pd.concat((task2_employees, json_employees), ignore_index=True)
+
+# Show the combined DataFrame
+print(more_employees)
+
+# Task 3: Data Inspection - Using Head, Tail, and Info Methods
+# Task 3: Use the head() method
+
+# Keep the first three rows of more_employees in first_three
+first_three = more_employees.head(n=3)
+
+# Show those rows
+print(first_three)
+
+# Task 3: Use the tail() method
+
+# Keep the last two rows of more_employees in last_two
+last_two = more_employees.tail(n=2)
+
+# Show those rows
+print(last_two)
+
+# Task 3: Get the shape of a DataFrame
+
+# shape is a (row count, column count) tuple; store it in employee_shape
+employee_shape = more_employees.shape
+
+# Show the tuple
+print(employee_shape)
+
+# Task 3: Use the info() method
+
+# info() writes its summary of more_employees to stdout itself, so no print() is needed
+more_employees.info()
+
+
+# Task 4: Data Cleaning
+
+# Task 4: Create a DataFrame from dirty_data.csv
+
+# Load the raw file; the path is relative to the working directory
+dirty_data = pd.read_csv('dirty_data.csv')
+
+# Print dirty_data
+print(dirty_data)
+
+# Clean a copy so dirty_data keeps the raw values
+clean_data = dirty_data.copy()
+
+# Task 4: Remove duplicate rows and print
+clean_data = clean_data.drop_duplicates()
+print(clean_data)
+
+# Task 4: Convert Age to numeric; values that cannot be parsed become NaN
+clean_data['Age'] = pd.to_numeric(clean_data['Age'], errors='coerce')
+
+# Fill the missing ages with the mean age and print
+clean_data['Age'] = clean_data['Age'].fillna(clean_data['Age'].mean())
+print(clean_data)
+
+# Task 4: Convert Salary to numeric, replacing the known placeholders (unknown, n/a) with a missing value, and print
+clean_data['Salary'] = clean_data['Salary'].replace(['unknown', 'n/a'], pd.NA)
+clean_data['Salary'] = pd.to_numeric(clean_data['Salary'], errors='coerce')
+print(clean_data)
+
+
+# Task 4: Fill missing numeric values
+# Age was already filled with its mean right after its numeric conversion, so only Salary still has gaps here
+
+# Fill Salary with the median
+clean_data['Salary'] = clean_data['Salary'].fillna(clean_data['Salary'].median())
+
+print(clean_data)
+
+
+# Task 4: Convert Hire Date to datetime; unparseable dates become NaT
+clean_data['Hire Date'] = pd.to_datetime(clean_data['Hire Date'], errors='coerce')
+
+# Forward-fill the gaps; Series.ffill() replaces the deprecated fillna(method='ffill')
+clean_data['Hire Date'] = clean_data['Hire Date'].ffill()
+print(clean_data)
+
+# Task 4: Strip extra whitespace and standardize Name and Department as uppercase
+clean_data['Name'] = clean_data['Name'].str.strip().str.upper()
+clean_data['Department'] = clean_data['Department'].str.strip().str.upper()
+
+print(clean_data)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/assignment4/employees.csv b/assignment4/employees.csv
@@ -0,0 +1,4 @@
+Name,Age,City,Salary
+Alice,26,New York,70000
+Bob,31,Los Angeles,80000
+Charlie,36,Chicago,90000
diff --git a/assignment7/school_a.py b/assignment7/school_a.py
@@ -0,0 +1,7 @@
+import sqlite3
+
+# Connect to (and create, if missing) the SQLite database; the file lives under ../db so it is not pushed to GitHub.
+with sqlite3.connect("../db/school.db") as conn:
+    print("Database created and connected successfully.")
+# The "with" block only commits or rolls back the transaction; it does not close the connection, so close it explicitly.
+conn.close()