Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 178 additions & 0 deletions assignment10/advanced_sql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
import sqlite3
import os


# ===== Database location, resolved from this script's own path =====
# Anchoring on __file__ (not the current working directory) lets the script
# be launched from any folder and still find ../db/lesson.db.

_DB_RELATIVE_PARTS = ("..", "db", "lesson.db")

SCRIPT_DIR = os.path.split(os.path.abspath(__file__))[0]
DB_PATH = os.path.normpath(os.path.join(SCRIPT_DIR, *_DB_RELATIVE_PARTS))

def main():
    """Run the four advanced-SQL exercises against the lesson database.

    Steps, in order:
      1. Print the total price of the first five orders.
      2. Print the average order total per customer (subquery + LEFT JOIN).
      3. Insert one order with five line items inside a single transaction,
         then print the new order's line items.
      4. Print employees who are attached to more than five orders.

    Returns:
        None. Results and errors are reported on stdout.

    Any ``sqlite3.Error`` is caught and printed; an open transaction is
    rolled back first. The connection is always closed before returning.
    """
    if not os.path.exists(DB_PATH):
        print(f" Error: Database file not found at: {DB_PATH}")
        return

    conn = None
    try:
        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()

        # Foreign-key enforcement is a per-connection setting in SQLite, so
        # switch it on before any statement that could violate a constraint.
        conn.execute("PRAGMA foreign_keys = 1;")

        # ===== Task 1: Complex JOINs with Aggregation =====
        query_task1 = """
            SELECT
                o.order_id,
                SUM(p.price * l.quantity) AS total_price
            FROM orders o
            INNER JOIN line_items l ON o.order_id = l.order_id
            INNER JOIN products p ON l.product_id = p.product_id
            GROUP BY o.order_id
            ORDER BY o.order_id ASC
            LIMIT 5;
        """

        print("\n===== Task 1: Total Price of the First 5 Orders =====\n")
        print(f"{'Order ID':<10} | {'Total Price':<12}")
        print("-" * 27)

        cursor.execute(query_task1)
        for order_id, total_price in cursor.fetchall():
            print(f"{order_id:<10} | ${total_price:,.2f}")

        # ===== Task 2: Understanding Subqueries =====
        # The subquery yields one row per order (customer + order total); the
        # LEFT JOIN keeps customers that have no orders at all.
        query_task2 = """
            SELECT
                c.customer_name,
                AVG(sub.total_price) AS average_total_price
            FROM customers c
            LEFT JOIN (
                SELECT
                    o.customer_id AS customer_id_b,
                    SUM(p.price * l.quantity) AS total_price
                FROM orders o
                INNER JOIN line_items l ON o.order_id = l.order_id
                INNER JOIN products p ON l.product_id = p.product_id
                GROUP BY o.order_id
            ) sub ON c.customer_id = sub.customer_id_b
            GROUP BY c.customer_id
            ORDER BY average_total_price DESC;
        """

        print("\n===== Task 2: Average Order Price Per Customer =====\n")
        print(f"{'Customer Name':<35} | {'Avg Order Price':<15}")
        print("-" * 53)

        cursor.execute(query_task2)
        for customer_name, avg_price in cursor.fetchall():
            # AVG() is NULL for customers without orders; show that as $0.00.
            display_price = f"${avg_price:,.2f}" if avg_price is not None else "$0.00"
            print(f"{customer_name:<35} | {display_price:<15}")

        # ===== Task 3: An Insert Transaction Based on Data =====
        print("\n===== Task 3: Transactional Order Processing =====\n")

        # 1. Look up the ids the new order depends on.
        cursor.execute(
            "SELECT customer_id FROM customers WHERE customer_name = 'Perez and Sons';"
        )
        customer_id_row = cursor.fetchone()
        customer_id = customer_id_row[0] if customer_id_row else None

        cursor.execute(
            "SELECT employee_id FROM employees "
            "WHERE first_name = 'Miranda' AND last_name = 'Harris';"
        )
        employee_id_row = cursor.fetchone()
        employee_id = employee_id_row[0] if employee_id_row else None

        # The five least expensive products. product_id is a tie-breaker so
        # equal prices always resolve to the same five rows.
        cursor.execute(
            "SELECT product_id FROM products ORDER BY price ASC, product_id ASC LIMIT 5;"
        )
        product_ids = [row[0] for row in cursor.fetchall()]

        # Bail out before opening the transaction if any lookup came up short.
        # The connection is closed by the ``finally`` clause below.
        if customer_id is None or employee_id is None or len(product_ids) < 5:
            print("Setup error: Could not locate necessary customer, employee, or product records.")
            return

        # 2. Insert the order and its line items as one atomic unit.
        conn.execute("BEGIN TRANSACTION;")

        insert_order_query = """
            INSERT INTO orders (customer_id, employee_id)
            VALUES (?, ?)
            RETURNING order_id;
        """
        cursor.execute(insert_order_query, (customer_id, employee_id))
        # The RETURNING row must be fetched before the commit below.
        new_order_id = cursor.fetchone()[0]

        insert_item_query = """
            INSERT INTO line_items (order_id, product_id, quantity)
            VALUES (?, ?, 10);
        """
        for prod_id in product_ids:
            cursor.execute(insert_item_query, (new_order_id, prod_id))

        conn.commit()
        print(f" Transaction successful! Created Order ID: {new_order_id}")

        # 3. Read the new order back with a JOIN and print its line items.
        display_order_query = """
            SELECT l.line_item_id, l.quantity, p.product_name
            FROM line_items l
            INNER JOIN products p ON l.product_id = p.product_id
            WHERE l.order_id = ?
            ORDER BY l.line_item_id ASC;
        """
        cursor.execute(display_order_query, (new_order_id,))
        order_details = cursor.fetchall()

        print(f"\nManifest for Order #{new_order_id}:")
        print(f"{'Line Item ID':<12} | {'Quantity':<8} | {'Product Name':<25}")
        print("-" * 51)
        for li_id, qty, prod_name in order_details:
            print(f"{li_id:<12} | {qty:<8} | {prod_name:<25}")

        # ===== Task 4: Aggregation with HAVING =====
        query_task4 = """
            SELECT
                e.employee_id,
                e.first_name,
                e.last_name,
                COUNT(o.order_id) AS order_count
            FROM employees e
            INNER JOIN orders o ON e.employee_id = o.employee_id
            GROUP BY e.employee_id
            HAVING COUNT(o.order_id) > 5
            ORDER BY order_count DESC;
        """

        print("\n===== Task 4: Employees with More Than 5 Orders =====\n")
        print(f"{'Emp ID':<8} | {'First Name':<12} | {'Last Name':<15} | {'Order Count':<12}")
        print("-" * 55)

        cursor.execute(query_task4)
        for emp_id, f_name, l_name, order_count in cursor.fetchall():
            print(f"{emp_id:<8} | {f_name:<12} | {l_name:<15} | {order_count:<12}")

    except sqlite3.Error as e:
        # Only claim a rollback when a transaction was actually open; a failed
        # read-only query leaves nothing to undo.
        if conn is not None and conn.in_transaction:
            try:
                conn.rollback()
                print("🔄 Transaction successfully rolled back.")
            except sqlite3.Error as rollback_error:
                print(f"Rollback failed: {rollback_error}")
        print(f"❌ SQLite Error Encountered: {e}")
    finally:
        # Release the connection on every exit path: success, early return,
        # or error.
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    main()
Empty file added assignment10/assignment10.py
Empty file.
4 changes: 4 additions & 0 deletions assignment4/additional_employees.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[
{"Name": "Eve", "Age": 28, "City": "Miami", "Salary": 60000},
{"Name": "Frank", "Age": 40, "City": "Seattle", "Salary": 95000}
]
186 changes: 186 additions & 0 deletions assignment4/assignment4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
import pandas as pd

# Task 1 - Build a DataFrame from a dictionary, derive two variants, save one

# Column-oriented source data: each key becomes a column.
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago'],
}
task1_data_frame = pd.DataFrame(data)
print(task1_data_frame)

# Task 1 - Add a new column
# assign() returns a new DataFrame, so task1_data_frame keeps its three columns.
task1_with_salary = task1_data_frame.assign(Salary=[70000, 80000, 90000])
print(task1_with_salary)

# Task 1 - Modify an existing column
# Everyone is one year older; again a new frame, the salaried one is untouched.
task1_older = task1_with_salary.assign(Age=task1_with_salary['Age'] + 1)
print(task1_older)

# Task 1 - Save the DataFrame as a CSV file
# index=False keeps the row index out of employees.csv.
task1_older.to_csv('employees.csv', index=False)
print("CSV file created!")


# Task 2: Loading Data from CSV and JSON

# Task 2: Read data from a CSV file
# Reload the CSV written in Task 1 so the round trip can be inspected.
task2_employees = pd.read_csv('employees.csv')
print(task2_employees)

# Task 2: Read data from a JSON file
# additional_employees.json is a list of records shaped like:
#   {"Name": "Eve", "Age": 28, "City": "Miami", "Salary": 60000}
# The path is relative, so it is resolved against the current working directory.
json_employees = pd.read_json('additional_employees.json')
print(json_employees)

# Task 2: Combine DataFrames
# Stack the CSV rows on top of the JSON rows and renumber the index from 0.
frames_to_stack = [task2_employees, json_employees]
more_employees = pd.concat(frames_to_stack, ignore_index=True)
print(more_employees)

# Task 3: Data Inspection - Using Head, Tail, and Info Methods

# Task 3: head() - the first three rows of more_employees
first_three = more_employees.head(n=3)
print(first_three)

# Task 3: tail() - the last two rows of more_employees
last_two = more_employees.tail(n=2)
print(last_two)

# Task 3: shape - a (row count, column count) tuple
employee_shape = more_employees.shape
print(employee_shape)

# Task 3: info() - prints its own summary, so no print() wrapper is needed
more_employees.info()


# Task 4: Data Cleaning

# Task 4: Create a DataFrame from dirty_data.csv
dirty_data = pd.read_csv('dirty_data.csv')
print(dirty_data)

# Clean a copy so dirty_data stays available for comparison.
clean_data = dirty_data.copy()

# Task 4: Remove duplicate rows
clean_data = clean_data.drop_duplicates()
print(clean_data)

# Task 4: Convert Age to numeric
# errors='coerce' turns anything unparseable into NaN instead of raising.
clean_data['Age'] = pd.to_numeric(clean_data['Age'], errors='coerce')

# Replace the missing ages with the mean of the remaining ones.
clean_data['Age'] = clean_data['Age'].fillna(clean_data['Age'].mean())
print(clean_data)

# Task 4: Convert Salary to numeric
# Map the known placeholders (unknown, n/a) to missing values first, then
# coerce whatever is left to numbers.
clean_data['Salary'] = clean_data['Salary'].replace(['unknown', 'n/a'], pd.NA)
clean_data['Salary'] = pd.to_numeric(clean_data['Salary'], errors='coerce')
print(clean_data)

# Task 4: Fill missing numeric values
# Fill Age with the mean. Age was already filled above, so this is a no-op
# kept to mirror the task list.
clean_data['Age'] = clean_data['Age'].fillna(clean_data['Age'].mean())

# Fill Salary with the median.
clean_data['Salary'] = clean_data['Salary'].fillna(clean_data['Salary'].median())
print(clean_data)

# Task 4: Convert Hire Date to datetime
# Unparseable dates become NaT, then each gap takes the previous row's date.
clean_data['Hire Date'] = pd.to_datetime(clean_data['Hire Date'], errors='coerce')

# Series.ffill() replaces fillna(method='ffill'), which pandas has deprecated
# (since 2.1) in favour of the dedicated method.
clean_data['Hire Date'] = clean_data['Hire Date'].ffill()
print(clean_data)

# Task 4: Strip extra whitespace and standardize Name and Department as uppercase
clean_data['Name'] = clean_data['Name'].str.strip().str.upper()
clean_data['Department'] = clean_data['Department'].str.strip().str.upper()
print(clean_data)



























4 changes: 4 additions & 0 deletions assignment4/employees.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Name,Age,City,Salary
Alice,26,New York,70000
Bob,31,Los Angeles,80000
Charlie,36,Chicago,90000
7 changes: 7 additions & 0 deletions assignment7/school_a.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import sqlite3

# Connect to a new SQLite database.
# The file is created under ../db so that it is not pushed to GitHub.
conn = sqlite3.connect("../db/school.db")
try:
    # The "with" statement commits successful transactions and rolls back
    # transactions which cause exceptions within the block. It does NOT close
    # the connection.
    with conn:
        print("Database created and connected successfully.")
finally:
    # The connection must be closed explicitly.
    conn.close()
Loading