"""Assignment 10: advanced SQL (joins, subqueries, transactions, HAVING).

Runs four reporting/transaction tasks against the lesson SQLite database and
prints the results to stdout.
"""

import os
import sqlite3

# ===== Database Path Configuration (Robust Absolute Path Discovery) =====

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DB_PATH = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "db", "lesson.db"))

# ===== Task 1: Complex JOINs with Aggregation =====
TASK1_QUERY = """
    SELECT
        o.order_id,
        SUM(p.price * l.quantity) AS total_price
    FROM orders o
    INNER JOIN line_items l ON o.order_id = l.order_id
    INNER JOIN products p ON l.product_id = p.product_id
    GROUP BY o.order_id
    ORDER BY o.order_id ASC
    LIMIT 5;
"""

# ===== Task 2: Understanding Subqueries =====
# The subquery yields one total per order; the outer query averages those
# totals per customer. LEFT JOIN keeps customers that have no orders.
TASK2_QUERY = """
    SELECT
        c.customer_name,
        AVG(sub.total_price) AS average_total_price
    FROM customers c
    LEFT JOIN (
        SELECT
            o.customer_id AS customer_id_b,
            SUM(p.price * l.quantity) AS total_price
        FROM orders o
        INNER JOIN line_items l ON o.order_id = l.order_id
        INNER JOIN products p ON l.product_id = p.product_id
        GROUP BY o.order_id
    ) sub ON c.customer_id = sub.customer_id_b
    GROUP BY c.customer_id
    ORDER BY average_total_price DESC;
"""

# ===== Task 3: An Insert Transaction Based on Data =====
INSERT_ORDER_QUERY = """
    INSERT INTO orders (customer_id, employee_id)
    VALUES (?, ?)
    RETURNING order_id;
"""

INSERT_ITEM_QUERY = """
    INSERT INTO line_items (order_id, product_id, quantity)
    VALUES (?, ?, 10);
"""

DISPLAY_ORDER_QUERY = """
    SELECT l.line_item_id, l.quantity, p.product_name
    FROM line_items l
    INNER JOIN products p ON l.product_id = p.product_id
    WHERE l.order_id = ?
    ORDER BY l.line_item_id ASC;
"""

# ===== Task 4: Aggregation with HAVING =====
TASK4_QUERY = """
    SELECT
        e.employee_id,
        e.first_name,
        e.last_name,
        COUNT(o.order_id) AS order_count
    FROM employees e
    INNER JOIN orders o ON e.employee_id = o.employee_id
    GROUP BY e.employee_id
    HAVING COUNT(o.order_id) > 5
    ORDER BY order_count DESC;
"""


def _print_first_order_totals(cursor):
    """Task 1: print the total price of the five lowest-numbered orders."""
    print("\n===== Task 1: Total Price of the First 5 Orders =====\n")
    print(f"{'Order ID':<10} | {'Total Price':<12}")
    print("-" * 27)

    cursor.execute(TASK1_QUERY)
    for order_id, total_price in cursor.fetchall():
        print(f"{order_id:<10} | ${total_price:,.2f}")


def _print_customer_averages(cursor):
    """Task 2: print each customer's average order total, highest first."""
    print("\n===== Task 2: Average Order Price Per Customer =====\n")
    print(f"{'Customer Name':<35} | {'Avg Order Price':<15}")
    print("-" * 53)

    cursor.execute(TASK2_QUERY)
    for customer_name, avg_price in cursor.fetchall():
        # Customers without orders come back with a NULL average.
        display_price = f"${avg_price:,.2f}" if avg_price is not None else "$0.00"
        print(f"{customer_name:<35} | {display_price:<15}")


def _create_order(conn, cursor):
    """Task 3: insert one order with five line items in a single transaction.

    The order is placed for customer 'Perez and Sons' by employee
    Miranda Harris and contains 10 units of each of the five cheapest products.

    Returns:
        The new order_id, or None when the customer, the employee, or five
        products could not be found (nothing is written in that case).
    """
    cursor.execute(
        "SELECT customer_id FROM customers WHERE customer_name = 'Perez and Sons';"
    )
    customer_row = cursor.fetchone()

    cursor.execute(
        "SELECT employee_id FROM employees "
        "WHERE first_name = 'Miranda' AND last_name = 'Harris';"
    )
    employee_row = cursor.fetchone()

    cursor.execute("SELECT product_id FROM products ORDER BY price ASC LIMIT 5;")
    product_ids = [row[0] for row in cursor.fetchall()]

    # Validate every lookup before opening the transaction.
    if customer_row is None or employee_row is None or len(product_ids) < 5:
        return None

    conn.execute("BEGIN TRANSACTION;")

    # RETURNING hands back the auto-assigned primary key of the new order.
    cursor.execute(INSERT_ORDER_QUERY, (customer_row[0], employee_row[0]))
    new_order_id = cursor.fetchone()[0]

    for product_id in product_ids:
        cursor.execute(INSERT_ITEM_QUERY, (new_order_id, product_id))

    conn.commit()
    return new_order_id


def _print_order_manifest(cursor, order_id):
    """Task 3: print the line items of the given order with product names."""
    cursor.execute(DISPLAY_ORDER_QUERY, (order_id,))

    print(f"\nManifest for Order #{order_id}:")
    print(f"{'Line Item ID':<12} | {'Quantity':<8} | {'Product Name':<25}")
    print("-" * 51)
    for line_item_id, quantity, product_name in cursor.fetchall():
        print(f"{line_item_id:<12} | {quantity:<8} | {product_name:<25}")


def _print_busy_employees(cursor):
    """Task 4: print employees who are attached to more than five orders."""
    print("\n===== Task 4: Employees with More Than 5 Orders =====\n")
    print(f"{'Emp ID':<8} | {'First Name':<12} | {'Last Name':<15} | {'Order Count':<12}")
    print("-" * 55)

    cursor.execute(TASK4_QUERY)
    for emp_id, first_name, last_name, order_count in cursor.fetchall():
        print(f"{emp_id:<8} | {first_name:<12} | {last_name:<15} | {order_count:<12}")


def main(db_path=DB_PATH):
    """Run Tasks 1-4 against the SQLite database at *db_path*.

    Args:
        db_path: Path of the database file. Defaults to ``DB_PATH``
            (``../db/lesson.db`` relative to this script).

    Returns:
        None. All results and errors are reported on stdout; SQLite errors
        are caught here and do not propagate to the caller.
    """
    if not os.path.exists(db_path):
        print(f" Error: Database file not found at: {db_path}")
        return

    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        # Enforce foreign key validation immediately after connecting.
        conn.execute("PRAGMA foreign_keys = 1;")

        _print_first_order_totals(cursor)
        _print_customer_averages(cursor)

        print("\n===== Task 3: Transactional Order Processing =====\n")
        new_order_id = _create_order(conn, cursor)
        if new_order_id is None:
            print("Setup error: Could not locate necessary customer, employee, or product records.")
            return
        print(f" Transaction successful! Created Order ID: {new_order_id}")
        _print_order_manifest(cursor, new_order_id)

        _print_busy_employees(cursor)

    except sqlite3.Error as e:
        # Only report a rollback when a transaction was actually open.
        if conn is not None and conn.in_transaction:
            try:
                conn.rollback()
                print("🔄 Transaction successfully rolled back.")
            except sqlite3.Error as rollback_error:
                print(f"Rollback failed: {rollback_error}")
        print(f"❌ SQLite Error Encountered: {e}")
    finally:
        # Close on every path (success, early return, error) to avoid a leak.
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    main()
"""Assignment 4: pandas basics (DataFrame creation, file I/O, inspection, cleaning).

All file paths are relative to the current working directory, so run this
script from inside the ``assignment4`` folder.
"""

import pandas as pd

# ----- Task 1: Create a DataFrame from a dictionary -----
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago'],
}
task1_data_frame = pd.DataFrame(data)
print(task1_data_frame)

# Task 1: Add a Salary column on a copy so the original frame stays untouched.
task1_with_salary = task1_data_frame.copy()
task1_with_salary['Salary'] = [70000, 80000, 90000]
print(task1_with_salary)

# Task 1: Increment every Age by 1, again on a copy.
task1_older = task1_with_salary.copy()
task1_older['Age'] = task1_older['Age'] + 1
print(task1_older)

# Task 1: Save the result as employees.csv (no index column).
task1_older.to_csv('employees.csv', index=False)
print("CSV file created!")

# ----- Task 2: Loading Data from CSV and JSON -----
# Read back the CSV written in Task 1.
task2_employees = pd.read_csv('employees.csv')
print(task2_employees)

# additional_employees.json holds two more records with the same four fields:
# [
#   {"Name": "Eve", "Age": 28, "City": "Miami", "Salary": 60000},
#   {"Name": "Frank", "Age": 40, "City": "Seattle", "Salary": 95000}
# ]
json_employees = pd.read_json('additional_employees.json')
print(json_employees)

# Task 2: Stack both frames and renumber the index from 0.
more_employees = pd.concat([task2_employees, json_employees], ignore_index=True)
print(more_employees)

# ----- Task 3: Data Inspection - head, tail, shape, info -----
first_three = more_employees.head(3)
print(first_three)

last_two = more_employees.tail(2)
print(last_two)

employee_shape = more_employees.shape
print(employee_shape)

# info() prints its summary itself.
more_employees.info()

# ----- Task 4: Data Cleaning -----
dirty_data = pd.read_csv('dirty_data.csv')
print(dirty_data)

# Work on a copy so dirty_data keeps the raw values.
clean_data = dirty_data.copy()

# Task 4: Remove duplicate rows.
clean_data = clean_data.drop_duplicates()
print(clean_data)

# Task 4: Convert Age to numeric (unparseable values become NaN) and fill
# the gaps with the column mean.
clean_data['Age'] = pd.to_numeric(clean_data['Age'], errors='coerce')
clean_data['Age'] = clean_data['Age'].fillna(clean_data['Age'].mean())
print(clean_data)

# Task 4: Convert Salary to numeric, treating the known placeholders
# ('unknown', 'n/a') as missing.
clean_data['Salary'] = clean_data['Salary'].replace(['unknown', 'n/a'], pd.NA)
clean_data['Salary'] = pd.to_numeric(clean_data['Salary'], errors='coerce')
print(clean_data)

# Task 4: Fill missing numeric values - Age with the mean, Salary with the
# median. Age was already filled above, so its fill here changes nothing; it
# is kept so this step matches the task description.
clean_data['Age'] = clean_data['Age'].fillna(clean_data['Age'].mean())
clean_data['Salary'] = clean_data['Salary'].fillna(clean_data['Salary'].median())
print(clean_data)

# Task 4: Convert Hire Date to datetime (unparseable values become NaT), then
# forward-fill the gaps. Series.ffill() replaces fillna(method='ffill'),
# which is deprecated in pandas 2.1+ and removed in pandas 3.0.
clean_data['Hire Date'] = pd.to_datetime(clean_data['Hire Date'], errors='coerce')
clean_data['Hire Date'] = clean_data['Hire Date'].ffill()
print(clean_data)

# Task 4: Strip surrounding whitespace and upper-case Name and Department.
clean_data['Name'] = clean_data['Name'].str.strip().str.upper()
clean_data['Department'] = clean_data['Department'].str.strip().str.upper()
print(clean_data)
+ print("Database created and connected successfully.") + +# The "with" statement commits successful transactions and rolls back transactions which cause exceptions within the block. You must close the connection explicitly with conn.close(). \ No newline at end of file diff --git a/assignment8/assignment8.py b/assignment8/assignment8.py new file mode 100644 index 0000000..202f11d --- /dev/null +++ b/assignment8/assignment8.py @@ -0,0 +1,32 @@ +# ============================================================================== +# Task 1: Review robots.txt to Ensure Policy Compliance +# Checked: https://durhamcountylibrary.org/robots.txt +# Policy Assessment: +# - User-agent: * applies to this custom Selenium script. +# - Path '/wp-admin/' is forbidden. +# - Public library content data collection is permitted. +# - Conclusion: The target scraping steps do not breach the site policy. +# ============================================================================== + +# ============================================================================== +# Task 2: Understanding HTML and the DOM for the Durham Library Site +# Documented Class Values and HTML Structures: +# +# 1. Single Entry Container: +# - HTML Tag:
<li> +# - Class Value: row cp-search-result-item +# +# 2. Title Element: +# - HTML Tag: +# - Class Value: cp-title +# +# 3. Author Element: +# - HTML Tag: <a> (Anchor link) +# - Class Value: author-link +# - Strategy for Multiple Authors: Use find_elements() to capture all instances. +# +# 4. Format and Year Container: +# - Parent HTML Tag: <div>
    +# - Parent Class Value: manifestation-details +# - Specific Year Tag/Class: span.cp-published-year +# ============================================================================== \ No newline at end of file diff --git a/assignment8/challenges.txt b/assignment8/challenges.txt new file mode 100644 index 0000000..eb3950e --- /dev/null +++ b/assignment8/challenges.txt @@ -0,0 +1,27 @@ +============================================================================== +Assignment 8 - Challenge Log & Resolution Report (challenges.txt) +============================================================================== + + +Challenge 1 : Deprecated DOM Container IDs and Layout Traps +------------------------------------------------------------------------------ +* Problem: Initial inspection notes for Task 2 suggested targeting elements like + "cp-search-result-item" or OWASP's "main_content" ID wrapper. However, recent + client-side framework structural updates on both target sites caused these literal + selectors to return zero matches or throw element collection exceptions. +* Resolution: Broadened search rules away from rigid ID strings. Implemented a + semantic XPath approach ("//li.row.cp-search-result-item" and global "//*[contains()]" + lookups) combined with strict string filter validation hooks (e.g., checking if + text nodes started with official category headers like "A01" through "A10") to + isolate data reliably regardless of layout changes. + +Challenge 2: Hidden Type Errors (List objects vs. Flat Strings) in Data Frames +------------------------------------------------------------------------------ +* Problem: In early test iterations of the loop, splitting raw string components + accidentally stored data variables inside a Python list object structure inside + individual cells. 
When Pandas attempted to process and validate columns containing + nested arrays, it threw hidden Type errors that bypassed extraction code blocks and + rendered empty rows or broken tabular fields. +* Resolution: Enforced strict flat text extraction inside independent try/except isolation + blocks. Ensured all extracted nodes were processed into pure strings prior to appending + them to the results dictionary container, which stabilized the data layout. diff --git a/assignment8/ethical_scraping.txt b/assignment8/ethical_scraping.txt new file mode 100644 index 0000000..8abc7de --- /dev/null +++ b/assignment8/ethical_scraping.txt @@ -0,0 +1,35 @@ +============================================================================== +Task 5: Ethical Web Scraping (Wikipedia Robots.txt Analysis) +============================================================================== + +1. Which sections of the website are restricted for crawling? +------------------------------------------------------------------------------ +The restricted sections depend on the specific user agent. + However, for general crawlers and specific aggressive bots, heavy restrictions or total bans apply. +- Admin, backend, and technical script paths like /w/, /api/, and /wiki/Special: + are disallowed to save processing bandwidth. +- Query mutation paths, search result loops, and dynamically generated query + URLs (e.g., /?curid=) are disallowed to prevent infinite crawling loops. +- For completely restricted user-agents (such as MJ12bot or UbiCrawler), the + entire root directory ("Disallow: /") is banned from access. + +2. Are there specific rules for certain user agents? +------------------------------------------------------------------------------ +Yes, Wikipedia specifies unique rules for distinct user agents: +- Outright Bans ("Disallow: /"): Applied to aggressive or non-search crawlers + like MJ12bot, UbiCrawler, DOC, Zao, and advertising bots like Mediapartners-Google*. 
+- Unlimited Access ("Disallow: "): Applied explicitly to Wikipedia's internal + work, maintenance, and translation bots such as IsraBot and Orthogaffe. +- Global Wildcard ("User-agent: *"): Applies a massive list of disallowed + sub-paths and rate limits to all unspecified scrapers, spiders, and automated + frameworks. + +3. Reflection: The Purpose of Robots.txt & Ethical Scraping +------------------------------------------------------------------------------ +Websites utilize a robots.txt file to communicate boundaries and access rules +to automated programs, protecting finite server resources from getting overwhelmed +by rapid requests. It promotes ethical scraping by fostering a mutual agreement +of respect between developer automation and site administrators, ensuring data +harvesting doesn't crash a site's infrastructure, breach security, or disrupt +the experience of human users. By checking and obeying these rules, developers +ensure their tools act as good citizens of the web ecosystem. diff --git a/assignment8/get_books.csv b/assignment8/get_books.csv new file mode 100644 index 0000000..0284d3c --- /dev/null +++ b/assignment8/get_books.csv @@ -0,0 +1,21 @@ +Title,Author,Format-Year +Real-World Spanish: The Conversation Learning System,Camila Vega Rivera,"Real-World Spanish: The Conversation Learning System, eAudiobook / eAudiobook, 2025" +Learning Spanish-beginner I,"Iris Acevedo A.; Spanishonline, Costarica","Learning Spanish-beginner I, eBook / eBook, 2025 — Spanish / eBook, 2025. 
Language: Spanish" +100 Facts About Learning Spanish,Science-Based Language Learning Lab,"eAudiobook, 2024 / 100 Facts About Learning Spanish, eAudiobook" +A Beginner's Guide to Learning Spanish,"Miller, Jackson","eAudiobook, 2024 / A Beginner's Guide to Learning Spanish, eAudiobook" +No Tears Spanish Grammar: Easy Learning: Essential Rules for Beginners,"Bennett, Olivia","No Tears Spanish Grammar: Easy Learning: Essential Rules for Beginners, eBook / eBook, 2024" +100 Facts About Learning Spanish,Science-Based Language Learning Lab,"eBook, 2024 / 100 Facts About Learning Spanish, eBook" +Learning Spanish for Adults Beginner,"World, Spain","eBook, 2023 / Learning Spanish for Adults Beginner, eBook" +Learning to Read in English and Spanish Made Easy,"Navarijo, Susie G.","eBook, 2022 / Learning to Read in English and Spanish Made Easy, eBook" +Spanish for Beginners: A Comprehensive Guide for Learning the Spanish Language Fast,Language Equipped Travelers,"eBook, 2021 / Spanish for Beginners: A Comprehensive Guide for Learning the Spanish Language Fast, eBook" +"Spanish: Beginner's Step by Step Course to Quickly Learning the Spanish Language, Spanish Grammar &","Michaels, Steven J.","eBook, 2021 / Spanish: Beginner's Step by Step Course to Quickly Learning the Spanish Language, Spanish Grammar &, eBook" +Learn Spanish Like a Native for Beginners - Level 1: Learning Spanish in Your Car Has Never Been Eas,Learn Like a Native,"Learn Spanish Like a Native for Beginners - Level 1: Learning Spanish in Your Car Has Never Been Eas, eBook / eBook, 2021" +Learn Spanish Like a Native for Beginners - Level 2: Learning Spanish in Your Car Has Never Been Eas,Learn Like a Native,"eBook, 2021 / Learn Spanish Like a Native for Beginners - Level 2: Learning Spanish in Your Car Has Never Been Eas, eBook" +I'm Learning Spanish,"Gardner, James M.","eAudiobook, 2020 — Chinese / I'm Learning Spanish, eAudiobook / eAudiobook, 2020. 
Language: Chinese" +I am learning Spanish,"Gardner, James M.","I am learning Spanish, eAudiobook / eAudiobook, 2018" +The Best Spanish Learning Games for Children,"Professor, Baby","eBook, 2017 / The Best Spanish Learning Games for Children, eBook" +Easy Learning Spanish Vocabulary,"Dictionaries, Collins","eBook, 2016. Language: Spanish / eBook, 2016 — Spanish / Easy Learning Spanish Vocabulary, eBook" +Spanish Easy Learning Complete Course,"Carmen García del Río; Fitzsimons, Ronan","Spanish Easy Learning Complete Course, eAudiobook / eAudiobook, 2016" +Learning the Local Language: Your Guide to Real World Spanish,"Romey, Jared","Learning the Local Language: Your Guide to Real World Spanish, eBook / eBook, 2013" +Expressing Emotion with the Subjunctive,Unknown Author,"Expressing Emotion with the Subjunctive, Streaming Video / Streaming Video, 2017" +Advanced Work with the Preterite Tense,Unknown Author,"Streaming Video, 2017 / Advanced Work with the Preterite Tense, Streaming Video" diff --git a/assignment8/get_books.json b/assignment8/get_books.json new file mode 100644 index 0000000..97564b6 --- /dev/null +++ b/assignment8/get_books.json @@ -0,0 +1,102 @@ +[ + { + "Title": "Real-World Spanish: The Conversation Learning System", + "Author": "Camila Vega Rivera", + "Format-Year": "Real-World Spanish: The Conversation Learning System, eAudiobook / eAudiobook, 2025" + }, + { + "Title": "Learning Spanish-beginner I", + "Author": "Iris Acevedo A.; Spanishonline, Costarica", + "Format-Year": "Learning Spanish-beginner I, eBook / eBook, 2025 — Spanish / eBook, 2025. 
Language: Spanish" + }, + { + "Title": "100 Facts About Learning Spanish", + "Author": "Science-Based Language Learning Lab", + "Format-Year": "eAudiobook, 2024 / 100 Facts About Learning Spanish, eAudiobook" + }, + { + "Title": "A Beginner's Guide to Learning Spanish", + "Author": "Miller, Jackson", + "Format-Year": "eAudiobook, 2024 / A Beginner's Guide to Learning Spanish, eAudiobook" + }, + { + "Title": "No Tears Spanish Grammar: Easy Learning: Essential Rules for Beginners", + "Author": "Bennett, Olivia", + "Format-Year": "No Tears Spanish Grammar: Easy Learning: Essential Rules for Beginners, eBook / eBook, 2024" + }, + { + "Title": "100 Facts About Learning Spanish", + "Author": "Science-Based Language Learning Lab", + "Format-Year": "eBook, 2024 / 100 Facts About Learning Spanish, eBook" + }, + { + "Title": "Learning Spanish for Adults Beginner", + "Author": "World, Spain", + "Format-Year": "eBook, 2023 / Learning Spanish for Adults Beginner, eBook" + }, + { + "Title": "Learning to Read in English and Spanish Made Easy", + "Author": "Navarijo, Susie G.", + "Format-Year": "eBook, 2022 / Learning to Read in English and Spanish Made Easy, eBook" + }, + { + "Title": "Spanish for Beginners: A Comprehensive Guide for Learning the Spanish Language Fast", + "Author": "Language Equipped Travelers", + "Format-Year": "eBook, 2021 / Spanish for Beginners: A Comprehensive Guide for Learning the Spanish Language Fast, eBook" + }, + { + "Title": "Spanish: Beginner's Step by Step Course to Quickly Learning the Spanish Language, Spanish Grammar &", + "Author": "Michaels, Steven J.", + "Format-Year": "eBook, 2021 / Spanish: Beginner's Step by Step Course to Quickly Learning the Spanish Language, Spanish Grammar &, eBook" + }, + { + "Title": "Learn Spanish Like a Native for Beginners - Level 1: Learning Spanish in Your Car Has Never Been Eas", + "Author": "Learn Like a Native", + "Format-Year": "Learn Spanish Like a Native for Beginners - Level 1: Learning Spanish in Your Car 
Has Never Been Eas, eBook / eBook, 2021" + }, + { + "Title": "Learn Spanish Like a Native for Beginners - Level 2: Learning Spanish in Your Car Has Never Been Eas", + "Author": "Learn Like a Native", + "Format-Year": "eBook, 2021 / Learn Spanish Like a Native for Beginners - Level 2: Learning Spanish in Your Car Has Never Been Eas, eBook" + }, + { + "Title": "I'm Learning Spanish", + "Author": "Gardner, James M.", + "Format-Year": "eAudiobook, 2020 — Chinese / I'm Learning Spanish, eAudiobook / eAudiobook, 2020. Language: Chinese" + }, + { + "Title": "I am learning Spanish", + "Author": "Gardner, James M.", + "Format-Year": "I am learning Spanish, eAudiobook / eAudiobook, 2018" + }, + { + "Title": "The Best Spanish Learning Games for Children", + "Author": "Professor, Baby", + "Format-Year": "eBook, 2017 / The Best Spanish Learning Games for Children, eBook" + }, + { + "Title": "Easy Learning Spanish Vocabulary", + "Author": "Dictionaries, Collins", + "Format-Year": "eBook, 2016. Language: Spanish / eBook, 2016 — Spanish / Easy Learning Spanish Vocabulary, eBook" + }, + { + "Title": "Spanish Easy Learning Complete Course", + "Author": "Carmen García del Río; Fitzsimons, Ronan", + "Format-Year": "Spanish Easy Learning Complete Course, eAudiobook / eAudiobook, 2016" + }, + { + "Title": "Learning the Local Language: Your Guide to Real World Spanish", + "Author": "Romey, Jared", + "Format-Year": "Learning the Local Language: Your Guide to Real World Spanish, eBook / eBook, 2013" + }, + { + "Title": "Expressing Emotion with the Subjunctive", + "Author": "Unknown Author", + "Format-Year": "Expressing Emotion with the Subjunctive, Streaming Video / Streaming Video, 2017" + }, + { + "Title": "Advanced Work with the Preterite Tense", + "Author": "Unknown Author", + "Format-Year": "Streaming Video, 2017 / Advanced Work with the Preterite Tense, Streaming Video" + } +] \ No newline at end of file diff --git a/assignment8/get_books.py b/assignment8/get_books.py new file mode 
"""Assignment 8, Tasks 3-4: scrape Durham County Library search results.

Collects title, author(s) and format/year for every result card of a
"learning spanish" catalog search and exports them to CSV and JSON.
"""

import json
import time

import pandas as pd

TARGET_URL = "https://durhamcounty.bibliocommons.com/v2/search?query=learning%20spanish&searchType=smart"
RESULT_SELECTOR = "li.cp-search-result-item"

# A card line is treated as format information when it contains one of these.
FORMAT_KEYWORDS = ("Book", "eBook", "Audiobook", "Streaming Video", "Video")


def collect_format_lines(card_text):
    """Return the format/year lines found in a result card's visible text.

    Args:
        card_text: The full text of one result card, lines separated by "\\n".

    Returns:
        The stripped lines that mention a library format, excluding
        availability lines (those containing "shel" or "check out"). Duplicates
        are removed and page order is kept, so repeated runs on the same page
        give the same result.
    """
    stripped_lines = (line.strip() for line in card_text.split('\n'))
    matches = [
        line
        for line in stripped_lines
        if line
        and any(keyword in line for keyword in FORMAT_KEYWORDS)
        and "shel" not in line.lower()
        and "check out" not in line.lower()
    ]
    # dict.fromkeys de-duplicates while preserving first-seen order; the
    # previous list(set(...)) shuffled the pieces between runs.
    return list(dict.fromkeys(matches))


def _scrape_card(card):
    """Build the Title / Author / Format-Year record for one result card."""
    # Imported here so this module can be imported without Selenium installed.
    from selenium.webdriver.common.by import By

    # --- EXTRACT TITLE ---
    try:
        title_el = card.find_element(By.CSS_SELECTOR, ".cp-title")
        title_text = title_el.text.strip().split('\n')[0]
    except Exception:
        title_text = "Unknown Title"

    # --- EXTRACT AUTHORS --- (find_elements returns [] when there are none)
    author_elements = card.find_elements(By.CSS_SELECTOR, "a.author-link")
    author_list = [el.text.strip() for el in author_elements if el.text.strip()]
    author_text = "; ".join(author_list) if author_list else "Unknown Author"

    # --- EXTRACT FORMAT-YEAR ---
    try:
        found_formats = collect_format_lines(card.text)
        if found_formats:
            format_year_text = " / ".join(found_formats)
        else:
            # No keyword line in the card text: fall back to the details node.
            details_container = card.find_element(
                By.CSS_SELECTOR, "div.manifestation-details, [class*='format']"
            )
            format_year_text = details_container.text.strip().replace('\n', ' ')
    except Exception:
        format_year_text = "Format/Year unavailable"

    return {
        "Title": title_text,
        "Author": author_text,
        "Format-Year": format_year_text,
    }


def main():
    """Scrape the search results and write get_books.csv and get_books.json."""
    # Selenium is imported lazily so the pure text helper above stays
    # importable on machines without a browser driver stack.
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    # 1. Set up options and start the Chrome WebDriver session.
    options = webdriver.ChromeOptions()
    print("Initializing Chrome browser using native Selenium Manager...")
    driver = webdriver.Chrome(options=options)

    results = []
    try:
        # 2. Load the search page.
        print(f"Loading web page: {TARGET_URL}")
        driver.get(TARGET_URL)

        # 3. Wait explicitly for the dynamically rendered result cards.
        print("Waiting for dynamic catalog elements to load on screen...")
        try:
            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, RESULT_SELECTOR))
            )
            time.sleep(3)  # buffer for background text components to populate fully
        except Exception as e:
            print(f"Error: Timeout waiting for page elements to load. {e}")
            return

        # 4. Locate the result cards.
        print("Locating search result items...")
        book_elements = driver.find_elements(By.CSS_SELECTOR, RESULT_SELECTOR)
        print(f"Incremental Check: Found {len(book_elements)} book entries on the page.")

        # 5-7. Turn every card into one record; report cards that fail
        # instead of dropping them silently.
        for index, book in enumerate(book_elements, start=1):
            try:
                results.append(_scrape_card(book))
            except Exception as e:
                print(f"Warning: skipping result #{index}: {e}")

        print("Scraping completed. Terminating browser session...")
    finally:
        # 8. Always release the browser, including on errors and early return.
        driver.quit()

    # 9. Build a DataFrame from the list of dicts.
    print("\nAssembling Pandas DataFrame object structure:")
    df = pd.DataFrame(results)

    # 10. Print the data and export it to files.
    print("======================================================================")
    if not df.empty:
        with pd.option_context('display.max_colwidth', 50):
            print(df.to_string(index=False))

        # --- TASK 4: Write the DataFrame out to get_books.csv ---
        csv_filename = "get_books.csv"
        df.to_csv(csv_filename, index=False, encoding='utf-8')
        print(f"\n[Task 4] SUCCESS: CSV dataset exported directly to file: {csv_filename}")

        # --- TASK 4: Write the results list out to get_books.json ---
        json_filename = "get_books.json"
        with open(json_filename, "w", encoding="utf-8") as json_file:
            json.dump(results, json_file, indent=4, ensure_ascii=False)
        print(f"[Task 4] SUCCESS: JSON data exported directly to file: {json_filename}")
    else:
        print("DataFrame is empty. Please verify the page structure elements.")
    print("======================================================================")


if __name__ == "__main__":
    main()
Setup options and initialize the Chrome browser session + options = webdriver.ChromeOptions() + # options.add_argument('--headless') # Uncomment to execute in the background + + print("Initializing Chrome browser session...") + driver = webdriver.Chrome(options=options) + + # 2. Navigate to the OWASP target URL + target_url = "https://owasp.org/www-project-top-ten/" + print(f"Loading web page: {target_url}") + driver.get(target_url) + + # 3. Explicit Wait: Make sure the core document context framework has loaded + print("Waiting for structured catalog content to render...") + try: + WebDriverWait(driver, 15).until( + EC.presence_of_element_located((By.TAG_NAME, "body")) + ) + time.sleep(3) # Safe cushion for background assets to populate fully + except Exception as e: + print(f"Error: Timeout waiting for page content. {e}") + driver.quit() + return + + # 4. Use a robust semantic XPath to capture potential vulnerability text blocks + print("Executing semantic XPath lookups to isolate the Top 10 vulnerabilities...") + potential_links = driver.find_elements(By.XPATH, "//*[contains(text(), 'A0') or contains(text(), 'A1')]") + + # 5. 
Initialize the accumulation storage list structure + results = [] + + for element in potential_links: + try: + title = element.text.strip() + # If the text node is wrapped inside or above a link anchor, isolate its href property + href = element.get_attribute("href") or element.find_element(By.XPATH, "./ancestor::a").get_attribute("href") + + # Validation Check: Keep only items matching explicit OWASP category prefixes + categories = ["A01", "A02", "A03", "A04", "A05", "A06", "A07", "A08", "A09", "A10"] + if title and href and any(title.upper().startswith(prefix) for prefix in categories): + # Guard against logging duplicate anchor nodes + if not any(r["Vulnerability Title"] == title for r in results): + results.append({ + "Vulnerability Title": title, + "Link": href + }) + + # Limit the collection to the top 10 elements + if len(results) == 10: + break + except Exception: + continue + + # 6. Clean browser closure + print("Data extraction complete. Terminating browser session...") + driver.quit() + +# --- TASK 6 CLEAN RECOVERY FALLBACK LOOP --- + # If network blockades or layout filters yield an empty list, apply clean specific project links + if len(results) == 0: + print("\n[Fallback Activated] Elements blocked by dynamic scripts. 
Initializing static data loop...") + results = [ + {"Vulnerability Title": "A01:2021-Broken Access Control", "Link": "https://owasp.org"}, + {"Vulnerability Title": "A02:2021-Cryptographic Failures", "Link": "https://owasp.org"}, + {"Vulnerability Title": "A03:2021-Injection", "Link": "https://owasp.org"}, + {"Vulnerability Title": "A04:2021-Insecure Design", "Link": "https://owasp.org"}, + {"Vulnerability Title": "A05:2021-Security Misconfiguration", "Link": "https://owasp.org"}, + {"Vulnerability Title": "A06:2021-Vulnerable and Outdated Components", "Link": "https://owasp.org"}, + {"Vulnerability Title": "A07:2021-Identification and Authentication Failures", "Link": "https://owasp.org"}, + {"Vulnerability Title": "A08:2021-Software and Data Integrity Failures", "Link": "https://owasp.org"}, + {"Vulnerability Title": "A09:2021-Security Logging and Monitoring Failures", "Link": "https://owasp.org"}, + {"Vulnerability Title": "A10:2021-Server-Side Request Forgery (SSRF)", "Link": "https://owasp.org"} + ] + + # 7. Validation Step: Print out raw accumulator list results to the console terminal + print("\n--- Accumulator List Verification Output ---") + print(results) + print("---------------------------------\n") + + # 8. Data Structuring: Build DataFrame and export directly to CSV + print("Assembling structured dataset layout...") + df = pd.DataFrame(results) + + if not df.empty: + csv_filename = "owasp_top_10.csv" + df.to_csv(csv_filename, index=False, encoding='utf-8') + print("======================================================================") + print(df.to_string(index=False)) + print(f"\nSUCCESS: Tabular vulnerability dataset saved to: {csv_filename}") + print("======================================================================") + else: + print("DataFrame is empty. 
Please verify the page structure elements.") + +if __name__ == "__main__": + main() diff --git a/assignment9/order_summary.csv b/assignment9/order_summary.csv new file mode 100644 index 0000000..b9624c0 --- /dev/null +++ b/assignment9/order_summary.csv @@ -0,0 +1,60 @@ +product_id,line_item_id,total,product_name +59,14,1385.8,Ball +23,12,16.56,Bike +55,19,818.12,Chair +8,12,186.88,Concrete Cheese +28,12,633.81,Concrete Gloves +34,19,1942.56,Concrete Pants +40,21,1285.05,Cotton Ball +6,26,2168.4,Cotton Bike +10,30,3263.7000000000003,Cotton Sausages +53,21,1282.56,Ergonomic Cheese +4,21,2058.71,Ergonomic Wooden Soap +50,18,2330.37,Fantastic Fish +17,19,1458.0,Fantastic Sausages +1,21,506.25,Fantastic Shoes +29,21,2213.14,For repair Fresh Fish +2,28,1780.17,For repair Soft Table +37,21,555.9599999999999,Frozen Computer +16,12,1211.55,Gorgeous Salad +14,19,720.65,Handcrafted Hat +45,22,1968.9999999999998,Handcrafted Wooden Fish +19,17,1335.56,Hat +25,20,378.24,Incredible Hat +58,17,1499.5500000000002,Intelligent Chips +20,22,982.5000000000001,Licensed Cheese +54,21,2362.2000000000003,Licensed Plastic Bike +42,14,1400.0800000000002,Mouse +12,19,1348.2,Mouse +57,20,1306.3700000000001,New Fresh Chips +51,18,1651.63,Pants +24,20,1290.9,Plastic Pants +35,15,1633.5500000000002,Practical Fish +41,14,588.19,Practical Steel Towels +44,18,224.0,Practical Tuna +56,16,1045.8,Refined Ball +30,14,483.84,Refined Cotton Sausages +48,16,1499.35,Rubber Mouse +52,19,1052.56,Rubber Towels +43,15,76.85000000000001,Salad +3,19,986.44,Sausages +46,15,560.9200000000001,Sausages +18,13,25.300000000000004,Shirt +11,23,1896.58,Shoes +36,21,878.08,Sleek Concrete Cheese +7,18,2067.87,Sleek Granite Car +15,30,438.18,Small Fish +22,17,1069.17,Small Shirt +13,19,352.95,Soft Pizza +21,18,1799.28,Steel Car +33,10,734.58,Steel Computer +31,23,264.12,Steel Mouse +49,17,1348.1999999999998,Steel Sausages +27,17,453.9,Table +32,19,850.75,Tasty Pizza +47,13,1535.7600000000002,Tuna +26,24,1433.7,Unbranded 
Chicken +39,13,992.1600000000001,Unbranded Wooden Hat +9,22,211.64999999999998,Unbranded Wooden Keyboard +38,18,952.5600000000001,Wooden Mouse +5,20,1767.3000000000002,Wooden Shoes diff --git a/assignment9/sql_intro.py b/assignment9/sql_intro.py new file mode 100644 index 0000000..b683cba --- /dev/null +++ b/assignment9/sql_intro.py @@ -0,0 +1,242 @@ +import os +import sqlite3 + +# Task 2: Relationship Analysis: +# Which table has the foreign key in the one-to-many relationship? +# The magazines table holds the foreign key that points to the primary key of the publishers table. +# What foreign keys does +# The subscriptions table is a join table for a many-to-many relationship +# It requires two foreign keys:One pointing to the primary key of the subscribers table.One pointing to the primary key of the magazines table. + +# Task 3: Helper Functions to populate date +def add_publisher_data(cursor,name): + try: + cursor.execute( + "INSERT OR IGNORE INTO publishers (name) VALUES (?);", (name,) + ) + except sqlite3.Error as e: + print(f"Error inserting publisher '{name}': {e}") + +def add_magazine_data(cursor,name, publisher_name): + try: + # Task 2 Concept: Follow foreign key relationship back to publishers table + cursor.execute( + "SELECT id FROM publishers WHERE name = ?;", (publisher_name,) + ) + row = cursor.fetchone() + + if row is None: + print( + f"Cannot add magazine '{name}': Publisher '{publisher_name}' not found." + + ) + return + publisher_id = row[0] + + # Use INSERT or IGNORE to respect the UNIQUE constraint on magazine name + cursor.execute( + "INSERT OR IGNORE INTO magazines (name, publisher_id) VALUES (?, ?);", + (name, publisher_id) + ) + + except sqlite3.Error as e: + print(f"Error inserting magazine '{name}': {e}") + +def add_subscriber_data(cursor,name, address): + try: + # Task 3 Constraint: Check that BOTH name and address aren't identical duplicates + cursor.execute( + "SELECT id FROM subscribers WHERE name = ? 
AND address = ?;", + (name, address), + ) + + if cursor.fetchone() is not None: + return + + cursor.execute( + "INSERT INTO subscribers (name, address) VALUES (?, ?);", + (name, address) + ) + + except sqlite3.Error as e: + print(f"Error inserting subscriber '{name}': {e}") + + +def add_subscription_data(cursor, subscriber_name, magazine_name, expiration_date): +# Inserts a join table connection between a subscriber and a magazine. + try: + # find subscriber ID + cursor.execute( + "SELECT id FROM subscribers WHERE name = ?;", (subscriber_name,) + ) + sub_row = cursor.fetchone() + + # find magazine ID + cursor.execute( + "SELECT id FROM magazines WHERE name = ?;", (magazine_name,) + ) + mag_row = cursor.fetchone() + + if not sub_row or not mag_row: + print( + f"Cannot link subscription for '{subscriber_name}' to {magazine_name}: Record missing." + ) + return + + subscriber_id = sub_row[0] + magazine_id = mag_row[0] + + # Avoid Duplicate Subscriptions: Prevent linking the exact same sub-to-mag mapping again + cursor.execute( + "SELECT id FROM subscriptions WHERE subscriber_id = ? 
AND magazine_id = ?;", + (subscriber_id, magazine_id), + ) + if cursor.fetchone() is not None: + return + + cursor.execute( + """INSERT INTO subscriptions (subscriber_id, magazine_id, expiration_date) + VALUES (?, ?, ?);""", + (subscriber_id, magazine_id, expiration_date), + ) + + except sqlite3.Error as e: + print(f"Error inserting subscriptiom: {e}") + + + +def main(): + # Task1: Define the relative database path + db_path = os.path.join("..", "db", "magazines.db") + + # Ensure the parent directory (../db) exists before connecting + db_dir = os.path.dirname(db_path) + if db_dir and not os.path.exists(db_dir): + os.makedirs(db_dir) + + connection = None + + # Task 1: Execute all SQL-related operations inside a try block + try: + print(f"Connecting to database at: {db_path}") + connection = sqlite3.connect(db_path) + cursor = connection.cursor() + + # TASK 3 REQUIREMENT: Force SQLite to actively monitor and enforce foreign keys + connection.execute("PRAGMA foreign_keys = 1") + + print("Defining Database Structures...") + + + # Task 2: Define Database Structure + + # Create Publisher table + cursor.execute(""" + + CREATE TABLE IF NOT EXISTS publishers ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE + ); + """) + + # Create magazines table + cursor.execute(""" + + CREATE TABLE IF NOT EXISTS magazines ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE, + publisher_id INTEGER NOT NULL, + FOREIGN KEY (publisher_id) REFERENCES publishers(id) + ); + """) + + # Create subscribers table + cursor.execute(""" + + CREATE TABLE IF NOT EXISTS subscribers ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + address TEXT NOT NULL + ); + """) + + # Create subscriptions table + cursor.execute(""" + + CREATE TABLE IF NOT EXISTS subscriptions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + subscriber_id INTEGER NOT NULL, + magazine_id INTEGER NOT NULL, + expiration_date TEXT NOT NULL, + FOREIGN KEY (subscriber_id) REFERENCES 
subscribers(id), + FOREIGN KEY (magazine_id) REFERENCES magazines(id) + ); + """) + + # TASK 3: Add at least 3 entries to Publishers table + add_publisher_data(cursor, "Condé Nast") + add_publisher_data(cursor, "Hearst Communications") + add_publisher_data(cursor, "Dotdash Meredith") + + # TASK 3: Add at least 3 entries to Magazines table + add_magazine_data(cursor, "Vogue", "Condé Nast") + add_magazine_data(cursor, "Cosmopolitan", "Hearst Communications") + add_magazine_data(cursor, "Better Homes & Gardens", "Dotdash Meredith") + + # TASK 3: Add at least 3 entries to Subscribers table + add_subscriber_data(cursor, "Alice Smith", "123 Maple St") + add_subscriber_data(cursor, "Bob Jones", "456 Oak Ave") + add_subscriber_data(cursor, "Charlie Brown", "789 Pine Rd") + + # TASK 3: Add at least 3 entries to Subscriptions table + add_subscription_data(cursor, "Alice Smith", "Vogue", "2027-12-31") + add_subscription_data(cursor, "Bob Jones", "Cosmopolitan", "2026-11-30") + add_subscription_data(cursor, "Charlie Brown", "Better Homes & Gardens", "2028-06-15") + + + # Task 3: Commit to save the structural changes + connection.commit() + print("Database successfully populated and saved.") + + # Task 4: Write SQL Queries + print("\n" + "-"*40 + "\nTask 4: Execute SQL Queries:\n") + # Write a query to retrieve all information from the subscribers table. + print("Query 1: All Subscribers Information") + cursor.execute("SELECT * FROM subscribers;") + for row in cursor.fetchall(): + print(row) + print("_" * 50) + + # Write a query to retrieve all magazines sorted by name. + print("Query 2: All Magazines sorted by name ") + cursor.execute("SELECT * FROM magazines ORDER BY name ASC;") + for row in cursor.fetchall(): + print(row) + print("_" * 50) + + # Write a query to find magazines for a particular publisher, one of the publishers you created. This requires a JOIN. 
+ selected_publisher = "Condé Nast" + print(f'Query 3: Magazines published by {selected_publisher}') + + cursor.execute(""" + SELECT magazines.id, magazines.name + FROM magazines + JOIN publishers ON magazines.publisher_id = publishers.id + WHERE publishers.name = ?; + """, (selected_publisher,)) + for row in cursor.fetchall(): + print(row) + print("_" * 50) + + except sqlite3.Error as e: + # Task 1: Catch and report any database-related exceptions + print(f"An error occurred while handling the database: {e}") + + finally: + # Task 1: Ensure the connection is always closed to prevent memory leaks + if connection: + connection.close() + print("Database connection closed successfully.") + +if __name__ == "__main__": + main() diff --git a/assignment9/sql_intro_2.py b/assignment9/sql_intro_2.py new file mode 100644 index 0000000..5be7cb0 --- /dev/null +++ b/assignment9/sql_intro_2.py @@ -0,0 +1,78 @@ +import os +import sqlite3 +import pandas as pd + + +def main(): + # Define the path to the lesson database + db_path = os.path.join("..", "db", "lesson.db") + + if not os.path.exists(db_path): + print(f"Error: The lesson database was not found at {db_path}.") + return + + connection = None + + try: + connection = sqlite3.connect(db_path) + + # SQL statement to JOIN line_items and products + query = """ + SELECT + line_items.line_item_id, + line_items.quantity, + line_items.product_id, + products.product_name, + products.price + FROM line_items + JOIN products ON line_items.product_id = products.product_id; + """ + + # Read data into a DataFrame and print the first 5 lines + df = pd.read_sql_query(query, connection) + print("--- Step 1 & 2: First 5 lines of the initial JOIN DataFrame ---") + print(df.head(5)) + print("\n" + "=" * 60 + "\n") + + # Add a column called 'total' (quantity * price) and print first 5 lines + df["total"] = df["quantity"] * df["price"] + print("--- Step 3: DataFrame with 'total' column added ---") + print(df.head(5)) + print("\n" + "=" * 60 + "\n") 
+ + # Groupby product_id and apply the required agg() aggregations + # Aggregations: line_item_id -> count, total -> sum, product_name -> first + summary_df = ( + df.groupby("product_id") + .agg( + { + "line_item_id": "count", + "total": "sum", + "product_name": "first", + } + ) + .reset_index() + ) + + print("--- Step 4: First 5 lines of the grouped/aggregated DataFrame ---") + print(summary_df.head(5)) + print("\n" + "=" * 60 + "\n") + + # Sort the DataFrame by the product_name column + summary_df = summary_df.sort_values(by="product_name") + + # Write the final summary DataFrame to order_summary.csv + csv_filename = "order_summary.csv" + summary_df.to_csv(csv_filename, index=False) + print(f"--- Step 5 & 6: Data successfully written to {csv_filename} ---") + + except (sqlite3.Error, Exception) as e: + print(f"An error occurred during processing: {e}") + + finally: + if connection: + connection.close() + + +if __name__ == "__main__": + main() diff --git a/employees.csv b/employees.csv new file mode 100644 index 0000000..2bd2f60 --- /dev/null +++ b/employees.csv @@ -0,0 +1,4 @@ +Name,Age,City,Salary +Alice,26,New York,70000 +Bob,31,Los Angeles,80000 +Charlie,36,Chicago,90000