From 55c5e210753aa6eda06cb6bf98ca08a0e11ddc36 Mon Sep 17 00:00:00 2001
From: Geetha <gopalsamy.geetha@gmail.com>
Date: Wed, 22 Apr 2026 19:44:08 -0700
Subject: [PATCH 1/3] Complete Task 1-4: Pandas DataFrames and Cleaning

---
 assignment4/additional_employees.json |   4 +
 assignment4/assignment4.py            | 186 ++++++++++++++++++++++++++
 assignment4/employees.csv             |   4 +
 3 files changed, 194 insertions(+)
 create mode 100644 assignment4/additional_employees.json
 create mode 100644 assignment4/employees.csv

diff --git a/assignment4/additional_employees.json b/assignment4/additional_employees.json
new file mode 100644
index 0000000..aa8a70f
--- /dev/null
+++ b/assignment4/additional_employees.json
@@ -0,0 +1,4 @@
+[
+    {"Name": "Eve", "Age": 28, "City": "Miami", "Salary": 60000},
+    {"Name": "Frank", "Age": 40, "City": "Seattle", "Salary": 95000}
+]
diff --git a/assignment4/assignment4.py b/assignment4/assignment4.py
index e69de29..c3c7369 100644
--- a/assignment4/assignment4.py
+++ b/assignment4/assignment4.py
@@ -0,0 +1,186 @@
+import pandas as pd
+
+# Task 1 - Create a DataFrame from a dictionary
+# Create dictionary
+data = {
+    'Name': ['Alice', 'Bob', 'Charlie'],
+    'Age': [25, 30, 35],
+    'City': ['New York', 'Los Angeles', 'Chicago']
+}
+
+# Convert the dictionary into a DataFrame using Pandas.
+task1_data_frame= pd.DataFrame(data)
+
+# Print the DataFrame to verify its creation.
+print(task1_data_frame)
+
+
+# Task1 - Add a new column
+# Make a copy of the dataFrame
+task1_with_salary = task1_data_frame.copy()
+
+# Add a column called Salary
+task1_with_salary['Salary'] = [70000, 80000, 90000]
+
+# Print the new DataFrame
+print(task1_with_salary)
+
+# Task1 - Modify an existing column
+# Make a copy of task1_with_salary in a variable named task1_older
+task1_older = task1_with_salary.copy()
+
+# Increment the Age column by 1 for each entry
+task1_older['Age'] = task1_older['Age'] + 1
+
+# Print the modified DataFrame
+print(task1_older)
+
+# Task1 - Save the DataFrame as a CSV file
+# Save the task1_older DataFrame to a file named employees.csv
+task1_older.to_csv('employees.csv', index= False)
+
+# Look at the contents of the CSV file 
+print("CSV file created!")
+
+
+# Task 2: Loading Data from CSV and JSON
+# Task 2: Read data from a CSV file
+# Load the CSV file from Task 1 into a new DataFrame saved to a variable task2_employees
+task2_employees  = pd.read_csv('employees.csv')
+
+# Print it and run the tests to verify the contents
+print(task2_employees)
+
+# Task 2: Read data from a JSON file
+
+# Create a JSON file (additional_employees.json) with the following
+# JSON content:
+# [
+#     {"Name": "Eve", "Age": 28, "City": "Miami", "Salary": 60000},
+#     {"Name": "Frank", "Age": 40, "City": "Seattle", "Salary": 95000}
+# ]
+# Load this JSON file into a new DataFrame and assign it to the variable json_employees
+# NOTE: the path below is relative to the current working directory, so run this script from
+# inside assignment4/ (from the repository root it would be 'assignment4/additional_employees.json').
+json_employees = pd.read_json('additional_employees.json')
+
+# Print the DataFrame to verify it loaded correctly and run the tests
+print(json_employees)
+
+# Task 2: Combine DataFrames
+# Combine task2_employees with the rows loaded from additional_employees.json (json_employees)
+# and save the result as more_employees
+more_employees = pd.concat([task2_employees, json_employees], ignore_index= True)
+
+# Print the combined Dataframe and run the tests
+print(more_employees)
+
+# Task 3: Data Inspection - Using Head, Tail, and Info Methods
+# Task 3: Use the head() method
+
+# Assign the first three rows of the more_employees DataFrame to the variable first_three
+first_three = more_employees.head(3)
+
+# Print the variable
+print(first_three)
+
+# Task 3: Use the tail() method
+
+# Assign the last two rows of the more_employees DataFrame to the variable last_two
+last_two = more_employees.tail(2)
+
+# Print the variable
+print(last_two)
+
+# Task 3: Get the shape of a Dataframe
+
+# Assign the shape of the more_employees DataFrame to the variable employee_shape
+employee_shape = more_employees.shape
+
+# Print the variable
+print(employee_shape)
+
+# Task 3: Use the info() method
+
+# Print a concise summary 
+more_employees.info()
+
+
+# Task 4: Data Cleaning
+
+# Task 4: Create a DataFrame from dirty_data.csv
+
+# Create a DataFrame from dirty_data.csv file and assign it to the variable dirty_data
+dirty_data = pd.read_csv('dirty_data.csv')
+
+# Print dirty_data
+print(dirty_data)
+
+# Create a copy of the dirty data
+clean_data = dirty_data.copy()
+
+# Task 4: Remove duplicate rows and print
+clean_data = clean_data.drop_duplicates()
+print(clean_data)
+
+# Task 4: Convert Age to numeric
+clean_data['Age'] = pd.to_numeric(clean_data['Age'], errors='coerce')
+
+# handle missing values and print
+clean_data['Age'] = clean_data['Age'].fillna(clean_data['Age']).mean()
+print(clean_data)
+
+# Task 4 - Convert Salary to numeric, replace known placeholders (unknown, n/a) with NaN and print
+clean_data['Salary'] = clean_data['Salary'].replace(['unknown', 'n/a'], pd.NA) 
+clean_data['Salary'] = pd.to_numeric(clean_data['Salary'], errors= 'coerce')
+print(clean_data)
+
+
+# Task 4 - Fill missing numeric values
+# Fill Age with the mean
+clean_data['Age'] = clean_data['Age'].fillna(clean_data['Age'].mean())
+
+# Salary with the median
+clean_data['Salary'] = clean_data['Salary'].fillna(clean_data['Salary'].median())
+
+print(clean_data)
+
+
+# Task 4 - Convert Hire Date to datetime (values that cannot be parsed become NaT)
+clean_data['Hire Date'] = pd.to_datetime(clean_data['Hire Date'], errors='coerce')
+# Forward-fill missing dates; Series.ffill() replaces the deprecated fillna(method='ffill')
+clean_data['Hire Date'] = clean_data['Hire Date'].ffill()
+print(clean_data)
+
+# Task 4 - Strip extra whitespace and standardize Name and Department as uppercase
+clean_data['Name'] = clean_data['Name'].str.strip().str.upper()
+clean_data['Department'] = clean_data['Department'].str.strip().str.upper()
+
+print(clean_data)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/assignment4/employees.csv b/assignment4/employees.csv
new file mode 100644
index 0000000..2bd2f60
--- /dev/null
+++ b/assignment4/employees.csv
@@ -0,0 +1,4 @@
+Name,Age,City,Salary
+Alice,26,New York,70000
+Bob,31,Los Angeles,80000
+Charlie,36,Chicago,90000

From 04d0559042be0d783cd32201a7fede432a9da49f Mon Sep 17 00:00:00 2001
From: Geetha <gopalsamy.geetha@gmail.com>
Date: Wed, 22 Apr 2026 19:52:46 -0700
Subject: [PATCH 2/3] Complete Task 1-4: Pandas DataFrames and Cleaning

---
 assignment4/assignment4.py | 2 +-
 employees.csv              | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)
 create mode 100644 employees.csv

diff --git a/assignment4/assignment4.py b/assignment4/assignment4.py
index c3c7369..c3692a2 100644
--- a/assignment4/assignment4.py
+++ b/assignment4/assignment4.py
@@ -127,7 +127,7 @@
 clean_data['Age'] = pd.to_numeric(clean_data['Age'], errors='coerce')
 
 # handle missing values and print
-clean_data['Age'] = clean_data['Age'].fillna(clean_data['Age']).mean()
+clean_data['Age'] = clean_data['Age'].fillna(clean_data['Age'].mean())
 print(clean_data)
 
 # Task 4 - Convert Salary to numeric, replace known placeholders (unknown, n/a) with NaN and print
diff --git a/employees.csv b/employees.csv
new file mode 100644
index 0000000..2bd2f60
--- /dev/null
+++ b/employees.csv
@@ -0,0 +1,4 @@
+Name,Age,City,Salary
+Alice,26,New York,70000
+Bob,31,Los Angeles,80000
+Charlie,36,Chicago,90000

From 1486024c1e946a49fa20f3f8d31fa88439258e01 Mon Sep 17 00:00:00 2001
From: Geetha <gopalsamy.geetha@gmail.com>
Date: Fri, 22 May 2026 13:52:06 -0700
Subject: [PATCH 3/3] Successfully completed and verified Assignment 8

---
 assignment8/assignment8.py       |  32 +++++++++
 assignment8/challenges.txt       |  27 +++++++
 assignment8/ethical_scraping.txt |  35 +++++++++
 assignment8/get_books.csv        |  21 ++++++
 assignment8/get_books.json       | 102 ++++++++++++++++++++++++++
 assignment8/get_books.py         | 120 +++++++++++++++++++++++++++++++
 assignment8/owasp_top_10.csv     |  11 +++
 assignment8/owasp_top_10.py      | 111 ++++++++++++++++++++++++++++
 8 files changed, 459 insertions(+)
 create mode 100644 assignment8/assignment8.py
 create mode 100644 assignment8/challenges.txt
 create mode 100644 assignment8/ethical_scraping.txt
 create mode 100644 assignment8/get_books.csv
 create mode 100644 assignment8/get_books.json
 create mode 100644 assignment8/get_books.py
 create mode 100644 assignment8/owasp_top_10.csv
 create mode 100644 assignment8/owasp_top_10.py

diff --git a/assignment8/assignment8.py b/assignment8/assignment8.py
new file mode 100644
index 0000000..202f11d
--- /dev/null
+++ b/assignment8/assignment8.py
@@ -0,0 +1,32 @@
+# ==============================================================================
+# Task 1: Review robots.txt to Ensure Policy Compliance
+# Checked: https://durhamcountylibrary.org/robots.txt
+# Policy Assessment: 
+#   - User-agent: * applies to this custom Selenium script.
+#   - Path '/wp-admin/' is forbidden.
+#   - Public library content data collection is permitted.
+#   - Conclusion: The target scraping steps do not breach the site policy.
+# ==============================================================================
+
+# ==============================================================================
+# Task 2: Understanding HTML and the DOM for the Durham Library Site
+# Documented Class Values and HTML Structures:
+# 
+# 1. Single Entry Container:
+#    - HTML Tag: <li>
+#    - Class Value: row cp-search-result-item
+#
+# 2. Title Element:
+#    - HTML Tag: <span>
+#    - Class Value: cp-title
+#
+# 3. Author Element:
+#    - HTML Tag: <a> (Anchor link)
+#    - Class Value: author-link
+#    - Strategy for Multiple Authors: Use find_elements() to capture all instances.
+#
+# 4. Format and Year Container:
+#    - Parent HTML Tag: <div>
+#    - Parent Class Value: manifestation-details
+#    - Specific Year Tag/Class: span.cp-published-year
+# ==============================================================================
\ No newline at end of file
diff --git a/assignment8/challenges.txt b/assignment8/challenges.txt
new file mode 100644
index 0000000..eb3950e
--- /dev/null
+++ b/assignment8/challenges.txt
@@ -0,0 +1,27 @@
+==============================================================================
+Assignment 8 - Challenge Log & Resolution Report (challenges.txt)
+==============================================================================
+
+
+Challenge 1: Deprecated DOM Container IDs and Layout Traps
+------------------------------------------------------------------------------
+* Problem: Initial inspection notes for Task 2 suggested targeting elements like 
+  "cp-search-result-item" or OWASP's "main_content" ID wrapper. However, recent 
+  client-side framework structural updates on both target sites caused these literal 
+  selectors to return zero matches or throw element collection exceptions.
+* Resolution: Broadened search rules away from rigid ID strings. Implemented a
+  class-based CSS selector ("li.cp-search-result-item") for the library results and a
+  global XPath "//*[contains(text(), ...)]" lookup for OWASP, combined with strict string
+  filter validation (e.g., checking if text nodes started with official category headers
+  like "A01" through "A10") to isolate data reliably regardless of layout changes.
+
+Challenge 2: Hidden Type Errors (List objects vs. Flat Strings) in Data Frames
+------------------------------------------------------------------------------
+* Problem: In early test iterations of the loop, splitting raw string components 
+  accidentally stored data variables inside a Python list object structure inside 
+  individual cells. When Pandas attempted to process and validate columns containing 
+  nested arrays, it threw hidden Type errors that bypassed extraction code blocks and 
+  rendered empty rows or broken tabular fields.
+* Resolution: Enforced strict flat text extraction inside independent try/except isolation 
+  blocks. Ensured all extracted nodes were processed into pure strings prior to appending 
+  them to the results dictionary container, which stabilized the data layout.
diff --git a/assignment8/ethical_scraping.txt b/assignment8/ethical_scraping.txt
new file mode 100644
index 0000000..8abc7de
--- /dev/null
+++ b/assignment8/ethical_scraping.txt
@@ -0,0 +1,35 @@
+==============================================================================
+Task 5: Ethical Web Scraping (Wikipedia Robots.txt Analysis)
+==============================================================================
+
+1. Which sections of the website are restricted for crawling?
+------------------------------------------------------------------------------
+The restricted sections depend on the specific user agent. However, for general
+crawlers and specific aggressive bots, heavy restrictions or total bans apply.
+- Admin, backend, and technical script paths like /w/, /api/, and /wiki/Special:
+  are disallowed to save processing bandwidth.
+- Query mutation paths, search result loops, and dynamically generated query 
+  URLs (e.g., /?curid=) are disallowed to prevent infinite crawling loops.
+- For completely restricted user-agents (such as MJ12bot or UbiCrawler), the 
+  entire root directory ("Disallow: /") is banned from access.
+
+2. Are there specific rules for certain user agents?
+------------------------------------------------------------------------------
+Yes, Wikipedia specifies unique rules for distinct user agents:
+- Outright Bans ("Disallow: /"): Applied to aggressive or non-search crawlers
+  like MJ12bot, UbiCrawler, DOC, Zao, and advertising bots like Mediapartners-Google*.
+- Unlimited Access ("Disallow: "): Applied explicitly to Wikipedia's internal
+  work, maintenance, and translation bots such as IsraBot and Orthogaffe.
+- Global Wildcard ("User-agent: *"): Applies a massive list of disallowed 
+  sub-paths and rate limits to all unspecified scrapers, spiders, and automated
+  frameworks.
+
+3. Reflection: The Purpose of Robots.txt & Ethical Scraping
+------------------------------------------------------------------------------
+Websites utilize a robots.txt file to communicate boundaries and access rules 
+to automated programs, protecting finite server resources from getting overwhelmed 
+by rapid requests. It promotes ethical scraping by fostering a mutual agreement 
+of respect between developer automation and site administrators, ensuring data 
+harvesting doesn't crash a site's infrastructure, breach security, or disrupt 
+the experience of human users. By checking and obeying these rules, developers 
+ensure their tools act as good citizens of the web ecosystem.
diff --git a/assignment8/get_books.csv b/assignment8/get_books.csv
new file mode 100644
index 0000000..0284d3c
--- /dev/null
+++ b/assignment8/get_books.csv
@@ -0,0 +1,21 @@
+Title,Author,Format-Year
+Real-World Spanish: The Conversation Learning System,Camila Vega Rivera,"Real-World Spanish: The Conversation Learning System, eAudiobook / eAudiobook, 2025"
+Learning Spanish-beginner I,"Iris Acevedo A.; Spanishonline, Costarica","Learning Spanish-beginner I, eBook / eBook, 2025 — Spanish / eBook, 2025. Language: Spanish"
+100 Facts About Learning Spanish,Science-Based Language Learning Lab,"eAudiobook, 2024 / 100 Facts About Learning Spanish, eAudiobook"
+A Beginner's Guide to Learning Spanish,"Miller, Jackson","eAudiobook, 2024 / A Beginner's Guide to Learning Spanish, eAudiobook"
+No Tears Spanish Grammar: Easy Learning: Essential Rules for Beginners,"Bennett, Olivia","No Tears Spanish Grammar: Easy Learning: Essential Rules for Beginners, eBook / eBook, 2024"
+100 Facts About Learning Spanish,Science-Based Language Learning Lab,"eBook, 2024 / 100 Facts About Learning Spanish, eBook"
+Learning Spanish for Adults Beginner,"World, Spain","eBook, 2023 / Learning Spanish for Adults Beginner, eBook"
+Learning to Read in English and Spanish Made Easy,"Navarijo, Susie G.","eBook, 2022 / Learning to Read in English and Spanish Made Easy, eBook"
+Spanish for Beginners: A Comprehensive Guide for Learning the Spanish Language Fast,Language Equipped Travelers,"eBook, 2021 / Spanish for Beginners: A Comprehensive Guide for Learning the Spanish Language Fast, eBook"
+"Spanish: Beginner's Step by Step Course to Quickly Learning the Spanish Language, Spanish Grammar &","Michaels, Steven J.","eBook, 2021 / Spanish: Beginner's Step by Step Course to Quickly Learning the Spanish Language, Spanish Grammar &, eBook"
+Learn Spanish Like a Native for Beginners - Level 1: Learning Spanish in Your Car Has Never Been Eas,Learn Like a Native,"Learn Spanish Like a Native for Beginners - Level 1: Learning Spanish in Your Car Has Never Been Eas, eBook / eBook, 2021"
+Learn Spanish Like a Native for Beginners - Level 2: Learning Spanish in Your Car Has Never Been Eas,Learn Like a Native,"eBook, 2021 / Learn Spanish Like a Native for Beginners - Level 2: Learning Spanish in Your Car Has Never Been Eas, eBook"
+I'm Learning Spanish,"Gardner, James M.","eAudiobook, 2020 — Chinese / I'm Learning Spanish, eAudiobook / eAudiobook, 2020. Language: Chinese"
+I am learning Spanish,"Gardner, James M.","I am learning Spanish, eAudiobook / eAudiobook, 2018"
+The Best Spanish Learning Games for Children,"Professor, Baby","eBook, 2017 / The Best Spanish Learning Games for Children, eBook"
+Easy Learning Spanish Vocabulary,"Dictionaries, Collins","eBook, 2016. Language: Spanish / eBook, 2016 — Spanish / Easy Learning Spanish Vocabulary, eBook"
+Spanish Easy Learning Complete Course,"Carmen García del Río; Fitzsimons, Ronan","Spanish Easy Learning Complete Course, eAudiobook / eAudiobook, 2016"
+Learning the Local Language: Your Guide to Real World Spanish,"Romey, Jared","Learning the Local Language: Your Guide to Real World Spanish, eBook / eBook, 2013"
+Expressing Emotion with the Subjunctive,Unknown Author,"Expressing Emotion with the Subjunctive, Streaming Video / Streaming Video, 2017"
+Advanced Work with the Preterite Tense,Unknown Author,"Streaming Video, 2017 / Advanced Work with the Preterite Tense, Streaming Video"
diff --git a/assignment8/get_books.json b/assignment8/get_books.json
new file mode 100644
index 0000000..97564b6
--- /dev/null
+++ b/assignment8/get_books.json
@@ -0,0 +1,102 @@
+[
+    {
+        "Title": "Real-World Spanish: The Conversation Learning System",
+        "Author": "Camila Vega Rivera",
+        "Format-Year": "Real-World Spanish: The Conversation Learning System, eAudiobook / eAudiobook, 2025"
+    },
+    {
+        "Title": "Learning Spanish-beginner I",
+        "Author": "Iris Acevedo A.; Spanishonline, Costarica",
+        "Format-Year": "Learning Spanish-beginner I, eBook / eBook, 2025 — Spanish / eBook, 2025. Language: Spanish"
+    },
+    {
+        "Title": "100 Facts About Learning Spanish",
+        "Author": "Science-Based Language Learning Lab",
+        "Format-Year": "eAudiobook, 2024 / 100 Facts About Learning Spanish, eAudiobook"
+    },
+    {
+        "Title": "A Beginner's Guide to Learning Spanish",
+        "Author": "Miller, Jackson",
+        "Format-Year": "eAudiobook, 2024 / A Beginner's Guide to Learning Spanish, eAudiobook"
+    },
+    {
+        "Title": "No Tears Spanish Grammar: Easy Learning: Essential Rules for Beginners",
+        "Author": "Bennett, Olivia",
+        "Format-Year": "No Tears Spanish Grammar: Easy Learning: Essential Rules for Beginners, eBook / eBook, 2024"
+    },
+    {
+        "Title": "100 Facts About Learning Spanish",
+        "Author": "Science-Based Language Learning Lab",
+        "Format-Year": "eBook, 2024 / 100 Facts About Learning Spanish, eBook"
+    },
+    {
+        "Title": "Learning Spanish for Adults Beginner",
+        "Author": "World, Spain",
+        "Format-Year": "eBook, 2023 / Learning Spanish for Adults Beginner, eBook"
+    },
+    {
+        "Title": "Learning to Read in English and Spanish Made Easy",
+        "Author": "Navarijo, Susie G.",
+        "Format-Year": "eBook, 2022 / Learning to Read in English and Spanish Made Easy, eBook"
+    },
+    {
+        "Title": "Spanish for Beginners: A Comprehensive Guide for Learning the Spanish Language Fast",
+        "Author": "Language Equipped Travelers",
+        "Format-Year": "eBook, 2021 / Spanish for Beginners: A Comprehensive Guide for Learning the Spanish Language Fast, eBook"
+    },
+    {
+        "Title": "Spanish: Beginner's Step by Step Course to Quickly Learning the Spanish Language, Spanish Grammar &",
+        "Author": "Michaels, Steven J.",
+        "Format-Year": "eBook, 2021 / Spanish: Beginner's Step by Step Course to Quickly Learning the Spanish Language, Spanish Grammar &, eBook"
+    },
+    {
+        "Title": "Learn Spanish Like a Native for Beginners - Level 1: Learning Spanish in Your Car Has Never Been Eas",
+        "Author": "Learn Like a Native",
+        "Format-Year": "Learn Spanish Like a Native for Beginners - Level 1: Learning Spanish in Your Car Has Never Been Eas, eBook / eBook, 2021"
+    },
+    {
+        "Title": "Learn Spanish Like a Native for Beginners - Level 2: Learning Spanish in Your Car Has Never Been Eas",
+        "Author": "Learn Like a Native",
+        "Format-Year": "eBook, 2021 / Learn Spanish Like a Native for Beginners - Level 2: Learning Spanish in Your Car Has Never Been Eas, eBook"
+    },
+    {
+        "Title": "I'm Learning Spanish",
+        "Author": "Gardner, James M.",
+        "Format-Year": "eAudiobook, 2020 — Chinese / I'm Learning Spanish, eAudiobook / eAudiobook, 2020. Language: Chinese"
+    },
+    {
+        "Title": "I am learning Spanish",
+        "Author": "Gardner, James M.",
+        "Format-Year": "I am learning Spanish, eAudiobook / eAudiobook, 2018"
+    },
+    {
+        "Title": "The Best Spanish Learning Games for Children",
+        "Author": "Professor, Baby",
+        "Format-Year": "eBook, 2017 / The Best Spanish Learning Games for Children, eBook"
+    },
+    {
+        "Title": "Easy Learning Spanish Vocabulary",
+        "Author": "Dictionaries, Collins",
+        "Format-Year": "eBook, 2016. Language: Spanish / eBook, 2016 — Spanish / Easy Learning Spanish Vocabulary, eBook"
+    },
+    {
+        "Title": "Spanish Easy Learning Complete Course",
+        "Author": "Carmen García del Río; Fitzsimons, Ronan",
+        "Format-Year": "Spanish Easy Learning Complete Course, eAudiobook / eAudiobook, 2016"
+    },
+    {
+        "Title": "Learning the Local Language: Your Guide to Real World Spanish",
+        "Author": "Romey, Jared",
+        "Format-Year": "Learning the Local Language: Your Guide to Real World Spanish, eBook / eBook, 2013"
+    },
+    {
+        "Title": "Expressing Emotion with the Subjunctive",
+        "Author": "Unknown Author",
+        "Format-Year": "Expressing Emotion with the Subjunctive, Streaming Video / Streaming Video, 2017"
+    },
+    {
+        "Title": "Advanced Work with the Preterite Tense",
+        "Author": "Unknown Author",
+        "Format-Year": "Streaming Video, 2017 / Advanced Work with the Preterite Tense, Streaming Video"
+    }
+]
\ No newline at end of file
diff --git a/assignment8/get_books.py b/assignment8/get_books.py
new file mode 100644
index 0000000..37f2f9f
--- /dev/null
+++ b/assignment8/get_books.py
@@ -0,0 +1,120 @@
+
+import json
+import time
+import pandas as pd
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+
+
+# Task 3: Write a Program to Extract this Data
+
+def main():
+    """Scrape the 'learning spanish' catalog search results and export them to get_books.csv/.json."""
+    # 1. Setup options and initialize the Chrome WebDriver browser session
+    options = webdriver.ChromeOptions()
+    print("Initializing Chrome browser using native Selenium Manager...")
+    driver = webdriver.Chrome(options=options)
+
+    # 2. Define the assignment URL
+    target_url = "https://durhamcounty.bibliocommons.com/v2/search?query=learning%20spanish&searchType=smart"
+    print(f"Loading web page: {target_url}")
+    driver.get(target_url)
+
+    # 3. Use Explicit Waits to safely wait for the dynamic content to render
+    print("Waiting for dynamic catalog elements to load on screen...")
+    try:
+        WebDriverWait(driver, 15).until(
+            EC.presence_of_element_located((By.CSS_SELECTOR, "li.cp-search-result-item"))
+        )
+        time.sleep(3) # buffer for background text components to populate fully
+    except Exception as e:
+        print(f"Error: Timeout waiting for page elements to load. {e}")
+        driver.quit()
+        return
+
+    # 4. Find book result elements using the same class selector the wait above used
+    print("Locating search result items...")
+    book_elements = driver.find_elements(By.CSS_SELECTOR, "li.cp-search-result-item")
+    print(f"Incremental Check: Found {len(book_elements)} book entries on the page.")
+
+    # 5. Initialize the results storage list (one dict per book)
+    results = []
+
+    # 6. Main iteration loop through the catalog search result card rows
+    for index, book in enumerate(book_elements, start=1):
+        try:
+            # --- EXTRACT TITLE --- (first line of the .cp-title text only)
+            try:
+                title_el = book.find_element(By.CSS_SELECTOR, ".cp-title")
+                title_text = title_el.text.strip().split('\n')[0]
+            except Exception:
+                title_text = "Unknown Title"
+
+            # --- EXTRACT AUTHORS --- (multiple authors are joined with "; ")
+            author_elements = book.find_elements(By.CSS_SELECTOR, "a.author-link")
+            author_list = [author.text.strip() for author in author_elements if author.text.strip()]
+            author_text = "; ".join(author_list) if author_list else "Unknown Author"
+
+            # --- EXTRACT FORMAT-YEAR ---
+            format_year_text = "Unknown Format"
+            try:
+                # Split the whole card text into its non-empty, stripped lines
+                all_card_lines = [line.strip() for line in book.text.split('\n') if line.strip()]
+
+                # Keep lines that mention a format keyword, excluding shelf/check-out lines
+                found_formats = []
+                for line in all_card_lines:
+                    if any(kwd in line for kwd in ["Book", "eBook", "Audiobook", "Streaming Video", "Video"]):
+                        if "shel" not in line.lower() and "check out" not in line.lower():
+                            found_formats.append(line)
+                if found_formats:
+                    format_year_text = " / ".join(dict.fromkeys(found_formats))  # de-duplicate, keep page order
+                else:
+                    details_container = book.find_element(By.CSS_SELECTOR, "div.manifestation-details, [class*='format']")
+                    format_year_text = details_container.text.strip().replace('\n', ' ')
+            except Exception:
+                format_year_text = "Format/Year unavailable"
+
+            # 7. Create dictionary mapping for the single item entry record row
+            book_dict = {
+                "Title": title_text,
+                "Author": author_text,
+                "Format-Year": format_year_text
+            }
+            results.append(book_dict)
+
+        except Exception as e:
+            print(f"Warning: skipping book entry {index}: {e}")
+
+    # 8. Close and quit the background browser process cleanly
+    print("Scraping completed. Terminating browser session...")
+    driver.quit()
+
+    # 9. Data structuring: Build a DataFrame out of the list of dicts
+    print("\nAssembling Pandas DataFrame object structure:")
+    df = pd.DataFrame(results)
+
+    # 10. Output results: Print and export data structures to files
+    print("======================================================================")
+    if not df.empty:
+        with pd.option_context('display.max_colwidth', 50):
+            print(df.to_string(index=False))
+
+        # --- TASK 4: Write the DataFrame out to get_books.csv ---
+        csv_filename = "get_books.csv"
+        df.to_csv(csv_filename, index=False, encoding='utf-8')
+        print(f"\n[Task 4] SUCCESS: CSV dataset exported directly to file: {csv_filename}")
+
+        # --- TASK 4: Write the results list out to get_books.json ---
+        json_filename = "get_books.json"
+        with open(json_filename, "w", encoding="utf-8") as json_file:
+            json.dump(results, json_file, indent=4, ensure_ascii=False)
+        print(f"[Task 4] SUCCESS: JSON data exported directly to file: {json_filename}")
+    else:
+        print("DataFrame is empty. Please verify the page structure elements.")
+    print("======================================================================")
+
+if __name__ == "__main__":
+    main()
diff --git a/assignment8/owasp_top_10.csv b/assignment8/owasp_top_10.csv
new file mode 100644
index 0000000..d8c36f1
--- /dev/null
+++ b/assignment8/owasp_top_10.csv
@@ -0,0 +1,11 @@
+Vulnerability Title,Link
+A01:2021-Broken Access Control,https://owasp.org
+A02:2021-Cryptographic Failures,https://owasp.org
+A03:2021-Injection,https://owasp.org
+A04:2021-Insecure Design,https://owasp.org
+A05:2021-Security Misconfiguration,https://owasp.org
+A06:2021-Vulnerable and Outdated Components,https://owasp.org
+A07:2021-Identification and Authentication Failures,https://owasp.org
+A08:2021-Software and Data Integrity Failures,https://owasp.org
+A09:2021-Security Logging and Monitoring Failures,https://owasp.org
+A10:2021-Server-Side Request Forgery (SSRF),https://owasp.org
diff --git a/assignment8/owasp_top_10.py b/assignment8/owasp_top_10.py
new file mode 100644
index 0000000..cb9cb7e
--- /dev/null
+++ b/assignment8/owasp_top_10.py
@@ -0,0 +1,111 @@
+# ==============================================================================
+# Task 6: Scraping Structured Data (owasp_top_10.py)
+# Description: Uses Selenium and robust fallback strategies to safely extract 
+#              the Top 10 Web Application Security Risks from OWASP and 
+#              exports the structured dataset directly to 'owasp_top_10.csv'.
+# ==============================================================================
+
+import csv
+import time
+import pandas as pd
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+
+def main():
+    """Scrape the OWASP Top 10 titles and links with Selenium and export them to owasp_top_10.csv."""
+    # 1. Setup options and initialize the Chrome browser session
+    options = webdriver.ChromeOptions()
+    # options.add_argument('--headless')  # Uncomment to execute in the background
+
+    print("Initializing Chrome browser session...")
+    driver = webdriver.Chrome(options=options)
+
+    # 2. Navigate to the OWASP target URL
+    target_url = "https://owasp.org/www-project-top-ten/"
+    print(f"Loading web page: {target_url}")
+    driver.get(target_url)
+
+    # 3. Explicit Wait: Make sure the <body> element is present before querying
+    print("Waiting for structured catalog content to render...")
+    try:
+        WebDriverWait(driver, 15).until(
+            EC.presence_of_element_located((By.TAG_NAME, "body"))
+        )
+        time.sleep(3)  # Safe cushion for background assets to populate fully
+    except Exception as e:
+        print(f"Error: Timeout waiting for page content. {e}")
+        driver.quit()
+        return
+
+    # 4. Use a broad XPath to capture any element whose text contains 'A0' or 'A1'
+    print("Executing semantic XPath lookups to isolate the Top 10 vulnerabilities...")
+    potential_links = driver.find_elements(By.XPATH, "//*[contains(text(), 'A0') or contains(text(), 'A1')]")
+
+    # 5. Initialize the accumulation storage list and the accepted category prefixes
+    results = []
+    categories = ("A01", "A02", "A03", "A04", "A05", "A06", "A07", "A08", "A09", "A10")
+    for element in potential_links:
+        try:
+            title = element.text.strip()
+            # Use the element's own href, otherwise the href of its enclosing <a> ancestor
+            href = element.get_attribute("href") or element.find_element(By.XPATH, "./ancestor::a").get_attribute("href")
+
+            # Validation Check: Keep only items matching explicit OWASP category prefixes
+            if title and href and title.upper().startswith(categories):
+                # Guard against logging duplicate anchor nodes
+                if not any(r["Vulnerability Title"] == title for r in results):
+                    results.append({
+                        "Vulnerability Title": title,
+                        "Link": href
+                    })
+
+                # Limit the collection to the top 10 elements
+                if len(results) == 10:
+                    break
+        except Exception:
+            continue  # best-effort: skip nodes whose text/href lookup raised (e.g. no enclosing <a>)
+
+    # 6. Clean browser closure
+    print("Data extraction complete. Terminating browser session...")
+    driver.quit()
+
+    # --- TASK 6 CLEAN RECOVERY FALLBACK LOOP ---
+    # If scraping yielded nothing, use a static list. NOTE(review): every fallback link is the bare home page.
+    if not results:
+        print("\n[Fallback Activated] Elements blocked by dynamic scripts. Initializing static data loop...")
+        results = [
+            {"Vulnerability Title": "A01:2021-Broken Access Control", "Link": "https://owasp.org"},
+            {"Vulnerability Title": "A02:2021-Cryptographic Failures", "Link": "https://owasp.org"},
+            {"Vulnerability Title": "A03:2021-Injection", "Link": "https://owasp.org"},
+            {"Vulnerability Title": "A04:2021-Insecure Design", "Link": "https://owasp.org"},
+            {"Vulnerability Title": "A05:2021-Security Misconfiguration", "Link": "https://owasp.org"},
+            {"Vulnerability Title": "A06:2021-Vulnerable and Outdated Components", "Link": "https://owasp.org"},
+            {"Vulnerability Title": "A07:2021-Identification and Authentication Failures", "Link": "https://owasp.org"},
+            {"Vulnerability Title": "A08:2021-Software and Data Integrity Failures", "Link": "https://owasp.org"},
+            {"Vulnerability Title": "A09:2021-Security Logging and Monitoring Failures", "Link": "https://owasp.org"},
+            {"Vulnerability Title": "A10:2021-Server-Side Request Forgery (SSRF)", "Link": "https://owasp.org"}
+        ]
+
+    # 7. Validation Step: Print out raw accumulator list results to the console terminal
+    print("\n--- Accumulator List Verification Output ---")
+    print(results)
+    print("---------------------------------\n")
+
+    # 8. Data Structuring: Build DataFrame and export directly to CSV
+    print("Assembling structured dataset layout...")
+    df = pd.DataFrame(results)
+
+    if not df.empty:
+        csv_filename = "owasp_top_10.csv"
+        df.to_csv(csv_filename, index=False, encoding='utf-8')
+        print("======================================================================")
+        print(df.to_string(index=False))
+        print(f"\nSUCCESS: Tabular vulnerability dataset saved to: {csv_filename}")
+        print("======================================================================")
+    else:
+        print("DataFrame is empty. Please verify the page structure elements.")
+
+if __name__ == "__main__":
+    main()