germxz · germxz · May 8, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/assignment4/additional_employees.json b/assignment4/additional_employees.json
@@ -0,0 +1,14 @@
+[
+  {
+    "Name": "Eve",
+    "Age": 28,
+    "City": "Miami",
+    "Salary": 60000
+  },
+  {
+    "Name": "Frank",
+    "Age": 40,
+    "City": "Seattle",
+    "Salary": 95000
+  }
+]
diff --git a/assignment4/assignment4.py b/assignment4/assignment4.py
@@ -0,0 +1,69 @@
+import pandas as pd
+
+# Task 1
+# Column name -> values for the three starting employees.
+data = {
+    "Name": ['Alice', 'Bob', 'Charlie'],
+    "Age": [25, 30, 35],
+    "City": ['New York', 'Los Angeles', 'Chicago'],
+}
+task1_data_frame = pd.DataFrame(data)
+print(task1_data_frame)
+
+# assign() returns a new DataFrame, so task1_data_frame is left without a Salary column.
+task1_with_salary = task1_data_frame.assign(Salary=[70000, 80000, 90000])
+print(task1_with_salary)
+
+# Bump every age by one on a separate copy so task1_with_salary keeps the original ages.
+task1_older = task1_with_salary.copy()
+task1_older["Age"] = task1_older["Age"].add(1)
+print(task1_older)
+
+# Write without the index column, then read the file straight back.
+task1_older.to_csv("employees.csv", index=False)
+df = pd.read_csv("employees.csv")
+print(df)
+
+# Task 2
+# Load one set of employees from CSV and another from JSON.
+task2_employees = pd.read_csv("employees.csv")
+print(task2_employees)
+json_employees = pd.read_json("additional_employees.json")
+print(json_employees)
+
+# Stack the two frames; ignore_index=True gives the result a fresh 0..n-1 index.
+more_employees = pd.concat([task2_employees, json_employees], ignore_index=True)
+print(more_employees)
+
+# Inspect the combined data: first rows, last rows, (rows, columns), and column summary.
+first_three = more_employees.head(3)
+last_two = more_employees.tail(2)
+employee_shape = more_employees.shape
+print(first_three)
+print(last_two)
+print(employee_shape)
+more_employees.info()
+
+# Task 4
+dirty_data = pd.read_csv("dirty_data.csv")
+
+# Clean a copy so dirty_data stays exactly as loaded; start by dropping duplicate rows.
+clean_data = dirty_data.copy().drop_duplicates()
+
+# errors="coerce" turns values that do not parse as numbers into NaN; those gaps
+# are then filled with the column mean (Age) or median (Salary).
+age_numeric = pd.to_numeric(clean_data["Age"], errors="coerce")
+clean_data["Age"] = age_numeric.fillna(age_numeric.mean())
+salary_numeric = pd.to_numeric(clean_data["Salary"], errors="coerce")
+clean_data["Salary"] = salary_numeric.fillna(salary_numeric.median())
+
+# Dates that do not parse become NaT and are replaced with the placeholder 2000-01-01.
+hire_dates = pd.to_datetime(clean_data["Hire Date"], errors="coerce")
+clean_data["Hire Date"] = hire_dates.fillna(pd.Timestamp("2000-01-01"))
+print(clean_data)
+
+# Trim surrounding whitespace and upper-case the text columns.
+for text_column in ("Name", "Department"):
+    clean_data[text_column] = clean_data[text_column].str.strip().str.upper()
+
+print(clean_data)
diff --git a/assignment4/employees.csv b/assignment4/employees.csv
@@ -0,0 +1,4 @@
+Name,Age,City,Salary
+Alice,26,New York,70000
+Bob,31,Los Angeles,80000
+Charlie,36,Chicago,90000
diff --git a/assignment8/challenges.txt b/assignment8/challenges.txt
@@ -0,0 +1,4 @@
+Challenges faced and how I resolved them:
+- I needed to find the correct page structure for the OWASP Top Ten list.
+- The landing page URL did not show the top 10 items directly, so I followed the OWASP Top Ten 2025 link and extracted the actual list there.
+- I kept the code simple with short comments so it is easy to read and understand.
diff --git a/assignment8/ethical_scraping.txt b/assignment8/ethical_scraping.txt
@@ -0,0 +1,44 @@
+Question 1: Which sections of the website are restricted for crawling?
+
+The following URL paths are disallowed in Wikipedia's robots.txt:
+
+Disallow: /wiki/MediaWiki:Spam-blacklist
+Disallow: /wiki/MediaWiki_talk:Spam-blacklist
+Disallow: /wiki/Wikipedia:WikiProject_Spam
+Disallow: /wiki/Wikipedia_talk:WikiProject_Spam
+Disallow: /wiki/Wikipedia:Articles_for_deletion
+Disallow: /wiki/Wikipedia:Article_Incubator
+Disallow: /wiki/Wikipedia_talk:Article_Incubator
+Disallow: /wiki/Category:Noindexed_pages
+Disallow: /wiki/Module:Sandbox
+Disallow: /wiki/Template:TemplateStyles_sandbox
+Disallow: /wiki/Wikipedia:Administrator_recall
+Disallow: /wiki/Wikipedia_talk:Administrator_recall
+Disallow: /wiki/Wikipedia:Administrator_elections
+Disallow: /wiki/Wikipedia_talk:Administrator_elections
+Disallow: /wiki/Wikipedia:Requests_for_comment/
+
+Question 2: Are there specific rules for certain user agents?
+
+Yes. Wikipedia's robots.txt has rules for specific user agents.
+Certain bots are completely blocked with Disallow: / including:
+- sitecheck.internetseer.com
+- Zealbot
+- MSIECrawler
+- SiteSnagger
+- WebStripper
+- WebCopier
+- HTTrack
+- larbin
+- libwww
+- fast (blocked for requesting too fast)
+- wget (blocked because its recursive mode is a frequent problem)
+
+Question 3: Why do websites use robots.txt?
+
+Websites use robots.txt to communicate crawling policies to web robots
+and scrapers. It helps protect server resources by asking bots not to
+make too many requests too quickly, which could cause a denial-of-service
+effect. It also lets sites keep certain pages, such as spam lists,
+deletion discussions, and sandbox pages, out of search engine indexes,
+protecting both the site's integrity and its users' privacy.
diff --git a/assignment8/get_books.csv b/assignment8/get_books.csv
@@ -0,0 +1,21 @@
+Title,Author,Format-Year
+Spanish Romance Stories for Language Learning,"Rowett, Mary","eAudiobook, 2026"
+Learning Spanish-beginner I,"Iris Acevedo A.; Spanishonline, Costarica","eBook, 2025 — Spanish"
+Real-World Spanish: The Conversation Learning System,Camila Vega Rivera,"eAudiobook, 2025"
+100 Facts About Learning Spanish,Science-Based Language Learning Lab,"eAudiobook, 2024"
+A Beginner's Guide to Learning Spanish,"Miller, Jackson","eAudiobook, 2024"
+No Tears Spanish Grammar: Easy Learning: Essential Rules for Beginners,"Bennett, Olivia","eBook, 2024"
+100 Facts About Learning Spanish,Science-Based Language Learning Lab,"eBook, 2024"
+The Ultimate Learning Spanish Blueprint - 10 Essential Steps,"Ramirez, Andres","eAudiobook, 2024"
+Learning Spanish for Adults Beginner,"World, Spain","eBook, 2023"
+Learning to Read in English and Spanish Made Easy,"Navarijo, Susie G.","eBook, 2022"
+Spanish for Beginners: A Comprehensive Guide for Learning the Spanish Language Fast,Language Equipped Travelers,"eBook, 2021"
+"Spanish: Beginner's Step by Step Course to Quickly Learning the Spanish Language, Spanish Grammar &","Michaels, Steven J.","eBook, 2021"
+Learn Spanish Like a Native for Beginners - Level 1: Learning Spanish in Your Car Has Never Been Eas,Learn Like a Native,"eBook, 2021"
+Learn Spanish Like a Native for Beginners - Level 2: Learning Spanish in Your Car Has Never Been Eas,Learn Like a Native,"eBook, 2021"
+I'm Learning Spanish,"Gardner, James M.","eAudiobook, 2020 — Chinese"
+I am learning Spanish,"Gardner, James M.","eAudiobook, 2018"
+The Best Spanish Learning Games for Children,"Professor, Baby","eBook, 2017"
+Easy Learning Spanish Vocabulary,"Dictionaries, Collins","eBook, 2016 — Spanish"
+Spanish Easy Learning Complete Course,"Carmen García del Río; Fitzsimons, Ronan","eAudiobook, 2016"
+Learning the Local Language: Your Guide to Real World Spanish,"Romey, Jared","eBook, 2013"
diff --git a/assignment8/get_books.json b/assignment8/get_books.json
@@ -0,0 +1,102 @@
+[
+  {
+    "Title": "Spanish Romance Stories for Language Learning",
+    "Author": "Rowett, Mary",
+    "Format-Year": "eAudiobook, 2026"
+  },
+  {
+    "Title": "Learning Spanish-beginner I",
+    "Author": "Iris Acevedo A.; Spanishonline, Costarica",
+    "Format-Year": "eBook, 2025 — Spanish"
+  },
+  {
+    "Title": "Real-World Spanish: The Conversation Learning System",
+    "Author": "Camila Vega Rivera",
+    "Format-Year": "eAudiobook, 2025"
+  },
+  {
+    "Title": "100 Facts About Learning Spanish",
+    "Author": "Science-Based Language Learning Lab",
+    "Format-Year": "eAudiobook, 2024"
+  },
+  {
+    "Title": "A Beginner's Guide to Learning Spanish",
+    "Author": "Miller, Jackson",
+    "Format-Year": "eAudiobook, 2024"
+  },
+  {
+    "Title": "No Tears Spanish Grammar: Easy Learning: Essential Rules for Beginners",
+    "Author": "Bennett, Olivia",
+    "Format-Year": "eBook, 2024"
+  },
+  {
+    "Title": "100 Facts About Learning Spanish",
+    "Author": "Science-Based Language Learning Lab",
+    "Format-Year": "eBook, 2024"
+  },
+  {
+    "Title": "The Ultimate Learning Spanish Blueprint - 10 Essential Steps",
+    "Author": "Ramirez, Andres",
+    "Format-Year": "eAudiobook, 2024"
+  },
+  {
+    "Title": "Learning Spanish for Adults Beginner",
+    "Author": "World, Spain",
+    "Format-Year": "eBook, 2023"
+  },
+  {
+    "Title": "Learning to Read in English and Spanish Made Easy",
+    "Author": "Navarijo, Susie G.",
+    "Format-Year": "eBook, 2022"
+  },
+  {
+    "Title": "Spanish for Beginners: A Comprehensive Guide for Learning the Spanish Language Fast",
+    "Author": "Language Equipped Travelers",
+    "Format-Year": "eBook, 2021"
+  },
+  {
+    "Title": "Spanish: Beginner's Step by Step Course to Quickly Learning the Spanish Language, Spanish Grammar &",
+    "Author": "Michaels, Steven J.",
+    "Format-Year": "eBook, 2021"
+  },
+  {
+    "Title": "Learn Spanish Like a Native for Beginners - Level 1: Learning Spanish in Your Car Has Never Been Eas",
+    "Author": "Learn Like a Native",
+    "Format-Year": "eBook, 2021"
+  },
+  {
+    "Title": "Learn Spanish Like a Native for Beginners - Level 2: Learning Spanish in Your Car Has Never Been Eas",
+    "Author": "Learn Like a Native",
+    "Format-Year": "eBook, 2021"
+  },
+  {
+    "Title": "I'm Learning Spanish",
+    "Author": "Gardner, James M.",
+    "Format-Year": "eAudiobook, 2020 — Chinese"
+  },
+  {
+    "Title": "I am learning Spanish",
+    "Author": "Gardner, James M.",
+    "Format-Year": "eAudiobook, 2018"
+  },
+  {
+    "Title": "The Best Spanish Learning Games for Children",
+    "Author": "Professor, Baby",
+    "Format-Year": "eBook, 2017"
+  },
+  {
+    "Title": "Easy Learning Spanish Vocabulary",
+    "Author": "Dictionaries, Collins",
+    "Format-Year": "eBook, 2016 — Spanish"
+  },
+  {
+    "Title": "Spanish Easy Learning Complete Course",
+    "Author": "Carmen García del Río; Fitzsimons, Ronan",
+    "Format-Year": "eAudiobook, 2016"
+  },
+  {
+    "Title": "Learning the Local Language: Your Guide to Real World Spanish",
+    "Author": "Romey, Jared",
+    "Format-Year": "eBook, 2013"
+  }
+]
diff --git a/assignment8/get_books.py b/assignment8/get_books.py
@@ -0,0 +1,67 @@
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from webdriver_manager.chrome import ChromeDriverManager
+import pandas as pd
+import json
+
+# Task 3: Load the Durham County search page
+# Step 1: Load the web page
+url = "https://durhamcounty.bibliocommons.com/v2/search?query=learning%20spanish&searchType=smart"
+driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
+
+# Step 4: Create an empty list to store results
+results = []
+
+# All browser work runs inside try/finally so the Chrome session is closed even
+# when the wait times out or an element lookup raises.
+try:
+    driver.get(url)
+
+    # Step 2: Wait until result items appear on the page
+    wait = WebDriverWait(driver, 15)
+    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "li.cp-search-result-item")))
+
+    # Step 3: Find all the li elements
+    books = driver.find_elements(By.CSS_SELECTOR, "li.cp-search-result-item")
+
+    # Step 5: Main loop - extract data from each book
+    for book in books:
+        # Extract the title
+        title = book.find_element(By.CSS_SELECTOR, ".title-content").text
+
+        # Extract all author elements and join with semicolons
+        authors = book.find_elements(By.CSS_SELECTOR, ".author-link")
+        author_text = "; ".join(author.text for author in authors)
+
+        # Extract the format and year; stays empty when the entry has no such element
+        format_year = ""
+        format_elements = book.find_elements(By.CSS_SELECTOR, ".manifestation-item-format-info-wrap span")
+        if format_elements:
+            format_year = format_elements[0].text
+
+        # Create a dict and add it to results list
+        results.append({
+            "Title": title,
+            "Author": author_text,
+            "Format-Year": format_year,
+        })
+finally:
+    # The element text has already been copied into plain strings, so the
+    # browser is no longer needed once the loop ends.
+    driver.quit()
+
+# Task 4: Prepare the DataFrame and save files
+# Step 1: Create a DataFrame from the results list
+df = pd.DataFrame(results)
+print(df)
+
+# Step 2: Write the DataFrame to get_books.csv
+df.to_csv("get_books.csv", index=False)
+
+# Step 3: Write the results list to get_books.json
+# ensure_ascii=False keeps accented characters readable in the file.
+with open("get_books.json", "w", encoding="utf-8") as f:
+    json.dump(results, f, indent=2, ensure_ascii=False)
diff --git a/assignment8/owasp_top_10.csv b/assignment8/owasp_top_10.csv
@@ -0,0 +1,11 @@
+Title,Link
+A01:2025 - Broken Access Control,https://owasp.org/Top10/2025/A01_2025-Broken_Access_Control/
+A02:2025 - Security Misconfiguration,https://owasp.org/Top10/2025/A02_2025-Security_Misconfiguration/
+A03:2025 - Software Supply Chain Failures,https://owasp.org/Top10/2025/A03_2025-Software_Supply_Chain_Failures/
+A04:2025 - Cryptographic Failures,https://owasp.org/Top10/2025/A04_2025-Cryptographic_Failures/
+A05:2025 - Injection,https://owasp.org/Top10/2025/A05_2025-Injection/
+A06:2025 - Insecure Design,https://owasp.org/Top10/2025/A06_2025-Insecure_Design/
+A07:2025 - Authentication Failures,https://owasp.org/Top10/2025/A07_2025-Authentication_Failures/
+A08:2025 - Software or Data Integrity Failures,https://owasp.org/Top10/2025/A08_2025-Software_or_Data_Integrity_Failures/
+A09:2025 - Security Logging and Alerting Failures,https://owasp.org/Top10/2025/A09_2025-Security_Logging_and_Alerting_Failures/
+A10:2025 - Mishandling of Exceptional Conditions,https://owasp.org/Top10/2025/A10_2025-Mishandling_of_Exceptional_Conditions/
diff --git a/assignment8/owasp_top_10.py b/assignment8/owasp_top_10.py
@@ -0,0 +1,53 @@
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from webdriver_manager.chrome import ChromeDriverManager
+import pandas as pd
+
+# Task 6: Extract OWASP Top 10 vulnerabilities
+# Step 1: Load the OWASP project page
+base_url = "https://owasp.org/www-project-top-ten/"
+driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
+
+results = []
+
+# All browser work runs inside try/finally so the Chrome session is closed even
+# when a wait times out or an element lookup raises.
+try:
+    driver.get(base_url)
+    wait = WebDriverWait(driver, 15)
+
+    # Step 2: Navigate to the OWASP Top Ten 2025 page
+    link_2025 = wait.until(
+        EC.presence_of_element_located((By.LINK_TEXT, "OWASP Top Ten 2025"))
+    )
+    page_2025 = link_2025.get_attribute("href")
+    driver.get(page_2025)
+    wait.until(EC.presence_of_element_located((By.TAG_NAME, "main")))
+
+    # Step 3: Find the top 10 vulnerability links using XPath
+    # Match any link inside <main> whose trimmed text starts with A01:2025 ... A10:2025.
+    prefix_tests = " or ".join(
+        f"starts-with(normalize-space(.), 'A{rank:02d}:2025')"
+        for rank in range(1, 11)
+    )
+    xpath_expr = f"//main//a[{prefix_tests}]"
+    items = driver.find_elements(By.XPATH, xpath_expr)
+
+    # Step 4: Extract title and link for each vulnerability
+    for item in items:
+        title = item.text.strip()
+        href = item.get_attribute("href")
+        results.append({"Title": title, "Link": href})
+finally:
+    driver.quit()
+
+# Step 5: Save results to CSV and print them
+if results:
+    df = pd.DataFrame(results)
+    print(df)
+    df.to_csv("owasp_top_10.csv", index=False)
+else:
+    print("No OWASP top 10 items found.")