From 8a331f27c53bd87627fb25c692aefda1642345ff Mon Sep 17 00:00:00 2001
From: fisa712 <101712610+fisa712@users.noreply.github.com>
Date: Fri, 24 Feb 2023 17:19:09 +0500
Subject: [PATCH 1/5] Create preprocessing.ipynb

---
 ch08/additional_features/preprocessing.ipynb | 28 ++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 ch08/additional_features/preprocessing.ipynb

diff --git a/ch08/additional_features/preprocessing.ipynb b/ch08/additional_features/preprocessing.ipynb
new file mode 100644
index 00000000..c3f2576e
--- /dev/null
+++ b/ch08/additional_features/preprocessing.ipynb
@@ -0,0 +1,28 @@
+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+from nltk.stem import WordNetLemmatizer, PorterStemmer
+
+def remove_stopwords(text):
+    """Tokenize *text* and return, space-joined, the tokens whose lowercase form is not an English stop word."""
+    ignored = set(stopwords.words('english'))
+    kept = (token for token in word_tokenize(text) if token.lower() not in ignored)
+    return ' '.join(kept)
+
+def perform_lemmatization(text):
+    """Tokenize *text*, lemmatize every token with the lemmatizer's default settings, and space-join the result."""
+    to_lemma = WordNetLemmatizer().lemmatize
+    lemmas = map(to_lemma, word_tokenize(text))
+    return ' '.join(lemmas)
+
+def perform_stemming(text):
+    """Tokenize *text*, Porter-stem every token, and return the stems joined by single spaces."""
+    to_stem = PorterStemmer().stem
+    stems = (to_stem(token) for token in word_tokenize(text))
+    return ' '.join(stems)
+
+def preprocess_text(text):
+    """Run *text* through stop-word removal, then lemmatization, then stemming, and return the final string."""
+    for stage in (remove_stopwords, perform_lemmatization, perform_stemming):
+        text = stage(text)  # each stage takes and returns a plain string
+    return text

From 8968c20945eadc41687fa40a31fda0da562fd0d0 Mon Sep 17 00:00:00 2001
From: fisa712 <101712610+fisa712@users.noreply.github.com>
Date: Fri, 24 Feb 2023 17:20:39 +0500
Subject: [PATCH 2/5] Create README.md

---
 ch08/additional_features/README.md | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 ch08/additional_features/README.md

diff --git a/ch08/additional_features/README.md b/ch08/additional_features/README.md
new file mode 100644
index 00000000..ea70841a
--- /dev/null
+++ b/ch08/additional_features/README.md
@@ -0,0 +1,22 @@
+# Task: Add more data preprocessing steps
+In this task, we will explore the impact of adding more data preprocessing steps on the accuracy and generalization of our sentiment analysis model. Specifically, we will add stemming, lemmatization, and/or stop-word removal to the existing data preprocessing steps.
+
+## Files and Folders
+sentiment_analysis.py: This is the main script that performs sentiment analysis on a given input text.
+
+preprocessing.py: This script contains the existing data preprocessing steps. You will modify this script to add more preprocessing steps.
+
+data: This folder contains the training and test data.
+
+## Instructions
+Clone the repository and create a new branch for this task.
+
+Open the preprocessing.py script and add more data preprocessing steps such as stemming, lemmatization, or stop-word removal. You can use any NLP library such as NLTK or spaCy to implement these preprocessing steps.
+
+Train the model using the modified data preprocessing steps and evaluate its accuracy and generalization using the test data.
+
+Update the README file with the results of the evaluation and a description of the added preprocessing steps.
+
+Push the changes to the branch and create a pull request.
+
+Wait for the reviewer to approve the pull request and merge it with the main branch.

From 47b11b39d9dac8cbe2e3fd3660894062e1b87509 Mon Sep 17 00:00:00 2001
From: fisa712 <101712610+fisa712@users.noreply.github.com>
Date: Fri, 24 Feb 2023 18:05:43 +0500
Subject: [PATCH 3/5] Create test_preprocessing.py

---
 .../additional_features/test_preprocessing.py | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 ch08/additional_features/test_preprocessing.py

diff --git a/ch08/additional_features/test_preprocessing.py b/ch08/additional_features/test_preprocessing.py
new file mode 100644
index 00000000..bfaf844c
--- /dev/null
+++ b/ch08/additional_features/test_preprocessing.py
@@ -0,0 +1,27 @@
+import unittest
+from preprocessing import *
+
+class TestPreprocessing(unittest.TestCase):
+    """Expected-output checks for the text helpers star-imported from the ``preprocessing`` module."""
+    def test_remove_stopwords(self):
+        text = "this is a sample text that includes some stop words such as the, and, etc."
+        expected_output = "sample text includes stop words , , etc ."  # input never contains "like"; "such"/"as" are filtered as stop words
+        self.assertEqual(remove_stopwords(text), expected_output)
+
+    def test_perform_lemmatization(self):
+        text = "running played plays"
+        expected_output = "running played play"
+        self.assertEqual(perform_lemmatization(text), expected_output)
+
+    def test_perform_stemming(self):
+        text = "running played plays"
+        expected_output = "run play play"
+        self.assertEqual(perform_stemming(text), expected_output)
+
+    def test_preprocess_text(self):
+        text = "This is a sample text. It includes some stop words, and it has words in different tenses (e.g. playing, played)."
+        expected_output = "sampl text . includ stop word , word differ tens ( e.g. play , play ) ."  # "This" lowercases to a stop word, so no "thi" stem survives
+        self.assertEqual(preprocess_text(text), expected_output)
+
+if __name__ == '__main__':
+    unittest.main()

From c9e7ae4f88f99f2be87f0964c08ea90a68be733a Mon Sep 17 00:00:00 2001
From: fisa712 <101712610+fisa712@users.noreply.github.com>
Date: Fri, 24 Feb 2023 18:14:20 +0500
Subject: [PATCH 4/5] Create test_preprocessing.py

---
 .../.github/workflows/test_preprocessing.py   | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 ch08/additional_features/.github/workflows/test_preprocessing.py

diff --git a/ch08/additional_features/.github/workflows/test_preprocessing.py b/ch08/additional_features/.github/workflows/test_preprocessing.py
new file mode 100644
index 00000000..bfaf844c
--- /dev/null
+++ b/ch08/additional_features/.github/workflows/test_preprocessing.py
@@ -0,0 +1,27 @@
+import unittest
+from preprocessing import *
+
+class TestPreprocessing(unittest.TestCase):
+    
+    def test_remove_stopwords(self):
+        text = "this is a sample text that includes some stop words such as the, and, etc."
+        expected_output = "sample text includes stop words like , , etc ."
+        self.assertEqual(remove_stopwords(text), expected_output)
+    
+    def test_perform_lemmatization(self):
+        text = "running played plays"
+        expected_output = "running played play"
+        self.assertEqual(perform_lemmatization(text), expected_output)
+    
+    def test_perform_stemming(self):
+        text = "running played plays"
+        expected_output = "run play play"
+        self.assertEqual(perform_stemming(text), expected_output)
+    
+    def test_preprocess_text(self):
+        text = "This is a sample text. It includes some stop words, and it has words in different tenses (e.g. playing, played)."
+        expected_output = "thi sampl text . includ stop word , word differ tens ( e.g. play , play ) ."
+        self.assertEqual(preprocess_text(text), expected_output)
+
+if __name__ == '__main__':
+    unittest.main()

From 3f3180b9723aae8c4f5c69a546971801002b8ff5 Mon Sep 17 00:00:00 2001
From: fisa712 <101712610+fisa712@users.noreply.github.com>
Date: Fri, 24 Feb 2023 18:17:19 +0500
Subject: [PATCH 5/5] Update and rename test_preprocessing.py to
 i191855_update.yml

---
 .../.github/workflows/i191855_update.yml      | 17 ++++++++++++
 .../.github/workflows/test_preprocessing.py   | 27 -------------------
 2 files changed, 17 insertions(+), 27 deletions(-)
 create mode 100644 ch08/additional_features/.github/workflows/i191855_update.yml
 delete mode 100644 ch08/additional_features/.github/workflows/test_preprocessing.py

diff --git a/ch08/additional_features/.github/workflows/i191855_update.yml b/ch08/additional_features/.github/workflows/i191855_update.yml
new file mode 100644
index 00000000..153592f9
--- /dev/null
+++ b/ch08/additional_features/.github/workflows/i191855_update.yml
@@ -0,0 +1,17 @@
+name : push events workflow
+
+on : push
+
+jobs :
+  unit-testing :
+    runs-on: ubuntu-latest
+    # NOTE(review): GitHub only loads workflows from the repository-root .github/workflows directory; confirm this file's location.
+    steps :
+      - name : Checkout Code
+        uses : actions/checkout@v4
+
+      - name : Install Package
+        run  : pip install pytest numpy pandas nltk && python -m nltk.downloader punkt punkt_tab stopwords wordnet omw-1.4
+
+      - name : Run Test
+        run  : pytest ch08/additional_features/test_preprocessing.py
diff --git a/ch08/additional_features/.github/workflows/test_preprocessing.py b/ch08/additional_features/.github/workflows/test_preprocessing.py
deleted file mode 100644
index bfaf844c..00000000
--- a/ch08/additional_features/.github/workflows/test_preprocessing.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import unittest
-from preprocessing import *
-
-class TestPreprocessing(unittest.TestCase):
-    
-    def test_remove_stopwords(self):
-        text = "this is a sample text that includes some stop words such as the, and, etc."
-        expected_output = "sample text includes stop words like , , etc ."
-        self.assertEqual(remove_stopwords(text), expected_output)
-    
-    def test_perform_lemmatization(self):
-        text = "running played plays"
-        expected_output = "running played play"
-        self.assertEqual(perform_lemmatization(text), expected_output)
-    
-    def test_perform_stemming(self):
-        text = "running played plays"
-        expected_output = "run play play"
-        self.assertEqual(perform_stemming(text), expected_output)
-    
-    def test_preprocess_text(self):
-        text = "This is a sample text. It includes some stop words, and it has words in different tenses (e.g. playing, played)."
-        expected_output = "thi sampl text . includ stop word , word differ tens ( e.g. play , play ) ."
-        self.assertEqual(preprocess_text(text), expected_output)
-
-if __name__ == '__main__':
-    unittest.main()