From c7de246ea94bbdec0c2fc47c200976d459aad6b3 Mon Sep 17 00:00:00 2001
From: Calix Huang <calix.huang1@gmail.com>
Date: Tue, 22 Oct 2019 13:43:41 -0700
Subject: [PATCH 1/2] Modified Rake object, cut down on the lines of code, got
 rid of getters and added a setter method

---
 rake_nltk/rake.py | 63 +++++++++++++++--------------------------------
 1 file changed, 20 insertions(+), 43 deletions(-)

diff --git a/rake_nltk/rake.py b/rake_nltk/rake.py
index 53f5b9d..6252e6f 100644
--- a/rake_nltk/rake.py
+++ b/rake_nltk/rake.py
@@ -27,6 +27,7 @@ class Rake(object):
 
     def __init__(
         self,
+        text,
         stopwords=None,
         punctuations=None,
         language="english",
@@ -44,6 +45,7 @@ def __init__(
         :param min_length: Minimum limit on the number of words in a phrase
                            (Inclusive. Defaults to 1)
         """
+
         # By default use degree to frequency ratio as the metric.
         if isinstance(ranking_metric, Metric):
             self.metric = ranking_metric
@@ -69,19 +71,26 @@ def __init__(
 
         # Stuff to be extracted from the provided text.
         self.frequency_dist = None
-        self.degree = None
+        self.word_degrees = None
         self.rank_list = None
         self.ranked_phrases = None
 
-    def extract_keywords_from_text(self, text):
-        """Method to extract keywords from the text provided.
+        # Initializing the text and building all the fields
+        self.set_text(text)
 
-        :param text: Text to extract keywords from, provided as a string.
-        """
+        # Getter methods are unnecessary: read the results directly from the
+        # following attributes of the Rake object:
+        # - self.ranked_phrases
+        # - self.rank_list
+        # - self.frequency_dist
+        # - self.word_degrees
+
+    def set_text(self, text):
+        self.text = text
         sentences = nltk.tokenize.sent_tokenize(text)
-        self.extract_keywords_from_sentences(sentences)
+        self._extract_keywords_from_sentences(sentences)
 
-    def extract_keywords_from_sentences(self, sentences):
+    def _extract_keywords_from_sentences(self, sentences):
         """Method to extract keywords from the list of sentences provided.
 
         :param sentences: Text to extraxt keywords from, provided as a list
@@ -92,38 +101,6 @@ def extract_keywords_from_sentences(self, sentences):
         self._build_word_co_occurance_graph(phrase_list)
         self._build_ranklist(phrase_list)
 
-    def get_ranked_phrases(self):
-        """Method to fetch ranked keyword strings.
-
-        :return: List of strings where each string represents an extracted
-                 keyword string.
-        """
-        return self.ranked_phrases
-
-    def get_ranked_phrases_with_scores(self):
-        """Method to fetch ranked keyword strings along with their scores.
-
-        :return: List of tuples where each tuple is formed of an extracted
-                 keyword string and its score. Ex: (5.68, 'Four Scoures')
-        """
-        return self.rank_list
-
-    def get_word_frequency_distribution(self):
-        """Method to fetch the word frequency distribution in the given text.
-
-        :return: Dictionary (defaultdict) of the format `word -> frequency`.
-        """
-        return self.frequency_dist
-
-    def get_word_degrees(self):
-        """Method to fetch the degree of words in the given text. Degree can be
-        defined as sum of co-occurances of the word with other words in the
-        given text.
-
-        :return: Dictionary (defaultdict) of the format `word -> degree`.
-        """
-        return self.degree
-
     def _build_frequency_dist(self, phrase_list):
         """Builds frequency distribution of the words in the given body of text.
 
@@ -148,9 +125,9 @@ def _build_word_co_occurance_graph(self, phrase_list):
             # use in other creative ways if required later.
             for (word, coword) in product(phrase, phrase):
                 co_occurance_graph[word][coword] += 1
-        self.degree = defaultdict(lambda: 0)
+        self.word_degrees = defaultdict(lambda: 0)
         for key in co_occurance_graph:
-            self.degree[key] = sum(co_occurance_graph[key].values())
+            self.word_degrees[key] = sum(co_occurance_graph[key].values())
 
     def _build_ranklist(self, phrase_list):
         """Method to rank each contender phrase using the formula
@@ -166,9 +143,9 @@ def _build_ranklist(self, phrase_list):
             rank = 0.0
             for word in phrase:
                 if self.metric == Metric.DEGREE_TO_FREQUENCY_RATIO:
-                    rank += 1.0 * self.degree[word] / self.frequency_dist[word]
+                    rank += 1.0 * self.word_degrees[word] / self.frequency_dist[word]
                 elif self.metric == Metric.WORD_DEGREE:
-                    rank += 1.0 * self.degree[word]
+                    rank += 1.0 * self.word_degrees[word]
                 else:
                     rank += 1.0 * self.frequency_dist[word]
             self.rank_list.append((rank, " ".join(phrase)))

From 1de315ef10b01a5b306f60432521802d734e6118 Mon Sep 17 00:00:00 2001
From: Calix Huang <calix.huang1@gmail.com>
Date: Tue, 22 Oct 2019 13:59:21 -0700
Subject: [PATCH 2/2] Made internal class fields private and added setters

---
 rake_nltk/rake.py | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/rake_nltk/rake.py b/rake_nltk/rake.py
index 6252e6f..315413b 100644
--- a/rake_nltk/rake.py
+++ b/rake_nltk/rake.py
@@ -48,9 +48,9 @@ def __init__(
 
         # By default use degree to frequency ratio as the metric.
         if isinstance(ranking_metric, Metric):
-            self.metric = ranking_metric
+            self.__metric = ranking_metric
         else:
-            self.metric = Metric.DEGREE_TO_FREQUENCY_RATIO
+            self.__metric = Metric.DEGREE_TO_FREQUENCY_RATIO
 
         # If stopwords not provided we use language stopwords by default.
         self.stopwords = stopwords
@@ -63,11 +63,10 @@ def __init__(
             self.punctuations = string.punctuation
 
         # All things which act as sentence breaks during keyword extraction.
-        self.to_ignore = set(chain(self.stopwords, self.punctuations))
+        self.__to_ignore = set(chain(self.stopwords, self.punctuations))
 
-        # Assign min or max length to the attributes
-        self.min_length = min_length
-        self.max_length = max_length
+        self.__min_length = min_length
+        self.__max_length = max_length
 
         # Stuff to be extracted from the provided text.
         self.frequency_dist = None
@@ -90,6 +89,27 @@ def set_text(self, text):
         sentences = nltk.tokenize.sent_tokenize(text)
         self._extract_keywords_from_sentences(sentences)
 
+    def set_stopwords(self, stopwords):
+        """Replace the stopwords and re-extract keywords from the stored text.
+
+        :param stopwords: Iterable of words to ignore when forming phrases.
+        """
+        self.stopwords = stopwords
+        # Rebuild the ignore set: phrase extraction reads it, not self.stopwords.
+        self.__to_ignore = set(chain(self.stopwords, self.punctuations))
+        self.set_text(self.text)
+
+    def set_punctuations(self, punctuations):
+        """Replace the punctuations and re-extract keywords from the stored text.
+
+        :param punctuations: Iterable of punctuation tokens to ignore when
+                             forming phrases.
+        """
+        self.punctuations = punctuations
+        # Rebuild the ignore set: phrase extraction reads it, not self.punctuations.
+        self.__to_ignore = set(chain(self.stopwords, self.punctuations))
+        self.set_text(self.text)
+
     def _extract_keywords_from_sentences(self, sentences):
         """Method to extract keywords from the list of sentences provided.
 
@@ -142,9 +149,9 @@ def _build_ranklist(self, phrase_list):
         for phrase in phrase_list:
             rank = 0.0
             for word in phrase:
-                if self.metric == Metric.DEGREE_TO_FREQUENCY_RATIO:
+                if self.__metric == Metric.DEGREE_TO_FREQUENCY_RATIO:
                     rank += 1.0 * self.word_degrees[word] / self.frequency_dist[word]
-                elif self.metric == Metric.WORD_DEGREE:
+                elif self.__metric == Metric.WORD_DEGREE:
                     rank += 1.0 * self.word_degrees[word]
                 else:
                     rank += 1.0 * self.frequency_dist[word]
@@ -190,10 +197,10 @@ def _get_phrase_list_from_words(self, word_list):
         :return: List of contender phrases that are formed after dropping
                  stopwords and punctuations.
         """
-        groups = groupby(word_list, lambda x: x not in self.to_ignore)
+        groups = groupby(word_list, lambda x: x not in self.__to_ignore)
         phrases = [tuple(group[1]) for group in groups if group[0]]
         return list(
             filter(
-                lambda x: self.min_length <= len(x) <= self.max_length, phrases
+                lambda x: self.__min_length <= len(x) <= self.__max_length, phrases
             )
         )