diff --git a/.travis.yml b/.travis.yml index 0a3637f..9556c32 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,7 @@ language: python python: - "2.7" - "3.5" + install: - pip install -e .[test] diff --git a/README.md b/README.md index f7cc5cc..7e54ac1 100755 --- a/README.md +++ b/README.md @@ -1,6 +1,11 @@ [![Build Status](https://travis-ci.org/julienawilson/data-structures.svg?branch=master)](https://travis-ci.org/julienawilson/data-structures) # Data Structures + +[![Build Status](https://travis-ci.org/julienawilson/data-structures.svg?branch=bst)](https://travis-ci.org/julienawilson/data-structures) + +# data-structures + Patrick Saunders and Julien Wilson
Data Structures created in Python401 @@ -26,7 +31,7 @@ A linked list that points in both directions A tree of nodes sorted by values less than and greater than root branching to the left and right, respectively. Methods include: -* insert(self, val): Insert value into tree; if value already exists, ignore it. +* insert(self, val): Insert value into tree; if value already exists, ignore it. Method autobalances after insertion, and tree size increments by one. * search(self, val): Return node containing that value, else None. * size(self): Return number of nodes/vertices in tree, 0 if empty. * depth(self): Return number of levels in tree. Tree with one value has depth of 0. @@ -35,3 +40,21 @@ Methods include: Trees that are higher on the left than the right should return a positive value; trees that are higher on the right than the left should return a negative value; an ideally-balanced tree should return 0. +* in_order(self): Return a generator that returns each node value from in-order traversal. +* pre_order(self): Return a generator that returns each node value from pre-order traversal. +* post_order(self): Return a generator that returns each node value from post-order traversal. +* breadth_first(self): Return a generator that returns each node value from breadth-first traversal. +* delete(value): Delete a node's connections (edges), effectively deleting node. Method autobalances after deletion, and tree size decrements by one. + +## Hash Table +Stores key-value pairs using a given hashing algorithm. Choices for hashing algorithms are additive hash and xor hash. +Additive hash sums the Unicode code points of the characters in the word or string, then takes that sum modulo the number of buckets in the table. +XOR hash combines the Unicode code points of the characters in the word or string with exclusive or. + +Methods include: +* set(key, value): Add a key-value pair to the hash table. Calls a hash function, but is otherwise O(k) where k is the number of items in the bucket. 
+* get(key): Retrieve the value for the given key, or None if the key is absent. Calls a hash function, but is otherwise O(k) where k is the number of items in the bucket. +* _hash(hash_alg): Decide which hash algorithm to call. O(1) +* _additive_hash(word): Return a hash using an additive method; O(n) where n=length of word +* _xor_hash(word): Return a hash using an XOR method; O(n) where n=length of word +* diff --git a/src/bst.py b/src/bst.py index 1e2f3cc..c87e934 100644 --- a/src/bst.py +++ b/src/bst.py @@ -1,15 +1,23 @@ """Classes for binary search tree. Methods include: -insert(self, val): Insert value into tree; if value already exists, ignore it. -search(self, val): Return node containing that value, else None. -size(self): Return number of nodes/vertices in tree, 0 if empty. -depth(self): Return number of levels in tree. Tree with one value has depth of 0. -contains(self, val): Return True if value is in tree, False if not. -balance(self): Return a positive or negative integer representing tree's balance. +insert(val): Insert value into tree; if value already exists, ignore it. + Method autobalances after insertion, and tree size increments by one. +search(val): Return node containing that value, else None. +size(): Return number of nodes/vertices in tree, 0 if empty. +depth(): Return number of levels in tree. Tree with one value has depth of 0. +contains(val): Return True if value is in tree, False if not. +balance(): Return a positive or negative integer representing tree's balance. Trees that are higher on the left than the right should return a positive value; trees that are higher on the right than the left should return a negative value; an ideally-balanced tree should return 0. +in_order(): Return a generator that returns each node value from in-order traversal. +pre_order(): Return a generator that returns each node value from pre-order traversal. 
+post_order(): Return a generator that returns each node value from post_order traversal. +breadth_first(): Return a generator returns each node value from breadth-first traversal. +delete(value): Delete a node's connections (edges), effectively deleting node. + Method autobalances after deletion, and tree size decrements by one. + """ from queue import Queue @@ -18,11 +26,12 @@ class Node(): """Node object for the binary search tree.""" - def __init__(self, value, left=None, right=None): + def __init__(self, value, left=None, right=None, parent=None): """Instantiate a node object.""" self.value = value self.left = left self.right = right + self.parent = parent class BinarySearchTree(): @@ -36,7 +45,7 @@ def __init__(self): self._size = 0 self.root = None - def insert(self, value): + def insert(self, value, autobalance=True): """Insert a value in to the binary search tree.""" if self.root is None: self.root = Node(value) @@ -49,6 +58,7 @@ def insert(self, value): current_node = current_node.left else: current_node.left = Node(value) + current_node.left.parent = current_node self._size += 1 break elif value > current_node.value: @@ -56,10 +66,13 @@ def insert(self, value): current_node = current_node.right else: current_node.right = Node(value) + current_node.right.parent = current_node self._size += 1 break else: break + if autobalance: + self._autobalance() def search(self, value): """Search the Binary Search Tree for a value, return node or none.""" @@ -123,19 +136,96 @@ def contains(self, value): else: return False - def balance(self): - """Return numerical representation of how balanced the tree is.""" - if self.root.left: - depth_left = self.depth(self.root.left) + def balance(self, node='root'): + """Return numerical representation of how balanced the tree (or branches) is.""" + if node is None: + return 0 + if node == 'root': + node = self.root + if node.left: + depth_left = self.depth(node.left) + 1 else: depth_left = 0 - if self.root.right: - depth_right = 
def _autobalance(self, node=None):
    """Rebalance every node whose subtrees differ in height by more than one.

    Nodes are taken from a post-order traversal, so a subtree is repaired
    before any of its ancestors is examined.

    Args:
        node: Unused; accepted only so existing callers keep working.
    """
    # Snapshot the traversal before rotating: rotations relink nodes, and
    # mutating the tree while a lazy generator is still walking it can make
    # the traversal skip or revisit nodes.
    # NOTE(review): assumes post_order() yields node objects (balance()
    # dereferences .left/.right on what it is given) -- confirm.
    for this_node in list(self.post_order()):
        if abs(self.balance(this_node)) > 1:
            self._rebalance(this_node)


def _rebalance(self, node):
    """Restore balance at ``node`` with a single or double rotation.

    ``balance()`` is right height minus left height, so a value above 1
    means the node is right-heavy and a value below -1 means left-heavy.

    Args:
        node: The node whose subtrees differ in height by more than one.
    """
    skew = self.balance(node)
    if skew > 1:
        # Right-heavy. Only a child leaning the opposite way (left) needs
        # the extra rotation; a child with balance 0 must get a single
        # rotation, otherwise the subtree can stay unbalanced.
        if self.balance(node.right) < 0:
            self._rotate_right(node.right)
        self._rotate_left(node)
    elif skew < -1:
        # Left-heavy: mirror image of the case above.
        if self.balance(node.left) > 0:
            self._rotate_left(node.left)
        self._rotate_right(node)


def _rotate_right(self, node, holder_node=None):
    """Rotate clockwise: ``node``'s left child takes ``node``'s place.

    Args:
        node: Root of the subtree to rotate. Nothing happens when it is
            None or has no left child (there is nothing to rotate up).
        holder_node: Subtree to hang on ``node``'s left when the pivot has
            no right child of its own; normally left as None.
    """
    if node is None or node.left is None:
        return
    pivot = node.left
    parent = node.parent
    moved = pivot.right if pivot.right is not None else holder_node

    # Put the pivot where node used to hang, on whichever side that was.
    pivot.parent = parent
    if parent is not None:
        if parent.right is node:
            parent.right = pivot
        else:
            parent.left = pivot
    if self.root is node:
        self.root = pivot

    # node becomes the pivot's right child and adopts the displaced subtree.
    pivot.right = node
    node.parent = pivot
    node.left = moved
    if moved is not None:
        moved.parent = node


def _rotate_left(self, node, holder_node=None):
    """Rotate counterclockwise: ``node``'s right child takes ``node``'s place.

    Args:
        node: Root of the subtree to rotate. Nothing happens when it is
            None or has no right child (there is nothing to rotate up).
        holder_node: Subtree to hang on ``node``'s right when the pivot has
            no left child of its own; normally left as None.
    """
    if node is None or node.right is None:
        return
    pivot = node.right
    parent = node.parent
    moved = pivot.left if pivot.left is not None else holder_node

    # Put the pivot where node used to hang, on whichever side that was.
    pivot.parent = parent
    if parent is not None:
        if parent.left is node:
            parent.left = pivot
        else:
            parent.right = pivot
    if self.root is node:
        self.root = pivot

    # node becomes the pivot's left child and adopts the displaced subtree.
    pivot.left = node
    node.parent = pivot
    node.right = moved
    if moved is not None:
        moved.parent = node
def delete(self, value, autobalance=True):
    """Remove the node holding ``value`` from the tree, if present.

    A node with two children is replaced by its in-order successor (the
    smallest node of its right subtree); otherwise its only child, or
    nothing, takes its place. The removed node's own links are cleared
    and the tree size is decremented by one.

    Args:
        value: The value to remove. Nothing happens if it is not found.
        autobalance: When true, rebalance the tree after the removal.
    """
    target = self.search(value)
    if target is None:
        return

    def relink(old, new):
        """Make ``new`` (possibly None) hang where ``old`` hangs now."""
        parent = old.parent
        if new is not None:
            new.parent = parent
        if parent is None:
            # old had no parent, so it was the root.
            self.root = new
        elif parent.left is old:
            parent.left = new
        else:
            parent.right = new

    if target.left is not None and target.right is not None:
        successor = target.right
        while successor.left is not None:
            successor = successor.left
        # The successor has no left child, so lifting it out only needs
        # its right subtree moved up into its old position.
        relink(successor, successor.right)
        # Read target's children only now: if the successor was target's
        # direct right child, target.right has just changed.
        successor.left = target.left
        successor.right = target.right
        if successor.left is not None:
            successor.left.parent = successor
        if successor.right is not None:
            successor.right.parent = successor
        replacement = successor
    elif target.left is not None:
        replacement = target.left
    else:
        replacement = target.right  # None when target is a leaf

    relink(target, replacement)

    target.parent = None
    target.left = None
    target.right = None
    self._size -= 1
    if autobalance:
        self._autobalance()
class HashTable(object):
    """Fixed-size hash table with string keys and per-bucket chaining.

    Each bucket is a list of ``[key, value]`` pairs. The hashing algorithm
    is chosen at construction time: ``'additive'`` sums the code points of
    the key's characters modulo the bucket count, ``'xor'`` XORs them.
    """

    def __init__(self, size, hash_alg='additive'):
        """Create ``size`` empty buckets and select the hash function.

        Args:
            size: Number of buckets.
            hash_alg: ``'additive'`` (default) or ``'xor'``.

        Raises:
            ValueError: If ``hash_alg`` names an unknown algorithm.
        """
        self._size = size
        self.buckets = [[] for _ in range(self._size)]
        self._hash_alg = self._hash(hash_alg)

    def _hash(self, hash_alg):
        """Return the bound hash method named by ``hash_alg``.

        Raises:
            ValueError: If ``hash_alg`` is neither 'additive' nor 'xor'.
        """
        if hash_alg == 'additive':
            return self._additive_hash
        if hash_alg == 'xor':
            return self._xor_hash
        raise ValueError("Please enter a valid hash algorithm. The options are 'additive' and 'xor'.")

    def _additive_hash(self, word):
        """Return the sum of the code points of ``word`` modulo the bucket count."""
        return sum(ord(char) for char in word) % self._size

    def _xor_hash(self, word):
        """Return the XOR of the code points of ``word``.

        The result is NOT reduced to the bucket range; ``_bucket_for``
        does that, so this value on its own may exceed the bucket count.
        """
        hash_val = 0
        for char in word:
            hash_val ^= ord(char)
        return hash_val

    def _bucket_for(self, key):
        """Return the bucket list that ``key`` belongs to.

        Raises:
            TypeError: If ``key`` is not a string.
        """
        if not isinstance(key, str):
            raise TypeError("Key for hash table must be a string.")
        # Reduce here as well: the xor hash can be larger than the number
        # of buckets, which used to raise IndexError. For the additive
        # hash the extra modulo changes nothing.
        return self.buckets[self._hash_alg(key) % self._size]

    def set(self, key, value):
        """Store ``value`` under ``key``, overwriting any existing value.

        Raises:
            TypeError: If ``key`` is not a string.
        """
        bucket = self._bucket_for(key)
        for pair in bucket:
            if pair[0] == key:
                pair[1] = value
                return
        bucket.append([key, value])

    def get(self, key):
        """Return the value stored under ``key``, or None if it is absent.

        Raises:
            TypeError: If ``key`` is not a string.
        """
        for pair in self._bucket_for(key):
            if pair[0] == key:
                return pair[1]
        return None
autobalance=False) + tree.insert(90, autobalance=False) + tree.insert(100, autobalance=False) + tree.insert(44, autobalance=False) + tree.insert(48, autobalance=False) + tree.insert(49, autobalance=False) + tree.insert(103, autobalance=False) + tree.insert(2, autobalance=False) + tree.insert(102, autobalance=False) return tree @@ -108,25 +108,25 @@ def test_inserting_higher_val_pushes_right(): def test_inserting_less_but_more_into_populated_tree(small_tree): """Test inserting lower value that would push left then right.""" - small_tree.insert(43) + small_tree.insert(43, autobalance=False) assert small_tree.root.left.right.value == 43 def test_inserting_lower_item_into_populated_tree(small_tree): """Test inserting value that pushes all the way left.""" - small_tree.insert(33) + small_tree.insert(33, autobalance=False) assert small_tree.root.left.left.left.value == 33 def test_insert_to_small_tree_updates_size(small_tree): """Test that insert on small tree increments size.""" - small_tree.insert(43) + small_tree.insert(43, autobalance=False) assert small_tree._size == 7 def test_insert_to_small_tree_existing_num(small_tree): """Test that inserting existing number doesn't change size.""" - small_tree.insert(40) + small_tree.insert(40, autobalance=False) assert small_tree.size() == 6 @@ -194,6 +194,11 @@ def test_contains_true_weird_tree_root(weird_tree): assert weird_tree.contains(50) is True +def test_contains_with_nonexistent_val_gt_root(small_tree): + """Test contains returns False when value is greater than root but node nonexistent.""" + assert small_tree.contains(99) is False + + def test_depth_on_small_tree(small_tree): """Test the size on a small Tree.""" assert small_tree.depth() == 2 @@ -214,6 +219,14 @@ def test_balance_on_weird_tree(weird_tree): assert weird_tree.balance() == 4 +def test_balance_w_no_left_nodes(): + """Test the balance of a tree with only a root and its right child.""" + b_tree = BinarySearchTree() + b_tree.insert(17) + b_tree.insert(43) + 
assert b_tree.balance() == 1 + + def test_inorder_no_nodes(): """Test in-order traversal on empty tree returns empty path.""" b_tree = BinarySearchTree() @@ -352,3 +365,145 @@ def test_bfs_weird_tree(weird_tree): for node in weird_tree.breadth_first(): bfs_list.append(node.value) assert bfs_list == [50, 44, 79, 2, 48, 80, 49, 83, 90, 100, 103, 102] + + +def test_delete_node_with_no_children(small_tree): + """Test calling delete on node with no children.""" + small_tree.delete(35, autobalance=False) + assert small_tree.search(35) == None + + +def test_delete_node_with_no_children_update_size(small_tree): + """Test calling delete on node with no children.""" + small_tree.delete(35, autobalance=False) + assert small_tree.size() == 5 + + +def test_delete_node_with_no_children_annuls_parent_connection(small_tree): + """Test calling delete on node with no children kills parent's connection.""" + small_tree.delete(35, autobalance=False) + assert small_tree.search(40).left is None + with pytest.raises(AttributeError): + assert small_tree.search(35).parent + + +def test_delete_node_with_no_children_annuls_own_connection(small_tree): + """Test calling delete on node with no children kills parent's connection.""" + small_tree.delete(35, autobalance=False) + with pytest.raises(AttributeError): + assert small_tree.search(35).parent + + +def test_delete_node_with_one_child_reassigns_connections(small_tree): + """Test deleting a node reassigns its one child to expected new parent.""" + small_tree.delete(40, autobalance=False) + assert small_tree.search(35).parent.value == 50 + assert small_tree.search(50).left.value == 35 + + +def test_delete_node_annuls_own_connections(small_tree): + """Test calling delete on node kills parent and child connections.""" + small_tree.delete(40, autobalance=False) + with pytest.raises(AttributeError): + assert small_tree.search(40).parent is None + with pytest.raises(AttributeError): + assert small_tree.search(40).left is None + + +def 
test_delete_updates_size(small_tree): + """Test that deleting a node updates tree's size.""" + small_tree.delete(40, autobalance=False) + assert small_tree.size() == 5 + + +def test_delete_node_with_two_childs_updates_size(small_tree): + """Test that delete node with two childs updates size.""" + small_tree.delete(80, autobalance=False) + assert small_tree.size() == 5 + + +def test_rotate_left_small_tree_assign_child(small_tree): + """Test that left rotation on small tree reassigns children.""" + small_tree._rotate_left(small_tree.root) + assert small_tree.search(80).left.value == 50 + + +def test_rotate_left_small_tree_assign_parent(small_tree): + """Test that left rotation on small tree reassigns parent.""" + small_tree._rotate_left(small_tree.root) + assert small_tree.search(50).parent.value == 80 + + +def test_rotate_left_reassigns_root(small_tree): + """Test the left rotation reassigns root.""" + small_tree._rotate_left(small_tree.root) + assert small_tree.root.value == 80 + + +def test_rotate_left_doesnt_reassign_root(small_tree): + """Test the left rotation does not reassign root.""" + small_tree._rotate_left(small_tree.search(80)) + assert small_tree.root.value == 50 + + +def test_rotate_left_small_tree_assign_parent_not_root(small_tree): + """Test that left rotation on small tree reassigns children.""" + small_tree._rotate_left(small_tree.search(80)) + assert small_tree.search(80).parent.value == 90 + + +def test_rotate_left_small_tree_assign_child_not_root(small_tree): + """Test that left rotation on small tree reassigns children.""" + small_tree._rotate_left(small_tree.search(80)) + assert small_tree.search(80).right is None + + +def test_rotate_right_small_tree_assign_child(small_tree): + """Test that right rotation on small tree reassigns children.""" + small_tree._rotate_right(small_tree.root) + assert small_tree.search(40).right.value == 50 + + +def test_rotate_right_small_tree_assign_parent(small_tree): + """Test that right rotation on small tree 
reassigns parent.""" + small_tree._rotate_right(small_tree.root) + assert small_tree.search(50).parent.value == 40 + + +def test_rotate_right_small_tree_assign_parent_not_root(small_tree): + """Test that right rotation on small tree reassigns children.""" + small_tree._rotate_right(small_tree.search(40)) + assert small_tree.search(40).parent.value == 35 + + +def test_rotate_right_small_tree_assign_child_not_root(small_tree): + """Test that right rotation on small tree reassigns children.""" + small_tree._rotate_right(small_tree.search(40)) + assert small_tree.search(40).left is None + + +def test_rotate_right_small_tree_assign_parent_child_not_root(small_tree): + """Test that right rotation on small tree reassigns children.""" + small_tree._rotate_right(small_tree.search(40)) + assert small_tree.search(50).left.value == 35 + + +def test_rotate_right_small_tree_assign_right_child_not_root(small_tree): + """Test that right rotation on small tree reassigns children.""" + small_tree._rotate_right(small_tree.search(40)) + assert small_tree.search(35).left is None + + +def test_tree_autobalances(): + """Test that a right skewed tree is balanced after many insertions.""" + tree = BinarySearchTree() + tree.insert(50) + tree.insert(60) + tree.insert(70) + tree.insert(80) + tree.insert(90) + tree.insert(100) + tree.insert(66) + tree.insert(59) + tree.insert(89) + assert abs(tree.balance()) <= 1 diff --git a/src/test_hash_table.py b/src/test_hash_table.py new file mode 100644 index 0000000..41e1585 --- /dev/null +++ b/src/test_hash_table.py @@ -0,0 +1,126 @@ +"""Test for our implemtation of hash tables.""" + +import pytest +# import os +from hash_table import HashTable + + +def test_simple_additive_hash(): + """Test that additive hash on small word.""" + h_table = HashTable(10) + assert h_table._additive_hash('a') == 7 + + +def test_simple_xor_hash(): + """Test that additive hash on small word.""" + h_table = HashTable(10, 'xor') + assert h_table._xor_hash('at') == 21 + + 
+def test_set_add_a_nonstring(): + """Test that set() won't take in a number type.""" + h_table = HashTable(10) + with pytest.raises(TypeError): + h_table.set(3, 'AI') + + +def test_set_add_word(): + """Test that set() adds key-value pair.""" + h_table = HashTable(10) + h_table.set('thinking', 'tiring') + assert h_table.get('thinking') == 'tiring' + + +def test_get(): + """Test that get() retrieves value.""" + h_table = HashTable(10) + h_table.set('thinking', 'tiring') + assert h_table.get('thinking') == 'tiring' + + +def test_dictionary_attacks_me_test(): + """Dictionary test.""" + h_table = HashTable(3000) + f = open("/usr/share/dict/words", 'r') + while True: + word = f.readline() + if not word: + break + h_table.set(word, word) + f.close() + f_again = open("/usr/share/dict/words", 'r') + while True: + word = f_again.readline() + if not word: + break + if word != h_table.get(word): + assert False + f_again.close() + assert True + + +def test_dictionary_attacks_me_with_change(): + """Add a whole dictionary, change a key's value, test the changed happened.""" + not_matching = 0 + h_table = HashTable(3000) + f = open("/usr/share/dict/words", 'r') + while True: + word = f.readline() + if not word: + break + h_table.set(word, word) + f.close() + h_table.set("Adirondack\n", "pickle") + f_again = open("/usr/share/dict/words", 'r') + while True: + word = f_again.readline() + if not word: + break + if word != h_table.get(word): + not_matching += 1 + f_again.close() + assert not_matching == 1 + + +def test_dictionary_test_with_xor(): + """Dictionary test.""" + h_table = HashTable(3000, 'xor') + f = open("/usr/share/dict/words", 'r') + while True: + word = f.readline() + if not word: + break + h_table.set(word, word) + f.close() + f_again = open("/usr/share/dict/words", 'r') + while True: + word = f_again.readline() + if not word: + break + if word != h_table.get(word): + assert False + f_again.close() + assert True + + +def test_dictionary_and_change_with_xor(): + """Add 
a whole dictionary, change a key's value, test the changed happened.""" + not_matching = 0 + h_table = HashTable(3000, 'xor') + f = open("/usr/share/dict/words", 'r') + while True: + word = f.readline() + if not word: + break + h_table.set(word, word) + f.close() + h_table.set("Adirondack\n", "pickle") + f_again = open("/usr/share/dict/words", 'r') + while True: + word = f_again.readline() + if not word: + break + if word != h_table.get(word): + not_matching += 1 + f_again.close() + assert not_matching == 1 diff --git a/tox.ini b/tox.ini index 8721979..3b2ab95 100755 --- a/tox.ini +++ b/tox.ini @@ -2,6 +2,6 @@ envlist = py27, py35 [testenv] -commands = py.test src/test_bst.py +commands = py.test src/test_hash_table.py deps = pytest \ No newline at end of file