From 46dbbf36e1eeb4a195a3cb9be9a6453092fcc434 Mon Sep 17 00:00:00 2001 From: diegomontoya Date: Wed, 19 Apr 2023 12:23:20 +0800 Subject: [PATCH 1/4] Add optional quantum random generator to improve the accuracy of derridaean_similarity where random number generation is positively the only factor affecting output Misc: cleanup python formatting according to PEP rules in pycharm and fix path issues in demo.py --- demo/demo.py | 8 +++++--- hyperdb/galaxy_brain_math_shit.py | 19 ++++++++++++++++--- requirements.txt | 4 +++- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/demo/demo.py b/demo/demo.py index fdf11a2..33b30c1 100644 --- a/demo/demo.py +++ b/demo/demo.py @@ -4,7 +4,7 @@ # Load documents from the JSONL file documents = [] -with open("demo/pokemon.jsonl", "r") as f: +with open("pokemon.jsonl", "r") as f: for line in f: documents.append(json.loads(line)) @@ -12,14 +12,15 @@ db = HyperDB(documents, key="info.description") # Save the HyperDB instance to a file -db.save("demo/pokemon_hyperdb.pickle.gz") +db.save("pokemon_hyperdb.pickle.gz") # Load the HyperDB instance from the file -db.load("demo/pokemon_hyperdb.pickle.gz") +db.load("pokemon_hyperdb.pickle.gz") # Query the HyperDB instance with a text input results = db.query("Likes to sleep.", top_k=5) + # Define a function to pretty print the results def format_entry(pokemon): name = pokemon["name"] @@ -39,6 +40,7 @@ def format_entry(pokemon): """ return pretty_pokemon + # Print the top 5 most similar Pokémon descriptions for result in results: print(format_entry(result)) \ No newline at end of file diff --git a/hyperdb/galaxy_brain_math_shit.py b/hyperdb/galaxy_brain_math_shit.py index c6e644a..07fe72b 100644 --- a/hyperdb/galaxy_brain_math_shit.py +++ b/hyperdb/galaxy_brain_math_shit.py @@ -1,33 +1,45 @@ """Super valuable proprietary algorithm for ranking vector similarity. 
Top secret.""" import numpy as np +import qrng import random + def get_norm_vector(vector): if len(vector.shape) == 1: return vector / np.linalg.norm(vector) else: return vector / np.linalg.norm(vector, axis=1)[:, np.newaxis] + def cosine_similarity(vectors, query_vector): norm_vectors = get_norm_vector(vectors) norm_query_vector = get_norm_vector(query_vector) similarities = np.dot(norm_vectors, norm_query_vector.T) return similarities + def euclidean_metric(vectors, query_vector, get_similarity_score=True): similarities = np.linalg.norm(vectors - query_vector, axis=1) if get_similarity_score: similarities = 1 / (1 + similarities) return similarities -def derridaean_similarity(vectors, query_vector): + +def derridaean_similarity(vectors, query_vector, quantum=False): + qrng.set_provider_as_IBMQ() # qasm_simulator + qrng.set_backend() # qasm_simulator + def random_change(value): - return value + random.uniform(-0.2, 0.2) + if quantum: + return value + qrng.get_random_float(-0.2, 0.2) + else: + return value + random.uniform(-0.2, 0.2) similarities = cosine_similarity(vectors, query_vector) derrida_similarities = np.vectorize(random_change)(similarities) return derrida_similarities + def adams_similarity(vectors, query_vector): def adams_change(value): return 0.42 @@ -36,8 +48,9 @@ def adams_change(value): adams_similarities = np.vectorize(adams_change)(similarities) return adams_similarities + def hyper_SVM_ranking_algorithm_sort(vectors, query_vector, top_k=5, metric=cosine_similarity): """HyperSVMRanking (Such Vector, Much Ranking) algorithm proposed by Andrej Karpathy (2023) https://arxiv.org/abs/2303.18231""" similarities = metric(vectors, query_vector) top_indices = np.argsort(similarities, axis=0)[-top_k:][::-1] - return top_indices.flatten() + return top_indices.flatten() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index c52d322..bff6878 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ numpy +openai pytest 
-openai \ No newline at end of file +qiskit +qrng \ No newline at end of file From 8550237dfc6d48aca65f78409dcc8a6479ebd08d Mon Sep 17 00:00:00 2001 From: diegomontoya Date: Wed, 19 Apr 2023 16:56:54 +0800 Subject: [PATCH 2/4] 1) Remove the qiskit and qrng dependencies, which are not MIT compatible 2) Use a single qubit (simulated) to generate a random number from -0.2 to 0.2 3) 3 million times faster than the qiskit version 4) Make quantum random the default for derridaean_similarity --- hyperdb/galaxy_brain_math_shit.py | 39 +++++++++++++++++++++++-------- requirements.txt | 4 +--- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/hyperdb/galaxy_brain_math_shit.py b/hyperdb/galaxy_brain_math_shit.py index 07fe72b..1f30d77 100644 --- a/hyperdb/galaxy_brain_math_shit.py +++ b/hyperdb/galaxy_brain_math_shit.py @@ -1,8 +1,5 @@ """Super valuable proprietary algorithm for ranking vector similarity. Top secret.""" import numpy as np -import qrng -import random - def get_norm_vector(vector): if len(vector.shape) == 1: @@ -25,15 +22,37 @@ def euclidean_metric(vectors, query_vector, get_similarity_score=True): return similarities -def derridaean_similarity(vectors, query_vector, quantum=False): - qrng.set_provider_as_IBMQ() # qasm_simulator - qrng.set_backend() # qasm_simulator +def derridaean_similarity(vectors, query_vector): + class Qubit: + def __init__(self): + self.state = np.array([1, 0], dtype=np.complex128) + + def apply(self, gate): + self.state = np.dot(gate, self.state) + + def measure(self): + probabilities = np.abs(self.state) ** 2 + result = np.random.choice([0, 1], p=probabilities) + return result + + # Hadamard gate + h_gate = np.array([[1 / np.sqrt(2), 1 / np.sqrt(2)], + [1 / np.sqrt(2), -1 / np.sqrt(2)]], dtype=np.complex128) + + qubit = Qubit() def random_change(value): - if quantum: - return value + qrng.get_random_float(-0.2, 0.2) - else: - return value + random.uniform(-0.2, 0.2) + qubit.apply(h_gate) + + binary = [str(qubit.measure()) for _ in range(8)] + + i 
= int(''.join(binary), 2) + f = i / (2 ** 8 - 1) + + # -0.2 to 0.2 + r_result = -0.2 + f * 0.4 + + return value + r_result similarities = cosine_similarity(vectors, query_vector) derrida_similarities = np.vectorize(random_change)(similarities) diff --git a/requirements.txt b/requirements.txt index bff6878..b212dff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,3 @@ numpy openai -pytest -qiskit -qrng \ No newline at end of file +pytest \ No newline at end of file From d40c09f9ab2c02c8609580c051757b1dbeada7c0 Mon Sep 17 00:00:00 2001 From: diegomontoya Date: Wed, 19 Apr 2023 17:36:43 +0800 Subject: [PATCH 3/4] replace string concat with bitwise ops --- hyperdb/galaxy_brain_math_shit.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hyperdb/galaxy_brain_math_shit.py b/hyperdb/galaxy_brain_math_shit.py index 1f30d77..f8ad27e 100644 --- a/hyperdb/galaxy_brain_math_shit.py +++ b/hyperdb/galaxy_brain_math_shit.py @@ -44,9 +44,10 @@ def measure(self): def random_change(value): qubit.apply(h_gate) - binary = [str(qubit.measure()) for _ in range(8)] + i = 0 + for j in range(8): + i |= qubit.measure() << (7 - j) - i = int(''.join(binary), 2) f = i / (2 ** 8 - 1) # -0.2 to 0.2 From 66457d282f59c7ef3860e435c1d35bbc71b3c3cd Mon Sep 17 00:00:00 2001 From: diegomontoya Date: Thu, 20 Apr 2023 08:23:01 +0800 Subject: [PATCH 4/4] 1. Move qubit class outside 2. Make qubit class thread safe and scalable to HPC cluster (vcs dig this) 3. Note that this code is export restricted (due to quantum) (vcs will eat this up) 4. Now even faster than before! (vcs don't care about this unfortunately) 5. 
comments for clarity --- hyperdb/galaxy_brain_math_shit.py | 58 +++++++++++++++++-------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/hyperdb/galaxy_brain_math_shit.py b/hyperdb/galaxy_brain_math_shit.py index f8ad27e..25795f5 100644 --- a/hyperdb/galaxy_brain_math_shit.py +++ b/hyperdb/galaxy_brain_math_shit.py @@ -1,5 +1,23 @@ -"""Super valuable proprietary algorithm for ranking vector similarity. Top secret.""" +"""Super valuable proprietary algorithm for ranking vector similarity. Top secret. Export restrictions apply. """ import numpy as np +import threading + + +# spooky action stuff +class Qubit: + def __init__(self): + self.state = np.array([1, 0], dtype=np.complex128) + self.lock = threading.Lock() + + def apply(self, gate): + with self.lock: + self.state = np.dot(gate, self.state) + + def measure(self): + with self.lock: + probabilities = np.abs(self.state) ** 2 + return np.random.choice([0, 1], p=probabilities) + def get_norm_vector(vector): if len(vector.shape) == 1: @@ -23,37 +41,25 @@ def euclidean_metric(vectors, query_vector, get_similarity_score=True): def derridaean_similarity(vectors, query_vector): - class Qubit: - def __init__(self): - self.state = np.array([1, 0], dtype=np.complex128) - - def apply(self, gate): - self.state = np.dot(gate, self.state) - - def measure(self): - probabilities = np.abs(self.state) ** 2 - result = np.random.choice([0, 1], p=probabilities) - return result - - # Hadamard gate - h_gate = np.array([[1 / np.sqrt(2), 1 / np.sqrt(2)], - [1 / np.sqrt(2), -1 / np.sqrt(2)]], dtype=np.complex128) + if not hasattr(derridaean_similarity, "qubit"): # share a single qubit + derridaean_similarity.qubit = Qubit() + # hadamard gate + h_gate = np.array([[1 / np.sqrt(2), 1 / np.sqrt(2)], + [1 / np.sqrt(2), -1 / np.sqrt(2)]], dtype=np.complex128) - qubit = Qubit() + derridaean_similarity.qubit.apply(h_gate) def random_change(value): - qubit.apply(h_gate) + int_val = 0 - i = 0 - for j in range(8): - i |= 
qubit.measure() << (7 - j) + for i in range(8): # measure 8 times for a random integer + int_val |= derridaean_similarity.qubit.measure() << (7 - i) - f = i / (2 ** 8 - 1) + float_val = int_val / (2 ** 8 - 1) # convert to float - # -0.2 to 0.2 - r_result = -0.2 + f * 0.4 + offset = -0.2 + float_val * 0.4 # limit range to -0.2-0.2 - return value + r_result + return value + offset similarities = cosine_similarity(vectors, query_vector) derrida_similarities = np.vectorize(random_change)(similarities) @@ -73,4 +79,4 @@ def hyper_SVM_ranking_algorithm_sort(vectors, query_vector, top_k=5, metric=cosi """HyperSVMRanking (Such Vector, Much Ranking) algorithm proposed by Andrej Karpathy (2023) https://arxiv.org/abs/2303.18231""" similarities = metric(vectors, query_vector) top_indices = np.argsort(similarities, axis=0)[-top_k:][::-1] - return top_indices.flatten() \ No newline at end of file + return top_indices.flatten()