diff --git a/__init__.pyc b/__init__.pyc index 6c0d88a..19ce758 100644 Binary files a/__init__.pyc and b/__init__.pyc differ diff --git a/q01_k_means/__init__.pyc b/q01_k_means/__init__.pyc index bff55bc..db0846a 100644 Binary files a/q01_k_means/__init__.pyc and b/q01_k_means/__init__.pyc differ diff --git a/q01_k_means/build.py b/q01_k_means/build.py index fca565c..f2c9421 100644 --- a/q01_k_means/build.py +++ b/q01_k_means/build.py @@ -1,16 +1,43 @@ +# %load q01_k_means/build.py # Default imports from sklearn.cluster import KMeans import matplotlib.pyplot as plt from sklearn import datasets - +import pandas as pd +import numpy as np +import time +import seaborn as sns digits = datasets.load_digits() X_train = digits.images y_train = digits.target - +print(X_train.shape) +print(y_train.shape) # Write your solution here : +def k_means(X_train,y_train,cluster=10,random_state=9): + X = np.reshape(X_train, (len(X_train), -1)) + data=X + for i in range(cluster): + #print(i) + + km = KMeans(init="random", n_clusters=i+1) + #y=km.fit_predict(X) + #print(y) + labels =km.fit_predict(data) + #end_time = time.time() + palette = sns.color_palette('deep', np.unique(labels).max() + 1) + colors = [palette[x] if x >= 0 else (0.0, 0.0, 0.0) for x in labels] + plt.scatter(data.T[0], data.T[1]) + frame = plt.gca() + frame.axes.get_xaxis().set_visible(False) + frame.axes.get_yaxis().set_visible(False) + #plt.title('Clusters found by {}'.format(str(algorithm.__name__)), fontsize=24) + #plt.text(5, 10, 'Clustering took {:.2f} s'.format(end_time - start_time), fontsize=14) + plt.show() + +#k_means(X_train,y_train,cluster=10,random_state=9) diff --git a/q01_k_means/build.pyc b/q01_k_means/build.pyc index fa56657..e1074dd 100644 Binary files a/q01_k_means/build.pyc and b/q01_k_means/build.pyc differ diff --git a/q01_k_means/tests/__init__.pyc b/q01_k_means/tests/__init__.pyc index f6a37b9..281d0a6 100644 Binary files a/q01_k_means/tests/__init__.pyc and b/q01_k_means/tests/__init__.pyc differ diff --git a/q01_k_means/tests/test_q01_k_means.pyc b/q01_k_means/tests/test_q01_k_means.pyc index ac55928..2d34247 100644 Binary files a/q01_k_means/tests/test_q01_k_means.pyc and b/q01_k_means/tests/test_q01_k_means.pyc differ diff --git a/q02_hierarchy_clustering/__init__.pyc b/q02_hierarchy_clustering/__init__.pyc index 9e9464b..88a8ec8 100644 Binary files a/q02_hierarchy_clustering/__init__.pyc and b/q02_hierarchy_clustering/__init__.pyc differ diff --git a/q02_hierarchy_clustering/build.py b/q02_hierarchy_clustering/build.py index 2ba8b26..a00e23c 100644 --- a/q02_hierarchy_clustering/build.py +++ b/q02_hierarchy_clustering/build.py @@ -1,3 +1,4 @@ +# %load q02_hierarchy_clustering/build.py # Default imports import pandas as pd @@ -5,8 +6,48 @@ from sklearn.preprocessing import scale from scipy.cluster import hierarchy from sklearn import datasets +from scipy.cluster.hierarchy import dendrogram, linkage digits = datasets.load_digits() df = pd.DataFrame(scale(digits.data), index=digits.target) +#print(df.head) # Write your solution here : + +def hierarchy_clustering(df): + Z_single = linkage(df, 'single') + Z_avg = linkage(df, 'average') + z_ward=linkage(df, 'ward') + z_comp=linkage(df,'complete') + plt.figure(figsize=(25, 10)) + plt.title('Hierarchical Clustering Dendrogram') + plt.xlabel('sample index') + plt.ylabel('distance') + plt.subplot(1,2,1) + + dendrogram( + Z_single, + leaf_rotation=90., # rotates the x axis labels + leaf_font_size=8., # font size for the x axis labels + ) + plt.subplot(2,2,1) + dendrogram( + z_comp, + leaf_rotation=90., # rotates the x axis labels + leaf_font_size=8., # font size for the x axis labels + ) + plt.subplot(2,2,2) + dendrogram( + Z_avg, + leaf_rotation=90., # rotates the x axis labels + leaf_font_size=8., # font size for the x axis labels + ) + plt.subplot(1,2,2) + dendrogram( + z_ward, + leaf_rotation=90., # rotates the x axis labels + leaf_font_size=8., # font size for the x axis labels + ) + plt.show() + +#hierarchy_clustering(df) diff --git a/q02_hierarchy_clustering/build.pyc b/q02_hierarchy_clustering/build.pyc index 59f6156..80bab7e 100644 Binary files a/q02_hierarchy_clustering/build.pyc and b/q02_hierarchy_clustering/build.pyc differ diff --git a/q02_hierarchy_clustering/tests/__init__.pyc b/q02_hierarchy_clustering/tests/__init__.pyc index bb41aea..3be2961 100644 Binary files a/q02_hierarchy_clustering/tests/__init__.pyc and b/q02_hierarchy_clustering/tests/__init__.pyc differ diff --git a/q02_hierarchy_clustering/tests/test_q02_hierarchy_clustering.pyc b/q02_hierarchy_clustering/tests/test_q02_hierarchy_clustering.pyc index d1b4567..41adb6a 100644 Binary files a/q02_hierarchy_clustering/tests/test_q02_hierarchy_clustering.pyc and b/q02_hierarchy_clustering/tests/test_q02_hierarchy_clustering.pyc differ