From 9cc9ba3b82918ce3f7dc45e7c8c9ad1760bdb4c3 Mon Sep 17 00:00:00 2001 From: drakeeee Date: Mon, 27 Aug 2018 11:47:52 -0700 Subject: [PATCH 1/3] start making this into a package --- plot_toy_example.py | 2 +- pyowl/__init__.py | 0 pyowl/fista.py | 55 ++++++++++++ pyowl/loss.py | 30 +++++++ pyowl/pyowl.py | 209 ++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 0 setup.py | 11 +++ tests/test_pyowl.py | 57 ++++++++++++ 8 files changed, 363 insertions(+), 1 deletion(-) create mode 100644 pyowl/__init__.py create mode 100644 pyowl/fista.py create mode 100644 pyowl/loss.py create mode 100644 pyowl/pyowl.py create mode 100644 requirements.txt create mode 100644 setup.py create mode 100644 tests/test_pyowl.py diff --git a/plot_toy_example.py b/plot_toy_example.py index 3e66719..5cd0f09 100644 --- a/plot_toy_example.py +++ b/plot_toy_example.py @@ -13,7 +13,7 @@ import numpy as np import matplotlib.pyplot as plt from sklearn.linear_model import Lasso -from pyowl import OwlRegressor +from pyowl.pyowl import OwlRegressor n_samples = 10 n_features = 100 diff --git a/pyowl/__init__.py b/pyowl/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pyowl/fista.py b/pyowl/fista.py new file mode 100644 index 0000000..be1a01f --- /dev/null +++ b/pyowl/fista.py @@ -0,0 +1,55 @@ +""" +Efficient implementation of FISTA. +""" + +# Author: Mathieu Blondel +# License: BSD 3 clause +# based on https://gist.github.com/mblondel/5105786d740693a6996bcb8e482c7083 + +import numpy as np + + +def fista(sfunc, nsfunc, x0, max_iter=500, max_linesearch=20, eta=2.0, tol=1e-3, + verbose=0): + + y = x0.copy() + x = y + L = 1.0 + t = 1.0 + + for it in range(max_iter): + f_old, grad = sfunc(y, True) + + for ls in range(max_linesearch): + y_proj = nsfunc(y - grad / L, L) + diff = (y_proj - y).ravel() + sqdist = np.dot(diff, diff) + dist = np.sqrt(sqdist) + + F = sfunc(y_proj) + Q = f_old + np.dot(diff, grad.ravel()) + 0.5 * L * sqdist + + if F <= Q: + break + + L *= eta + + if ls == max_linesearch - 1 and verbose: + print("Line search did not converge.") + + if verbose: + print("%d. %f" % (it + 1, dist)) + + if dist <= tol: + if verbose: + print("Converged.") + break + + x_next = y_proj + t_next = (1 + np.sqrt(1 + 4 * t ** 2)) / 2. + y = x_next + (t-1) / t_next * (x_next - x) + t = t_next + x = x_next + + return y_proj + diff --git a/pyowl/loss.py b/pyowl/loss.py new file mode 100644 index 0000000..401ab87 --- /dev/null +++ b/pyowl/loss.py @@ -0,0 +1,30 @@ +# Author: Vlad Niculae +# License: BSD 3 clause + +import numpy as np + + +def squared_loss(y_true, y_pred, return_derivative=False): + diff = y_pred - y_true + obj = 0.5 * np.dot(diff, diff) + if return_derivative: + return obj, diff + else: + return obj + + +def squared_hinge_loss(y_true, y_scores, return_derivative=False): + # labels in (-1, 1) + z = np.maximum(0, 1 - y_true * y_scores) + obj = np.sum(z ** 2) + + if return_derivative: + return obj, -2 * y_true * z + else: + return obj + + +def get_loss(name): + losses = {'squared': squared_loss, + 'squared-hinge': squared_hinge_loss} + return losses[name] diff --git a/pyowl/pyowl.py b/pyowl/pyowl.py new file mode 100644 index 0000000..50492ce --- /dev/null +++ b/pyowl/pyowl.py @@ -0,0 +1,209 @@ +# Author: Vlad Niculae +# License: BSD 3 clause + +from __future__ import print_function +from __future__ import division + +import numpy as np + +from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin +from sklearn.utils.extmath import safe_sparse_dot +from sklearn.isotonic import isotonic_regression +from sklearn.preprocessing import LabelBinarizer + +from pyowl.fista import fista +from pyowl.loss import get_loss + + +def prox_owl(v, w): + """Proximal operator of the OWL norm dot(w, reversed(sort(v))) + + Follows description and notation from: + X. Zeng, M. Figueiredo, + The ordered weighted L1 norm: Atomic formulation, dual norm, + and projections. + eprint http://arxiv.org/abs/1409.4271 + """ + + # wlog operate on absolute values + v_abs = np.abs(v) + ix = np.argsort(v_abs)[::-1] + v_abs = v_abs[ix] + # project to K+ (monotone non-negative decreasing cone) + v_abs = isotonic_regression(v_abs - w, y_min=0, increasing=False) + + # undo the sorting + inv_ix = np.zeros_like(ix) + inv_ix[ix] = np.arange(len(v)) + v_abs = v_abs[inv_ix] + + return np.sign(v) * v_abs + + +def _oscar_weights(alpha, beta, size): + w = np.arange(size - 1, -1, -1, dtype=np.double) + w *= beta + w += alpha + return w + + +def _fit_owl_fista(X, y, w, loss, max_iter=500, max_linesearch=20, eta=2.0, + tol=1e-3, verbose=0): + + # least squares loss + def sfunc(coef, grad=False): + y_scores = safe_sparse_dot(X, coef) + if grad: + obj, lp = loss(y, y_scores, return_derivative=True) + grad = safe_sparse_dot(X.T, lp) + return obj, grad + else: + return loss(y, y_scores) + + def nsfunc(coef, L): + return prox_owl(coef, w / L) + + coef = np.zeros(X.shape[1]) + return fista(sfunc, nsfunc, coef, max_iter, max_linesearch, + eta, tol, verbose) + + +class _BaseOwl(BaseEstimator): + """ + + Solves sum loss(y_pred, y) + sum_j weights_j |coef|_(j) + where u_(j) is the jth largest component of the vector u. + and weights is a monotonic nonincreasing vector. + + OWL is also known as: sorted L1 norm, SLOPE + + Parameters + ---------- + + weights: array, shape (n_features,) or tuple, length 2 + Nonincreasing weights vector for the ordered weighted L1 penalty. + If weights = (alpha, 0, 0, ..., 0), this amounts to a L_inf penalty. + If weights = alpha * np.ones(n_features) it amounts to L1. + If weights is a tuple = (alpha, beta), the OSCAR penalty is used:: + alpha ||coef||_1 + beta sum_{i 0 + return self.lb_.inverse_transform(y_pred) + + +if __name__ == '__main__': + + from sklearn.model_selection import train_test_split + from sklearn.datasets import load_boston, load_breast_cancer + + print("OSCAR proximal operator on toy example:") + v = np.array([1, 3, 2.9, 4, 0]) + w_oscar = _oscar_weights(alpha=0.01, beta=1, size=5) + print(prox_owl(v, w_oscar)) + print() + + print("Regression") + X, y = load_boston(return_X_y=True) + X = np.column_stack([X, -X[:, 0] + 0.01 * np.random.randn(X.shape[0])]) + X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0) + clf = OwlRegressor(weights=(1, 100)) + clf.fit(X_tr, y_tr) + print("Correlated coefs", clf.coef_[0], clf.coef_[-1]) + print("Test score", clf.score(X_te, y_te)) + print() + + print("Classification") + X, y = load_breast_cancer(return_X_y=True) + X = np.column_stack([X, -X[:, 0] + 0.01 * np.random.randn(X.shape[0])]) + X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0) + clf = OwlClassifier(weights=(1, 100), loss='squared-hinge') + clf.fit(X_tr, y_tr) + print("Correlated coefs", clf.coef_[0], clf.coef_[-1]) + print("Test score", clf.score(X_te, y_te)) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e69de29 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..d1058fe --- /dev/null +++ b/setup.py @@ -0,0 +1,11 @@ +from setuptools import find_packages +from setuptools import setup + +setup(name='pyowl', + version='0.1', + description='pyowl: Ordered Weighted L1 Regularization in Python', + long_description=open('README.md').read(), + url='https://github.com/vene/pyowl', + packages=find_packages(), + install_requires=[ + ]) diff --git a/tests/test_pyowl.py b/tests/test_pyowl.py new file mode 100644 index 0000000..3dec713 --- /dev/null +++ b/tests/test_pyowl.py @@ -0,0 +1,57 @@ +# Author: Vlad Niculae +# License: BSD 3 clause + +import numpy as np +from numpy.testing import assert_array_almost_equal +from pyowl import prox_owl + +rng = np.random.RandomState(0) + +# cf. scikit-learn-contrib/lightning impl/penalty.py +def project_simplex(v, z=1): + if np.sum(v) <= z: + return v + + n_features = v.shape[0] + u = np.sort(v)[::-1] + cssv = np.cumsum(u) - z + ind = np.arange(n_features) + 1 + cond = u - cssv / ind > 0 + rho = ind[cond][-1] + theta = cssv[cond][-1] / float(rho) + w = np.maximum(v - theta, 0) + return w + + +# cf. scikit-learn-contrib/lightning impl/penalty.py +def project_l1_ball(v, z=1): + return np.sign(v) * project_simplex(np.abs(v), z) + + +def prox_linf(v, alpha): + # cf. Proximal Algorithms, Parikh & Boyd, eq. 6.8 + # dual ball B is the L1 ball + + p = project_l1_ball(v / alpha) + return v - alpha * p + + + +def test_prox_special_cases(): + for _ in range(20): + v = rng.randn(10) + alpha = rng.uniform(0.001, 1) + + # l1 proximal operator + z_expected = np.maximum(0, v - alpha) + z_expected -= np.maximum(0, -v - alpha) + z_obtained = prox_owl(v, alpha * np.ones_like(v)) + + assert_array_almost_equal(z_expected, z_obtained) + + # l_inf proximal operator + z_expected = prox_linf(v, alpha) + w = np.zeros_like(v) + w[0] = alpha + z_obtained = prox_owl(v, w) + assert_array_almost_equal(z_expected, z_obtained) From 0282069c738b6f892f2705887aff14f6134d8cb0 Mon Sep 17 00:00:00 2001 From: drakeeee Date: Mon, 27 Aug 2018 11:48:31 -0700 Subject: [PATCH 2/3] remove --- fista.py | 55 ------------- loss.py | 30 -------- pyowl.py | 209 -------------------------------------------------- test_pyowl.py | 57 -------------- 4 files changed, 351 deletions(-) delete mode 100644 fista.py delete mode 100644 loss.py delete mode 100644 pyowl.py delete mode 100644 test_pyowl.py diff --git a/fista.py b/fista.py deleted file mode 100644 index be1a01f..0000000 --- a/fista.py +++ /dev/null @@ -1,55 +0,0 @@ -""" -Efficient implementation of FISTA. -""" - -# Author: Mathieu Blondel -# License: BSD 3 clause -# based on https://gist.github.com/mblondel/5105786d740693a6996bcb8e482c7083 - -import numpy as np - - -def fista(sfunc, nsfunc, x0, max_iter=500, max_linesearch=20, eta=2.0, tol=1e-3, - verbose=0): - - y = x0.copy() - x = y - L = 1.0 - t = 1.0 - - for it in range(max_iter): - f_old, grad = sfunc(y, True) - - for ls in range(max_linesearch): - y_proj = nsfunc(y - grad / L, L) - diff = (y_proj - y).ravel() - sqdist = np.dot(diff, diff) - dist = np.sqrt(sqdist) - - F = sfunc(y_proj) - Q = f_old + np.dot(diff, grad.ravel()) + 0.5 * L * sqdist - - if F <= Q: - break - - L *= eta - - if ls == max_linesearch - 1 and verbose: - print("Line search did not converge.") - - if verbose: - print("%d. %f" % (it + 1, dist)) - - if dist <= tol: - if verbose: - print("Converged.") - break - - x_next = y_proj - t_next = (1 + np.sqrt(1 + 4 * t ** 2)) / 2. - y = x_next + (t-1) / t_next * (x_next - x) - t = t_next - x = x_next - - return y_proj - diff --git a/loss.py b/loss.py deleted file mode 100644 index 401ab87..0000000 --- a/loss.py +++ /dev/null @@ -1,30 +0,0 @@ -# Author: Vlad Niculae -# License: BSD 3 clause - -import numpy as np - - -def squared_loss(y_true, y_pred, return_derivative=False): - diff = y_pred - y_true - obj = 0.5 * np.dot(diff, diff) - if return_derivative: - return obj, diff - else: - return obj - - -def squared_hinge_loss(y_true, y_scores, return_derivative=False): - # labels in (-1, 1) - z = np.maximum(0, 1 - y_true * y_scores) - obj = np.sum(z ** 2) - - if return_derivative: - return obj, -2 * y_true * z - else: - return obj - - -def get_loss(name): - losses = {'squared': squared_loss, - 'squared-hinge': squared_hinge_loss} - return losses[name] diff --git a/pyowl.py b/pyowl.py deleted file mode 100644 index c7d423f..0000000 --- a/pyowl.py +++ /dev/null @@ -1,209 +0,0 @@ -# Author: Vlad Niculae -# License: BSD 3 clause - -from __future__ import print_function -from __future__ import division - -import numpy as np - -from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin -from sklearn.utils.extmath import safe_sparse_dot -from sklearn.isotonic import isotonic_regression -from sklearn.preprocessing import LabelBinarizer - -from fista import fista -from loss import get_loss - - -def prox_owl(v, w): - """Proximal operator of the OWL norm dot(w, reversed(sort(v))) - - Follows description and notation from: - X. Zeng, M. Figueiredo, - The ordered weighted L1 norm: Atomic formulation, dual norm, - and projections. - eprint http://arxiv.org/abs/1409.4271 - """ - - # wlog operate on absolute values - v_abs = np.abs(v) - ix = np.argsort(v_abs)[::-1] - v_abs = v_abs[ix] - # project to K+ (monotone non-negative decreasing cone) - v_abs = isotonic_regression(v_abs - w, y_min=0, increasing=False) - - # undo the sorting - inv_ix = np.zeros_like(ix) - inv_ix[ix] = np.arange(len(v)) - v_abs = v_abs[inv_ix] - - return np.sign(v) * v_abs - - -def _oscar_weights(alpha, beta, size): - w = np.arange(size - 1, -1, -1, dtype=np.double) - w *= beta - w += alpha - return w - - -def _fit_owl_fista(X, y, w, loss, max_iter=500, max_linesearch=20, eta=2.0, - tol=1e-3, verbose=0): - - # least squares loss - def sfunc(coef, grad=False): - y_scores = safe_sparse_dot(X, coef) - if grad: - obj, lp = loss(y, y_scores, return_derivative=True) - grad = safe_sparse_dot(X.T, lp) - return obj, grad - else: - return loss(y, y_scores) - - def nsfunc(coef, L): - return prox_owl(coef, w / L) - - coef = np.zeros(X.shape[1]) - return fista(sfunc, nsfunc, coef, max_iter, max_linesearch, - eta, tol, verbose) - - -class _BaseOwl(BaseEstimator): - """ - - Solves sum loss(y_pred, y) + sum_j weights_j |coef|_(j) - where u_(j) is the jth largest component of the vector u. - and weights is a monotonic nonincreasing vector. - - OWL is also known as: sorted L1 norm, SLOPE - - Parameters - ---------- - - weights: array, shape (n_features,) or tuple, length 2 - Nonincreasing weights vector for the ordered weighted L1 penalty. - If weights = (alpha, 0, 0, ..., 0), this amounts to a L_inf penalty. - If weights = alpha * np.ones(n_features) it amounts to L1. - If weights is a tuple = (alpha, beta), the OSCAR penalty is used:: - alpha ||coef||_1 + beta sum_{i 0 - return self.lb_.inverse_transform(y_pred) - - -if __name__ == '__main__': - - from sklearn.model_selection import train_test_split - from sklearn.datasets import load_boston, load_breast_cancer - - print("OSCAR proximal operator on toy example:") - v = np.array([1, 3, 2.9, 4, 0]) - w_oscar = _oscar_weights(alpha=0.01, beta=1, size=5) - print(prox_owl(v, w_oscar)) - print() - - print("Regression") - X, y = load_boston(return_X_y=True) - X = np.column_stack([X, -X[:, 0] + 0.01 * np.random.randn(X.shape[0])]) - X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0) - clf = OwlRegressor(weights=(1, 100)) - clf.fit(X_tr, y_tr) - print("Correlated coefs", clf.coef_[0], clf.coef_[-1]) - print("Test score", clf.score(X_te, y_te)) - print() - - print("Classification") - X, y = load_breast_cancer(return_X_y=True) - X = np.column_stack([X, -X[:, 0] + 0.01 * np.random.randn(X.shape[0])]) - X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0) - clf = OwlClassifier(weights=(1, 100), loss='squared-hinge') - clf.fit(X_tr, y_tr) - print("Correlated coefs", clf.coef_[0], clf.coef_[-1]) - print("Test score", clf.score(X_te, y_te)) diff --git a/test_pyowl.py b/test_pyowl.py deleted file mode 100644 index 3dec713..0000000 --- a/test_pyowl.py +++ /dev/null @@ -1,57 +0,0 @@ -# Author: Vlad Niculae -# License: BSD 3 clause - -import numpy as np -from numpy.testing import assert_array_almost_equal -from pyowl import prox_owl - -rng = np.random.RandomState(0) - -# cf. scikit-learn-contrib/lightning impl/penalty.py -def project_simplex(v, z=1): - if np.sum(v) <= z: - return v - - n_features = v.shape[0] - u = np.sort(v)[::-1] - cssv = np.cumsum(u) - z - ind = np.arange(n_features) + 1 - cond = u - cssv / ind > 0 - rho = ind[cond][-1] - theta = cssv[cond][-1] / float(rho) - w = np.maximum(v - theta, 0) - return w - - -# cf. scikit-learn-contrib/lightning impl/penalty.py -def project_l1_ball(v, z=1): - return np.sign(v) * project_simplex(np.abs(v), z) - - -def prox_linf(v, alpha): - # cf. Proximal Algorithms, Parikh & Boyd, eq. 6.8 - # dual ball B is the L1 ball - - p = project_l1_ball(v / alpha) - return v - alpha * p - - - -def test_prox_special_cases(): - for _ in range(20): - v = rng.randn(10) - alpha = rng.uniform(0.001, 1) - - # l1 proximal operator - z_expected = np.maximum(0, v - alpha) - z_expected -= np.maximum(0, -v - alpha) - z_obtained = prox_owl(v, alpha * np.ones_like(v)) - - assert_array_almost_equal(z_expected, z_obtained) - - # l_inf proximal operator - z_expected = prox_linf(v, alpha) - w = np.zeros_like(v) - w[0] = alpha - z_obtained = prox_owl(v, w) - assert_array_almost_equal(z_expected, z_obtained) From 271989be8efe08db5ac964b69f9cfdaf12eadd6a Mon Sep 17 00:00:00 2001 From: drakeeee Date: Mon, 27 Aug 2018 13:43:33 -0700 Subject: [PATCH 3/3] requirements and imports --- requirements.txt | 4 ++++ setup.py | 3 +++ tests/test_pyowl.py | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e69de29..887b771 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1,4 @@ +matplotlib==2.2.3 +numpy==1.15.1 +scikit-learn==0.19.2 +scipy==1.1.0 \ No newline at end of file diff --git a/setup.py b/setup.py index d1058fe..64c1120 100644 --- a/setup.py +++ b/setup.py @@ -8,4 +8,7 @@ url='https://github.com/vene/pyowl', packages=find_packages(), install_requires=[ + "numpy==1.15.1", + "scikit-learn==0.19.2", + "scipy==1.1.0" ]) diff --git a/tests/test_pyowl.py b/tests/test_pyowl.py index 3dec713..6b75456 100644 --- a/tests/test_pyowl.py +++ b/tests/test_pyowl.py @@ -3,7 +3,7 @@ import numpy as np from numpy.testing import assert_array_almost_equal -from pyowl import prox_owl +from pyowl.pyowl import prox_owl rng = np.random.RandomState(0)