Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions .github/workflows/clean_template.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
name: Benchmark creation

# This step triggers after a user creates a new repository from the template
# It replaces the placeholder with the correct names and
# cleans up template scripts

# This will run every time we push a commit to `main`
# Reference https://docs.github.com/en/actions/learn-github-actions/events-that-trigger-workflows
on:
  workflow_dispatch:
  push:
    branches:
      - main

permissions:
  # Need `contents: read` to checkout the repository
  # Need `contents: write` to update the step metadata
  contents: write

jobs:
  clean_template:
    name: Clean up template
    runs-on: ubuntu-latest
    # Only run this action when this repository isn't the template repo
    if: ${{ !github.event.repository.is_template }}

    steps:
      - name: Checkout
        uses: actions/checkout@v5

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.12'

      - name: Clean template scripts
        run: |
          python clean_template.py

      # Commit the cleaned-up repository state back to `main`
      - name: Commit clean up on the repository
        run: |
          echo "Configure git"
          git config user.name github-actions[bot]
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"

          echo "Commit all changes"
          # `git add -A` stages deletions AND any untracked files the
          # cleanup script produced; `commit -a` alone would miss new files.
          git add -A
          git commit -m "CLN remove template scripts"

          echo "Push to main"
          git push
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
21 changes: 0 additions & 21 deletions .github/workflows/lint_benchmarks.yml

This file was deleted.

4 changes: 2 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ on:
branches:
- main
schedule:
# Run every day at 7:42am UTC.
- cron: '42 7 * * *'
# Run every 1st of the month at 7:42am UTC.
- cron: '42 7 1 * *'

jobs:
benchopt_dev:
Expand Down
95 changes: 0 additions & 95 deletions .github/workflows/test_benchmarks.yml

This file was deleted.

19 changes: 9 additions & 10 deletions datasets/simulated.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
from benchopt import BaseDataset, safe_import_context
from benchopt import BaseDataset


# Protect the import with `safe_import_context()`. This allows:
# - skipping import to speed up autocompletion in CLI.
# - getting requirements info when all dependencies are not installed.
with safe_import_context() as import_ctx:
from sklearn.datasets import make_classification
from sklearn.datasets import make_classification


# All datasets must be named `Dataset` and inherit from `BaseDataset`
Expand Down Expand Up @@ -34,12 +29,16 @@ def get_data(self):
# to `Objective.set_data`. This defines the benchmark's
# API to pass data. It is customizable for each benchmark.
#
# Data splitting is handled by the `Objective.get_objective` method and `Objective.cv` property
# Data splitting is handled by the `Objective.get_objective` method
# and `Objective.cv` property

# Generate pseudorandom data using `sklearn` for classification.
# Generating synthetic dataset
X, y = make_classification(n_samples=self.n_samples, n_features=self.n_features, n_informative=1,
n_redundant=0, n_clusters_per_class=1, random_state=self.random_state)
X, y = make_classification(
n_samples=self.n_samples, n_features=self.n_features,
n_informative=1, n_redundant=0, n_clusters_per_class=1,
random_state=self.random_state
)

# The dictionary defines the keyword arguments for `Objective.set_data`
return dict(X=X, y=y)
41 changes: 24 additions & 17 deletions objective.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
from benchopt import BaseObjective, safe_import_context
from benchopt import BaseObjective

from sklearn.model_selection import KFold
from sklearn.dummy import DummyClassifier

# Protect the import with `safe_import_context()`. This allows:
# - skipping import to speed up autocompletion in CLI.
# - getting requirements info when all dependencies are not installed.
with safe_import_context() as import_ctx:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score

# The benchmark objective must be named `Objective` and
# inherit from `BaseObjective` for `benchopt` to work properly.
Expand Down Expand Up @@ -38,7 +33,12 @@ class Objective(BaseObjective):

# Minimal version of benchopt required to run this benchmark.
# Bump it up if the benchmark depends on a new feature of benchopt.
min_benchopt_version = "1.6"
min_benchopt_version = "1.8"

# Disable performance curves - each solver runs once to completion
# See https://benchopt.github.io/stable/user_guide/performance_curves.html
# for more details.
sampling_strategy = "run_once"

def set_data(self, X, y):
# The keyword arguments of this function are the keys of the dictionary
Expand All @@ -49,7 +49,9 @@ def set_data(self, X, y):
# Specify a cross-validation splitter as the `cv` attribute.
# This will be automatically used in `self.get_split` to split
# the arrays provided.
self.cv = KFold(n_splits=5, shuffle=True, random_state=self.random_state)
self.cv = KFold(
n_splits=5, shuffle=True, random_state=self.random_state
)

# If the cross-validation requires some metadata, it can be
# provided in the `cv_metadata` attribute. This will be passed
Expand All @@ -61,13 +63,16 @@ def evaluate_result(self, model):
# dictionary returned by `Solver.get_result`. This defines the
# benchmark's API to pass the solvers' result. This can be
# customized for each benchmark.
y_pred = model.predict(self.X_test)
accuracy = accuracy_score(self.y_test, y_pred)
#
# Here, the solver returns a trained model,
# with which we can call ``score`` to get the accuracy.
accuracy_train = model.score(self.X_train, self.y_train)
accuracy_test = model.score(self.X_test, self.y_test)

# This method can return many metrics in a dictionary. One of these
# metrics needs to be `value` for convergence detection purposes.
# This method can return many metrics in a dictionary.
return dict(
value=accuracy,
accuracy_test=accuracy_test,
accuracy_train=accuracy_train,
)

def get_one_result(self):
Expand All @@ -84,7 +89,9 @@ def get_objective(self):
# benchmark's API for passing the objective to the solver.
# This can be customized in each benchmark.

self.X_train, self.X_test, self.y_train, self.y_test = self.get_split(self.X, self.y)
self.X_train, self.X_test, self.y_train, self.y_test = self.get_split(
self.X, self.y
)

return dict(
X_train=self.X_train,
Expand Down
23 changes: 9 additions & 14 deletions solvers/svm.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
from benchopt import BaseSolver, safe_import_context
from benchopt import BaseSolver

# Protect the import with `safe_import_context()`. This allows:
# - skipping import to speed up autocompletion in CLI.
# - getting requirements info when all dependencies are not installed.
with safe_import_context() as import_ctx:

# import your reusable functions here
from sklearn.svm import SVC
from sklearn.svm import SVC


# The benchmark solvers must be named `Solver` and
Expand All @@ -29,9 +23,6 @@ class Solver(BaseSolver):
# so no need to add it again.
requirements = []

# Force the solver to run only once if you don't want to record training steps
sampling_strategy = "run_once"

def set_objective(self, X_train, y_train):
# Define the information received by each solver from the objective.
# The arguments of this function are the results of the
Expand All @@ -42,10 +33,14 @@ def set_objective(self, X_train, y_train):
self.clf = SVC(kernel=self.kernel)

def run(self, _):
"""Run the solver.

Parameters
----------
_ : ignored
With sampling_strategy="run_once", this parameter is unused.
"""
# This is the method that is called to fit the model.
# The input param is only defined if you change the sampling strategy
# to value different than "run_once".
# See https://benchopt.github.io/performance_curves.html
self.clf.fit(self.X_train, self.y_train)

def get_result(self):
Expand Down
Loading