
Running ML experiments

IRIS Flowers Classification

In this complete example, we:

  1. Load the IRIS dataset in step load
  2. Instantiate, train and predict with a classifier in step train_predict
  3. Determine the accuracy score in step evaluate

Results are averaged over 10 independent runs per classifier with param seed=range(10), and multiple classifiers are compared thanks to param classifier=[...], for a total of 50 runs.
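The parameter grid behaves like a cartesian product: every classifier is paired with every seed, and each pair becomes one run. A minimal sketch of that expansion using itertools.product (illustrative stand-in names, not MLtraq's internals):

```python
from itertools import product

# Illustrative stand-ins for the five classifiers and ten seeds below.
classifiers = ["Dummy", "LogReg", "Tree", "Forest", "KMeans"]
seeds = range(10)

# Each (classifier, seed) pair becomes one run: 5 x 10 = 50 runs.
grid = list(product(classifiers, seeds))
print(len(grid))  # 50
print(grid[0])    # ('Dummy', 0)
```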

IRIS Flowers Classification with some statistics

from functools import partial

import mltraq
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import shuffle


def load(run: mltraq.Run):
    # Load the IRIS dataset, taking care of shuffling the samples.
    # We use run.vars, accessible only within the execution of the runs.
    run.vars.X, run.vars.y = shuffle(
        *load_iris(return_X_y=True), random_state=run.params.seed
    )


def train_predict(run: mltraq.Run):
    # Instantiate and train the classifier on the first 100 (shuffled) samples;
    # the remaining 50 are held out for evaluation.
    model = run.params.classifier(random_state=run.params.seed).fit(
        run.vars.X[:100], run.vars.y[:100]
    )

    # Track the classifier name in run.fields, which is persisted to the database.
    run.fields.model_name = model.__class__.__name__

    # Use trained model to make predictions.
    run.vars.y_pred = model.predict(run.vars.X[100:])
    run.vars.y_true = run.vars.y[100:]


def evaluate(run: mltraq.Run):
    # Track accuracy score from previously determined predictions.
    run.fields.accuracy = accuracy_score(run.vars.y_true, run.vars.y_pred)


# Connect to the MLtraq session and create an experiment.
session = mltraq.create_session()
experiment = session.create_experiment()

# Use a parameter grid to define the experiment's runs.
experiment.add_runs(
    classifier=[
        partial(DummyClassifier, strategy="most_frequent"),
        partial(LogisticRegression, max_iter=1000),
        DecisionTreeClassifier,
        RandomForestClassifier,
        partial(KMeans, n_clusters=3, n_init="auto"),
    ],
    seed=range(10),
)

# Execute the experiment, running the step functions on each run in parallel.
experiment.execute(steps=[load, train_predict, evaluate])

# Stats on the experiment
print("Experiment:")
print(experiment)
print("\n--")

# A sample run
print("A random run:")
print(experiment.runs.first().fields)
print("\n--")

# Query the results and report the ML models leaderboard.
df_leaderboard = (
    experiment.runs.df()
    .groupby("model_name")
    .mean(numeric_only=True)
    .sort_values(by="accuracy", ascending=False)
)
print("Leaderboard:")
print(df_leaderboard)
Output
Experiment:
Experiment(name="akeut6", runs.count=50, id="d65df69e-1175-44a5-be2f-2232765703b8")

--
A random run:
{'model_name': 'KMeans', 'accuracy': 0.2}

--
Leaderboard:
                        accuracy
model_name                      
LogisticRegression         0.960
RandomForestClassifier     0.952
DecisionTreeClassifier     0.938
KMeans                     0.336
DummyClassifier            0.288
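The leaderboard reports only the mean accuracy; the same groupby can also surface variability across seeds. A pandas-only sketch, with a small illustrative frame standing in for experiment.runs.df():

```python
import pandas as pd

# Illustrative stand-in for experiment.runs.df(): one row per run.
df = pd.DataFrame(
    {
        "model_name": ["LogisticRegression"] * 3 + ["DummyClassifier"] * 3,
        "accuracy": [0.96, 0.94, 0.98, 0.28, 0.30, 0.26],
    }
)

# Mean and standard deviation of accuracy per model, best first.
df_stats = (
    df.groupby("model_name")["accuracy"]
    .agg(["mean", "std"])
    .sort_values(by="mean", ascending=False)
)
print(df_stats)
```

Swapping `.mean(numeric_only=True)` for `.agg(["mean", "std"])` in the leaderboard query above yields the same kind of report on the real runs.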

Congratulations!

You can now run ML experiments, persist them, and share reproducible results with your team.
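Reproducibility here hinges on the seeding pattern used in the load step: every source of randomness is derived from run.params.seed, so re-executing a run reproduces its data split exactly. The same idea in plain Python, with random.Random standing in for sklearn's random_state:

```python
import random


def shuffled(items, seed):
    # Deterministic shuffle: a fixed seed always yields the same order.
    rng = random.Random(seed)
    items = list(items)
    rng.shuffle(items)
    return items


# Two executions with the same seed agree exactly.
print(shuffled(range(5), seed=0) == shuffled(range(5), seed=0))  # True
```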