"""Playground script for experimenting with scikit-learn models.

The script loads a dataset, splits it into train and test parts, fits the
pipeline built by ``create_model`` and prints the accuracy measured on the
held-out test split.
"""
import argparse

import sklearn
import sklearn.datasets
import sklearn.linear_model
# Imported explicitly: ``accuracy_score`` is used below and must not depend on
# another sklearn submodule happening to load ``sklearn.metrics`` for us.
import sklearn.metrics
import sklearn.model_selection
import sklearn.pipeline

parser = argparse.ArgumentParser(description='Playground for sklearn models')
parser.add_argument('--seed', type=int, default=42, help='random seed')
parser.add_argument(
    '--test_size',
    type=float,
    default=0.2,
    help='float indicating the ratio of the test examples to the whole dataset',
)


def fetch_dataset(args: argparse.Namespace):
    """Load the dataset used by the playground.

    Returns the breast-cancer classification dataset, i.e. the same data the
    script entry point trains on, so that the classifier and the accuracy
    metric used below receive discrete class labels.

    Args:
        args: Parsed command-line arguments. Currently unused; kept so the
            dataset choice can later be driven from the command line.

    Returns:
        The object returned by ``sklearn.datasets.load_breast_cancer()``;
        the script reads its ``data`` and ``target`` attributes.
    """
    return sklearn.datasets.load_breast_cancer()


def create_model(args: argparse.Namespace) -> sklearn.pipeline.Pipeline:
    """Build the model pipeline. This is the main place to experiment.

    Args:
        args: Parsed command-line arguments; ``args.seed`` seeds the
            classifier's random state.

    Returns:
        An unfitted single-step pipeline wrapping an ``SGDClassifier``.
    """
    # NOTE: the step label says "linear_regression" although the estimator is
    # a classifier. The label is kept unchanged because it is part of the
    # pipeline's parameter names (e.g. ``linear_regression__alpha``).
    classifier = sklearn.linear_model.SGDClassifier(
        verbose=1,
        random_state=args.seed,
    )
    return sklearn.pipeline.Pipeline([
        ('linear_regression', classifier),
    ])


def main(args: argparse.Namespace) -> float:
    """Train the model and report its accuracy on the test split.

    Args:
        args: Parsed command-line arguments providing ``seed`` and
            ``test_size``.

    Returns:
        The accuracy on the test split (also printed to stdout).
    """
    dataset = fetch_dataset(args)
    train_data, test_data, train_target, test_target = (
        sklearn.model_selection.train_test_split(
            dataset.data,
            dataset.target,
            test_size=args.test_size,
            random_state=args.seed,
        )
    )

    # create the model
    # this is your main playground
    model = create_model(args)

    # fit (train) the model on the training data
    model.fit(train_data, train_target)

    # predict on the test set
    prediction = model.predict(test_data)
    accuracy = sklearn.metrics.accuracy_score(test_target, prediction)
    print(accuracy)
    return accuracy


if __name__ == "__main__":
    main(parser.parse_args())