"""Playground for sklearn models.

Loads a classification dataset, splits it into train and test parts, fits
the pipeline built by ``create_model`` and prints the accuracy reached on
the test part.
"""
import argparse

import sklearn
import sklearn.datasets
import sklearn.linear_model
import sklearn.metrics
import sklearn.model_selection
import sklearn.pipeline

parser = argparse.ArgumentParser(description='Playground for sklearn models')
parser.add_argument('--seed', type=int, default=42, help='random seed')
parser.add_argument('--test_size', type=float, default=0.2,
                    help='float indicating the ratio of the test examples to the whole dataset')


def fetch_dataset(args):
    """Return the dataset the playground trains and evaluates on.

    Args:
        args: Parsed command-line arguments. Currently unused; accepted so
            that dataset selection can later be driven from the command line.

    Returns:
        The object returned by ``sklearn.datasets.load_breast_cancer()``; the
        caller reads its ``data`` and ``target`` attributes.
    """
    # A classification dataset is required here because ``create_model``
    # builds a classifier and the result is scored with accuracy.
    return sklearn.datasets.load_breast_cancer()


def create_model(args: argparse.Namespace) -> sklearn.pipeline.Pipeline:
    """Build the model pipeline; this is the main place to experiment.

    Args:
        args: Parsed command-line arguments; ``args.seed`` is passed to the
            estimator as its ``random_state``.

    Returns:
        A single-step ``Pipeline`` wrapping an ``SGDClassifier``.
    """
    # NOTE(review): the step is named 'linear_regression' although the
    # estimator is a classifier. The name is kept unchanged because it is the
    # key used by ``named_steps`` / ``set_params`` -- confirm before renaming.
    model = sklearn.pipeline.Pipeline([
        ('linear_regression',
         sklearn.linear_model.SGDClassifier(verbose=1, random_state=args.seed)),
    ])
    return model


def main(argv=None):
    """Run the playground: split the data, train, predict, print accuracy.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes ``argparse`` read ``sys.argv``.
    """
    args = parser.parse_args(argv)

    dataset = fetch_dataset(args)
    train_data, test_data, train_target, test_target = sklearn.model_selection.train_test_split(
        dataset.data,
        dataset.target,
        test_size=args.test_size,
        random_state=args.seed,
    )

    # create the model
    # this is your main playground
    model = create_model(args)

    # fit (train) the model on the training data
    model.fit(train_data, train_target)

    # predict on the test set
    prediction = model.predict(test_data)

    accuracy = sklearn.metrics.accuracy_score(test_target, prediction)
    print(accuracy)


if __name__ == "__main__":
    main()