# Playground script for training and evaluating scikit-learn models.
import argparse

import sklearn
import sklearn.datasets
import sklearn.linear_model
import sklearn.metrics
import sklearn.model_selection
import sklearn.pipeline


def _test_size_ratio(value):
    """Convert a ``--test_size`` argument to a float strictly between 0 and 1.

    Validating here lets argparse report a usage error straight away instead
    of the script failing later, when the value is handed to the train/test
    split.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a float or lies
            outside the open interval (0, 1).
    """
    try:
        ratio = float(value)
    except ValueError:
        raise argparse.ArgumentTypeError('invalid float value: %r' % (value,))
    # The chained comparison is False for NaN, so NaN is rejected as well.
    if not 0.0 < ratio < 1.0:
        raise argparse.ArgumentTypeError(
            'test_size must be strictly between 0 and 1, got %r' % (value,)
        )
    return ratio


# Command-line interface of the script; parsed under the __main__ guard.
parser = argparse.ArgumentParser(description='Playground for sklearn models')
parser.add_argument('--seed', type=int, default=42, help='random seed')
parser.add_argument(
    '--test_size',
    type=_test_size_ratio,
    default=0.2,
    help='float indicating the ratio of the test examples to the whole dataset',
)


def fetch_dataset(args):
    """Return the result of ``sklearn.datasets.fetch_california_housing()``.

    ``args`` is accepted but not used by this function.

    NOTE(review): the ``__main__`` block in this file does not call this
    helper; it loads the breast-cancer dataset directly. Confirm which
    dataset is intended before wiring this in.
    """
    housing = sklearn.datasets.fetch_california_housing()
    return housing


def create_model(args):
    """Build the single-step scikit-learn pipeline used by the script.

    The only step is an ``SGDClassifier`` created with ``verbose=1`` and
    ``random_state=args.seed``.

    NOTE(review): the step is labelled 'linear_regression' although the
    estimator is a classifier. The label is kept unchanged because it is the
    key under which the step is registered in the pipeline.
    """
    classifier = sklearn.linear_model.SGDClassifier(verbose=1, random_state=args.seed)
    steps = [('linear_regression', classifier)]
    return sklearn.pipeline.Pipeline(steps)


if __name__ == "__main__":
    args = parser.parse_args()

    # Load the breast-cancer dataset and hold out a test split; the split is
    # seeded with --seed so repeated runs use the same partition.
    dataset = sklearn.datasets.load_breast_cancer()
    train_data, test_data, train_target, test_target = sklearn.model_selection.train_test_split(
        dataset.data,
        dataset.target,
        test_size=args.test_size,
        random_state=args.seed,
    )

    # create the model
    # this is your main playground
    model = create_model(args)

    # fit (train) the model on the training data
    model.fit(train_data, train_target)

    # predict on the test set
    prediction = model.predict(test_data)

    # Fraction of test examples whose predicted label matches the target.
    accuracy = sklearn.metrics.accuracy_score(test_target, prediction)
    print(accuracy)