|
20 | 20 | import numpy as np
|
21 | 21 | from sklearn.metrics import accuracy_score
|
22 | 22 |
|
23 |
| -parser = argparse.ArgumentParser(description='scikit-learn random forest ' |
24 |
| - 'classification benchmark') |
25 | 23 |
|
26 |
| -parser.add_argument('--criterion', type=str, default='gini', |
27 |
| - choices=('gini', 'entropy'), |
28 |
| - help='The function to measure the quality of a split') |
29 |
| -parser.add_argument('--num-trees', type=int, default=100, |
30 |
| - help='Number of trees in the forest') |
31 |
| -parser.add_argument('--max-features', type=bench.float_or_int, default=None, |
32 |
| - help='Upper bound on features used at each split') |
33 |
| -parser.add_argument('--max-depth', type=int, default=None, |
34 |
| - help='Upper bound on depth of constructed trees') |
35 |
| -parser.add_argument('--min-samples-split', type=bench.float_or_int, default=2, |
36 |
| - help='Minimum samples number for node splitting') |
37 |
| -parser.add_argument('--max-leaf-nodes', type=int, default=None, |
38 |
| - help='Maximum leaf nodes per tree') |
39 |
| -parser.add_argument('--min-impurity-decrease', type=float, default=0., |
40 |
| - help='Needed impurity decrease for node splitting') |
41 |
| -parser.add_argument('--no-bootstrap', dest='bootstrap', default=True, |
42 |
| - action='store_false', help="Don't control bootstraping") |
def main():
    """Benchmark scikit-learn's random forest classifier and report the results.

    Loads the train/test split through ``bench.load_data``, builds a
    ``RandomForestClassifier`` from the parsed command-line options, times
    ``fit`` on the training set and ``predict`` on the test set with
    ``bench.measure_function_time``, and hands timings plus accuracy
    percentages to ``bench.print_output``.

    Takes no arguments: the options are read from the module-level ``params``
    object, which the ``__main__`` block assigns before this function is
    invoked. As a side effect, ``params.n_classes`` is set to the number of
    distinct labels found in the training targets.
    """
    # NOTE(review): imported here rather than at module top -- presumably so
    # the import happens inside whatever context ``bench.run_with_context``
    # establishes; confirm against ``bench``.
    from sklearn.ensemble import RandomForestClassifier

    # Load and convert data
    X_train, X_test, y_train, y_test = bench.load_data(params)

    # Translate the command-line options into estimator keyword arguments.
    forest_options = {
        'criterion': params.criterion,
        'n_estimators': params.num_trees,
        'max_depth': params.max_depth,
        'max_features': params.max_features,
        'min_samples_split': params.min_samples_split,
        'max_leaf_nodes': params.max_leaf_nodes,
        'min_impurity_decrease': params.min_impurity_decrease,
        'bootstrap': params.bootstrap,
        'random_state': params.seed,
        'n_jobs': params.n_jobs,
    }
    forest = RandomForestClassifier(**forest_options)

    # Recorded on params before the timed calls, which receive params.
    params.n_classes = len(np.unique(y_train))

    # Training stage: only fit() is timed; the training-set prediction below
    # is a plain call used solely to compute the training accuracy.
    train_seconds, _ = bench.measure_function_time(
        forest.fit, X_train, y_train, params=params)
    train_predictions = forest.predict(X_train)
    train_accuracy = 100 * accuracy_score(train_predictions, y_train)

    # Prediction stage: predict() on the test set is the timed call.
    test_seconds, test_predictions = bench.measure_function_time(
        forest.predict, X_test, params=params)
    test_accuracy = 100 * accuracy_score(test_predictions, y_test)

    bench.print_output(
        library='sklearn',
        algorithm='decision_forest_classification',
        stages=['training', 'prediction'],
        params=params,
        functions=['df_clsf.fit', 'df_clsf.predict'],
        times=[train_seconds, test_seconds],
        accuracy_type='accuracy[%]',
        accuracies=[train_accuracy, test_accuracy],
        data=[X_train, X_test],
        alg_instance=forest,
    )
63 | 58 |
|
64 |
| -params.n_classes = len(np.unique(y_train)) |
65 | 59 |
|
66 |
| -fit_time, _ = bench.measure_function_time(clf.fit, X_train, y_train, params=params) |
67 |
| -y_pred = clf.predict(X_train) |
68 |
| -train_acc = 100 * accuracy_score(y_pred, y_train) |
if __name__ == "__main__":
    # Script entry point: declare the random-forest options, parse the
    # command line via ``bench.parse_args``, then run ``main`` through
    # ``bench.run_with_context``.
    parser = argparse.ArgumentParser(
        description='scikit-learn random forest classification benchmark')

    # One (flag, add_argument keyword arguments) pair per option, registered
    # in the order listed.
    forest_cli_options = (
        ('--criterion',
         dict(type=str, default='gini', choices=('gini', 'entropy'),
              help='The function to measure the quality of a split')),
        ('--num-trees',
         dict(type=int, default=100,
              help='Number of trees in the forest')),
        ('--max-features',
         dict(type=bench.float_or_int, default=None,
              help='Upper bound on features used at each split')),
        ('--max-depth',
         dict(type=int, default=None,
              help='Upper bound on depth of constructed trees')),
        ('--min-samples-split',
         dict(type=bench.float_or_int, default=2,
              help='Minimum samples number for node splitting')),
        ('--max-leaf-nodes',
         dict(type=int, default=None,
              help='Maximum leaf nodes per tree')),
        ('--min-impurity-decrease',
         dict(type=float, default=0.,
              help='Needed impurity decrease for node splitting')),
        # Passing the flag stores False into ``params.bootstrap``.
        ('--no-bootstrap',
         dict(dest='bootstrap', default=True, action='store_false',
              help="Don't control bootstraping")),
    )
    for flag, spec in forest_cli_options:
        parser.add_argument(flag, **spec)

    # ``params`` has to remain a module-level name: ``main`` takes no
    # arguments and reads it as a global.
    params = bench.parse_args(parser)
    bench.run_with_context(params, main)
0 commit comments