Skip to content

Commit c44943c

Browse files
committed
Update xgboost configs; update conda envs; fix for data loaders
1 parent 780a141 commit c44943c

File tree

7 files changed

+68 additions, -8 deletions

7 files changed

+68
-8
lines changed

configs/common/xgboost.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"algorithm": {
66
"device": "cpu",
77
"estimator_params": { "tree_method": "hist" },
8-
"enable_modelbuilders": [true, false]
8+
"enable_modelbuilders": false
99
}
1010
},
1111
{

configs/regular/xgboost_binary.json

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@
6565
"data": {
6666
"dataset": "epsilon",
6767
"split_kwargs": {
68-
"train_size": 4000,
69-
"test_size": 80000
68+
"train_size": 10000,
69+
"test_size": 100000
7070
}
7171
},
7272
"algorithm": {
@@ -77,12 +77,34 @@
7777
"n_estimators": 200
7878
}
7979
}
80+
},
81+
{
82+
"data": {
83+
"dataset": "gisette",
84+
"split_kwargs": {
85+
"train_size": 2000,
86+
"test_size": 5000
87+
}
88+
},
89+
"algorithm": {
90+
"estimator_params": {
91+
"learning_rate": 0.15,
92+
"max_leaves": 256,
93+
"colsample_bytree": 0.1,
94+
"colsample_bynode": 0.1,
95+
"n_estimators": 100
96+
}
97+
}
8098
}
8199
]
82100
},
83101
"TEMPLATES": {
84102
"binary classification": {
85-
"SETS": ["xgboost binary classification", "xgboost implementations", "binary classification data"]
103+
"SETS": [
104+
"xgboost binary classification",
105+
"xgboost implementations",
106+
"binary classification data"
107+
]
86108
}
87109
}
88110
}

configs/regular/xgboost_multi.json

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,11 @@
6363
},
6464
"TEMPLATES": {
6565
"multi classification": {
66-
"SETS": ["xgboost multiclassification", "xgboost implementations", "multiclassification data"]
66+
"SETS": [
67+
"xgboost multiclassification",
68+
"xgboost implementations",
69+
"multiclassification data"
70+
]
6771
}
6872
}
6973
}

configs/regular/xgboost_regression.json

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,34 @@
7171
"n_estimators": 500
7272
}
7373
}
74+
},
75+
{
76+
"data": {
77+
"dataset": "gisette",
78+
"split_kwargs": {
79+
"train_size": 2000,
80+
"test_size": 5000
81+
}
82+
},
83+
"algorithm": {
84+
"estimator_params": {
85+
"learning_rate": 0.15,
86+
"max_leaves": 256,
87+
"colsample_bytree": 0.1,
88+
"colsample_bynode": 0.1,
89+
"n_estimators": 100
90+
}
91+
}
7492
}
7593
]
7694
},
7795
"TEMPLATES": {
7896
"regression": {
79-
"SETS": ["xgboost regression", "xgboost implementations", "regression data"]
97+
"SETS": [
98+
"xgboost regression",
99+
"xgboost implementations",
100+
"regression data"
101+
]
80102
}
81103
}
82104
}

envs/conda-env-rapids.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,16 @@ dependencies:
66
- python=3.10
77
- rapids
88
- cudatoolkit
9+
# blas and openmp
10+
- libblas=*=*mkl
11+
- _openmp_mutex=*=*llvm
912
# sklbench dependencies
13+
- scikit-learn
1014
- pandas
1115
- tabulate
1216
- fastparquet
17+
- h5py
18+
- kaggle
1319
- openpyxl
1420
- tqdm
1521
- psutil

envs/conda-env-sklearn.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,16 @@ dependencies:
1111
- faiss-cpu
1212
- intel::scikit-learn-intelex
1313
- intel::daal4py
14+
# blas and openmp
15+
- libblas=*=*mkl
16+
- _openmp_mutex=*=*llvm
1417
# sklbench dependencies
18+
- scikit-learn
1519
- pandas
1620
- tabulate
1721
- fastparquet
22+
- h5py
23+
- kaggle
1824
- openpyxl
1925
- tqdm
2026
- psutil

sklbench/datasets/loaders.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,7 @@ def load_epsilon(
361361
x_test, y_test = load_svmlight_file(local_url_test, dtype=np.float32)
362362

363363
x = sparse.vstack([x_train, x_test])
364-
y = np.vstack([y_train, y_test])
364+
y = np.hstack([y_train, y_test])
365365
y[y <= 0] = 0
366366

367367
data_desc = {
@@ -423,7 +423,7 @@ def convert_y(y, n_samples):
423423
y_test = convert_y(data["y_test"], test_size)
424424

425425
x = np.vstack([x_train, x_test])
426-
y = np.vstack([y_train, y_test])
426+
y = np.hstack([y_train, y_test])
427427

428428
data_desc = {
429429
"n_classes": 2,

0 commit comments

Comments
 (0)