IntelPython · PetrovKP · Apr 17, 2021 · Apr 15, 2021 · Apr 15, 2021 · Apr 15, 2021
diff --git a/README.md b/README.md
@@ -39,25 +39,29 @@ Create a suitable conda environment for each framework to test. Each item in the
 * [**scikit-learn**](sklearn_bench#how-to-create-conda-environment-for-benchmarking)
 
 ```bash
-conda create -n bench -c intel python=3.7 scikit-learn scikit-learn-intelex pandas
+pip install -r sklearn_bench/requirements.txt
+# or
+conda install -c conda-forge scikit-learn scikit-learn-intelex pandas
 ```
 
 * [**daal4py**](daal4py_bench#how-to-create-conda-environment-for-benchmarking)
 
 ```bash
-conda create -n bench -c intel python=3.7 scikit-learn daal4py pandas
+conda install -c conda-forge scikit-learn daal4py pandas
 ```
 
 * [**cuml**](cuml_bench#how-to-create-conda-environment-for-benchmarking)
 
 ```bash
-conda create -n bench -c rapidsai -c conda-forge python=3.7 cuml pandas cudf
+conda install -c rapidsai -c conda-forge cuml pandas cudf
 ```
 
 * [**xgboost**](xgboost_bench#how-to-create-conda-environment-for-benchmarking)
 
 ```bash
-conda create -n bench -c conda-forge python=3.7 xgboost pandas
+pip install -r xgboost_bench/requirements.txt
+# or
+conda install -c conda-forge xgboost pandas
 ```
 
 ## Running Python benchmarks with runner script
@@ -109,7 +113,7 @@ The configuration of benchmarks allows you to select the frameworks to run, sele
 
 ## Intel(R) Extension for Scikit-learn support
 
-When you run scikit-learn benchmarks on CPU, [Intel(R) Extension for Scikit-learn](https://github.com/intel/scikit-learn-intelex) is used by default. Use the ``--no-intel-optimized`` option to run the benchmarks without the extension. 
+When you run scikit-learn benchmarks on CPU, [Intel(R) Extension for Scikit-learn](https://github.com/intel/scikit-learn-intelex) is used by default. Use the ``--no-intel-optimized`` option to run the benchmarks without the extension.
 
 The following benchmarks have a GPU support:
 * dbscan

diff --git a/bench.py b/bench.py
@@ -340,6 +340,13 @@ def accuracy_score(y, yp):
     return columnwise_score(y, yp, lambda y1, y2: np.mean(y1 == y2))
 
 
+def log_loss(y, yp):  # thin wrapper over sklearn.metrics.log_loss
+    from sklearn.metrics import log_loss as sklearn_log_loss
+    y = convert_to_numpy(y)  # convert both inputs before scoring
+    yp = convert_to_numpy(yp)  # presumably class probabilities; TODO confirm
+    return sklearn_log_loss(y, yp)
+
+
 def rmse_score(y, yp):
     return columnwise_score(
         y, yp, lambda y1, y2: float(np.sqrt(np.mean((y1 - y2)**2))))

diff --git a/configs/svm/svc_proba_cuml.json b/configs/svm/svc_proba_cuml.json
@@ -0,0 +1,222 @@
+{
+    "common": {
+        "lib": ["cuml"],
+        "data-format": ["cudf"],
+        "data-order": ["F"],
+        "dtype": ["float64"],
+        "max-cache-size": [2],
+        "probability": [""]
+    },
+    "cases": [
+        {
+            "algorithm": "svm",
+            "dataset": [
+                {
+                    "source": "csv",
+                    "name": "ijcnn",
+                    "training":
+                    {
+                        "x": "data/ijcnn_x_train.csv",
+                        "y": "data/ijcnn_y_train.csv"
+                    },
+                    "testing":
+                    {
+                        "x": "data/ijcnn_x_test.csv",
+                        "y": "data/ijcnn_y_test.csv"
+                    }
+                }
+            ],
+            "C": [1000.0],
+            "kernel": ["linear"]
+        },
+        {
+            "algorithm": "svm",
+            "dataset": [
+                {
+                    "source": "csv",
+                    "name": "a9a",
+                    "training":
+                    {
+                        "x": "data/a9a_x_train.csv",
+                        "y": "data/a9a_y_train.csv"
+                    },
+                    "testing":
+                    {
+                        "x": "data/a9a_x_test.csv",
+                        "y": "data/a9a_y_test.csv"
+                    }
+                }
+            ],
+            "C": [500.0],
+            "kernel": ["rbf"]
+        },
+        {
+            "algorithm": "svm",
+            "dataset": [
+                {
+                    "source": "csv",
+                    "name": "gisette",
+                    "training":
+                    {
+                        "x": "data/gisette_x_train.csv",
+                        "y": "data/gisette_y_train.csv"
+                    },
+                    "testing":
+                    {
+                        "x": "data/gisette_x_test.csv",
+                        "y": "data/gisette_y_test.csv"
+                    }
+                }
+            ],
+            "C": [1.5e-3],
+            "kernel": ["linear"]
+        },
+        {
+            "algorithm": "svm",
+            "dataset": [
+                {
+                    "source": "csv",
+                    "name": "klaverjas",
+                    "training":
+                    {
+                        "x": "data/klaverjas_x_train.csv",
+                        "y": "data/klaverjas_y_train.csv"
+                    },
+                    "testing":
+                    {
+                        "x": "data/klaverjas_x_test.csv",
+                        "y": "data/klaverjas_y_test.csv"
+                    }
+                }
+            ],
+            "C": [1.0],
+            "kernel": ["rbf"]
+        },
+        {
+            "algorithm": "svm",
+            "dataset": [
+                {
+                    "source": "csv",
+                    "name": "connect",
+                    "training":
+                    {
+                        "x": "data/connect_x_train.csv",
+                        "y": "data/connect_y_train.csv"
+                    },
+                    "testing":
+                    {
+                        "x": "data/connect_x_test.csv",
+                        "y": "data/connect_y_test.csv"
+                    }
+                }
+            ],
+            "C": [100.0],
+            "kernel": ["linear"]
+        },
+        {
+            "algorithm": "svm",
+            "dataset": [
+                {
+                    "source": "csv",
+                    "name": "mnist",
+                    "training":
+                    {
+                        "x": "data/mnist_x_train.csv",
+                        "y": "data/mnist_y_train.csv"
+                    },
+                    "testing":
+                    {
+                        "x": "data/mnist_x_test.csv",
+                        "y": "data/mnist_y_test.csv"
+                    }
+                }
+            ],
+            "C": [50.0],
+            "kernel": ["rbf"]
+        },
+        {
+            "algorithm": "svm",
+            "dataset": [
+                {
+                    "source": "csv",
+                    "name": "sensit",
+                    "training":
+                    {
+                        "x": "data/sensit_x_train.csv",
+                        "y": "data/sensit_y_train.csv"
+                    },
+                    "testing":
+                    {
+                        "x": "data/sensit_x_test.csv",
+                        "y": "data/sensit_y_test.csv"
+                    }
+                }
+            ],
+            "C": [500.0],
+            "kernel": ["linear"]
+        },
+        {
+            "algorithm": "svm",
+            "dataset": [
+                {
+                    "source": "csv",
+                    "name": "skin_segmentation",
+                    "training":
+                    {
+                        "x": "data/skin_segmentation_x_train.csv",
+                        "y": "data/skin_segmentation_y_train.csv"
+                    },
+                    "testing":
+                    {
+                        "x": "data/skin_segmentation_x_test.csv",
+                        "y": "data/skin_segmentation_y_test.csv"
+                    }
+                }
+            ],
+            "C": [1.0],
+            "kernel": ["rbf"]
+        },
+        {
+            "algorithm": "svm",
+            "dataset": [
+                {
+                    "source": "csv",
+                    "name": "covertype",
+                    "training":
+                    {
+                        "x": "data/covertype_x_train.csv",
+                        "y": "data/covertype_y_train.csv"
+                    },
+                    "testing":
+                    {
+                        "x": "data/covertype_x_test.csv",
+                        "y": "data/covertype_y_test.csv"
+                    }
+                }
+            ],
+            "C": [100.0],
+            "kernel": ["rbf"]
+        },
+        {
+            "algorithm": "svm",
+            "dataset": [
+                {
+                    "source": "csv",
+                    "name": "codrnanorm",
+                    "training":
+                    {
+                        "x": "data/codrnanorm_x_train.csv",
+                        "y": "data/codrnanorm_y_train.csv"
+                    },
+                    "testing":
+                    {
+                        "x": "data/codrnanorm_x_test.csv",
+                        "y": "data/codrnanorm_y_test.csv"
+                    }
+                }
+            ],
+            "C": [1000.0],
+            "kernel": ["linear"]
+        }
+    ]
+}