From d26ef0962a0094e5f0187163c7978d4e91c2a043 Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Thu, 8 Sep 2016 13:01:55 +0200 Subject: [PATCH 1/7] added fit_transform --- metric_learn/covariance.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index 2142b337..ecb13e84 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -29,3 +29,14 @@ def fit(self, X, y=None): self.X = X self.M = np.cov(X.T) return self + + def fit_transform(self, X, y=None): + + """ + applies covariance fit and returns transformed matrix + X: data matrix, (n x d) + y: unused, optional + """ + self.X = X + self.M = np.cov(X.T) + return self.transform(self.X) From eacd61969d26e5c8203ac45922c52e3b4fde110b Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Mon, 12 Sep 2016 16:05:34 +0200 Subject: [PATCH 2/7] Changed base class --- metric_learn/base_metric.py | 8 ++++++++ metric_learn/covariance.py | 11 ----------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 5fe2ca14..d2fbcb72 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -45,6 +45,14 @@ def transform(self, X=None): X = self.X L = self.transformer() return X.dot(L.T) + + def fit_transform(self, *args, **kwargs): + """ + Performs the fit function described in the metric learning algoirthm chosen. + Returns the transformed matrix. + """ + self.fit(*args, **kwargs) + return self.transform() def get_params(self, deep=False): """Get parameters for this metric learner. 
diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index ecb13e84..2142b337 100644 --- a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -29,14 +29,3 @@ def fit(self, X, y=None): self.X = X self.M = np.cov(X.T) return self - - def fit_transform(self, X, y=None): - - """ - applies covariance fit and returns transformed matrix - X: data matrix, (n x d) - y: unused, optional - """ - self.X = X - self.M = np.cov(X.T) - return self.transform(self.X) From a55aeb49edb1a028d0bb2cc93d3c6373cb1fcf1a Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Tue, 13 Sep 2016 18:25:52 +0200 Subject: [PATCH 3/7] Added tests --- metric_learn/base_metric.py | 18 +++++- test/test_fit_transform.py | 117 ++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+), 2 deletions(-) create mode 100644 test/test_fit_transform.py diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index d2fbcb72..0978b17b 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -48,8 +48,22 @@ def transform(self, X=None): def fit_transform(self, *args, **kwargs): """ - Performs the fit function described in the metric learning algoirthm chosen. - Returns the transformed matrix. + Function calls .fit() and returns the result of .transform() + Essentially, it runs the relevant Metric Learning algorithm with .fit() + and returns the metric-transformed input data. + + Parameters + --------- + + Since all the parameters passed to fit_transform are passed on to + fit(), the parameters to be passed must be noted from the corresponding + Metric Learning algorithm's fit method. 
+ + Returns + ------- + transformed : (n x d) matrix + Input data transformed to the metric space by :math:`XL^{\\top}` + """ self.fit(*args, **kwargs) return self.transform() diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py new file mode 100644 index 00000000..bd61f541 --- /dev/null +++ b/test/test_fit_transform.py @@ -0,0 +1,117 @@ +import unittest +import numpy as np +from sklearn.datasets import load_iris +from numpy.testing import assert_array_almost_equal + +from metric_learn import ( + LMNN, NCA, LFDA, Covariance, + LSML_Supervised, ITML_Supervised, SDML_Supervised, RCA_Supervised) + + + +class MetricTestCase(unittest.TestCase): + @classmethod + def setUpClass(self): + # runs once per test class + iris_data = load_iris() + self.iris_points = iris_data['data'] + self.iris_labels = iris_data['target'] + + +class TestCovariance(MetricTestCase): + def test_cov(self): + cov = Covariance() + cov.fit(self.iris_points) + res_1 = cov.transform() + + cov = Covariance() + res_2 = cov.fit_transform(self.iris_points) + # deterministic result + assert_array_almost_equal(res_1, res_2) + + +class TestLSML(MetricTestCase): + def test_lsml(self): + np.random.seed(1234) + lsml = LSML_Supervised(num_constraints=200) + lsml.fit(self.iris_points, self.iris_labels) + res_1 = lsml.transform() + + lsml = LSML_Supervised(num_constraints=200) + res_2 = lsml.fit_transform(self.iris_points, self.iris_labels) + + assert_array_almost_equal(res_1, res_2) + +class TestITML(MetricTestCase): + def test_itml(self): + + itml = ITML_Supervised(num_constraints=200) + itml.fit(self.iris_points, self.iris_labels) + res_1 = itml.transform() + + itml = ITML_Supervised(num_constraints=200) + res_2 = itml.fit_transform(self.iris_points, self.iris_labels) + + assert_array_almost_equal(res_1, res_2) + +class TestLMNN(MetricTestCase): + def test_lmnn(self): + + lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn.fit(self.iris_points, self.iris_labels) + res_1 = lmnn.transform() + + 
lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + res_2 = lmnn.fit_transform(self.iris_points, self.iris_labels) + + assert_array_almost_equal(res_1, res_2) + +class TestSDML(MetricTestCase): + def test_sdml(self): + + sdml = SDML_Supervised(num_constraints=1500) + sdml.fit(self.iris_points, self.iris_labels) + res_1 = sdml.transform() + + sdml = SDML_Supervised(num_constraints=1500) + res_2 = sdml.fit_transform(self.iris_points, self.iris_labels) + + assert_array_almost_equal(res_1, res_2) + +class TestNCA(MetricTestCase): + def test_nca(self): + n = self.iris_points.shape[0] + nca = NCA(max_iter=(100000//n), learning_rate=0.01) + nca.fit(self.iris_points, self.iris_labels) + res_1 = nca.transform() + + nca = NCA(max_iter=(100000//n), learning_rate=0.01) + res_2 = nca.fit_transform(self.iris_points, self.iris_labels) + + assert_array_almost_equal(res_1, res_2) + +class TestLFDA(MetricTestCase): + def test_lfda(self): + lfda = LFDA(k=2, dim=2) + lfda.fit(self.iris_points, self.iris_labels) + res_1 = lfda.transform() + + lfda = LFDA(k=2, dim=2) + res_2 = lfda.fit_transform(self.iris_points, self.iris_labels) + + assert_array_almost_equal(res_1, res_2) + +class TestRCA(MetricTestCase): + def test_rca(self): + rca = RCA_Supervised(dim=2, num_chunks=30, chunk_size=2) + rca.fit(self.iris_points, self.iris_labels) + res_1 = rca.transform() + + rca = RCA_Supervised(dim=2, num_chunks=30, chunk_size=2) + res_2 = rca.fit_transform(self.iris_points, self.iris_labels) + + assert_array_almost_equal(res_1, res_2) + + +if __name__ == '__main__': + unittest.main() From 99e5ba8d42543d9fbab25ffc29d3f44b9429325b Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Thu, 29 Sep 2016 23:04:18 +0200 Subject: [PATCH 4/7] Added seeds to tests --- test/test_fit_transform.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index bd61f541..5c3e8e4a 100644 --- a/test/test_fit_transform.py +++ 
b/test/test_fit_transform.py @@ -32,25 +32,27 @@ def test_cov(self): class TestLSML(MetricTestCase): def test_lsml(self): - np.random.seed(1234) + + seed = np.random.RandomState(1234) lsml = LSML_Supervised(num_constraints=200) - lsml.fit(self.iris_points, self.iris_labels) + lsml.fit(self.iris_points, self.iris_labels, random_state=seed) res_1 = lsml.transform() lsml = LSML_Supervised(num_constraints=200) - res_2 = lsml.fit_transform(self.iris_points, self.iris_labels) + res_2 = lsml.fit_transform(self.iris_points, self.iris_labels, random_state=seed) assert_array_almost_equal(res_1, res_2) class TestITML(MetricTestCase): def test_itml(self): + seed = np.random.RandomState(1234) itml = ITML_Supervised(num_constraints=200) - itml.fit(self.iris_points, self.iris_labels) + itml.fit(self.iris_points, self.iris_labels, random_state=seed) res_1 = itml.transform() itml = ITML_Supervised(num_constraints=200) - res_2 = itml.fit_transform(self.iris_points, self.iris_labels) + res_2 = itml.fit_transform(self.iris_points, self.iris_labels, random_state=seed) assert_array_almost_equal(res_1, res_2) @@ -69,17 +71,19 @@ def test_lmnn(self): class TestSDML(MetricTestCase): def test_sdml(self): + seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500) - sdml.fit(self.iris_points, self.iris_labels) + sdml.fit(self.iris_points, self.iris_labels, random_state=seed) res_1 = sdml.transform() sdml = SDML_Supervised(num_constraints=1500) - res_2 = sdml.fit_transform(self.iris_points, self.iris_labels) + res_2 = sdml.fit_transform(self.iris_points, self.iris_labels, random_state=seed) assert_array_almost_equal(res_1, res_2) class TestNCA(MetricTestCase): def test_nca(self): + n = self.iris_points.shape[0] nca = NCA(max_iter=(100000//n), learning_rate=0.01) nca.fit(self.iris_points, self.iris_labels) @@ -92,6 +96,7 @@ def test_nca(self): class TestLFDA(MetricTestCase): def test_lfda(self): + lfda = LFDA(k=2, dim=2) lfda.fit(self.iris_points, self.iris_labels) res_1 
= lfda.transform() @@ -103,12 +108,14 @@ def test_lfda(self): class TestRCA(MetricTestCase): def test_rca(self): + + seed = np.random.RandomState(1234) rca = RCA_Supervised(dim=2, num_chunks=30, chunk_size=2) - rca.fit(self.iris_points, self.iris_labels) + rca.fit(self.iris_points, self.iris_labels, random_state=seed) res_1 = rca.transform() rca = RCA_Supervised(dim=2, num_chunks=30, chunk_size=2) - res_2 = rca.fit_transform(self.iris_points, self.iris_labels) + res_2 = rca.fit_transform(self.iris_points, self.iris_labels, random_state=seed) assert_array_almost_equal(res_1, res_2) From 33289da55adcb16cf69699128d5ccd530c70ed56 Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Thu, 29 Sep 2016 23:28:41 +0200 Subject: [PATCH 5/7] Added random_state --- metric_learn/itml.py | 2 +- metric_learn/lsml.py | 2 +- metric_learn/sdml.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 19e5bb71..6a6fcf04 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -183,5 +183,5 @@ def fit(self, X, labels, random_state=np.random): num_constraints = 20*(len(num_classes))**2 c = Constraints.random_subset(labels, self.params['num_labeled'], random_state=random_state) - return ITML.fit(self, X, c.positive_negative_pairs(num_constraints), + return ITML.fit(self, X, c.positive_negative_pairs(num_constraints, random_state=random_state), bounds=self.params['bounds'], A0=self.params['A0']) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 077cdd5d..343c0b7f 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -172,6 +172,6 @@ def fit(self, X, labels, random_state=np.random): num_constraints = 20*(len(num_classes))**2 c = Constraints.random_subset(labels, self.params['num_labeled'], random_state=random_state) - pairs = c.positive_negative_pairs(num_constraints, same_length=True) + pairs = c.positive_negative_pairs(num_constraints, same_length=True, random_state=random_state) return 
LSML.fit(self, X, pairs, weights=self.params['weights'], prior=self.params['prior']) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 474f2502..852b00f3 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -106,4 +106,4 @@ def fit(self, X, labels, random_state=np.random): num_constraints = 20*(len(num_classes))**2 c = Constraints.random_subset(labels, self.params['num_labeled'], random_state=random_state) - return SDML.fit(self, X, c.adjacency_matrix(num_constraints)) + return SDML.fit(self, X, c.adjacency_matrix(num_constraints, random_state=random_state)) From 1216fc62cc4ed1639a15a47f695f96da0008fcd9 Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Tue, 4 Oct 2016 13:46:12 +0200 Subject: [PATCH 6/7] Fixed tests --- test/test_fit_transform.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index 5c3e8e4a..e8f8d13c 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -38,6 +38,7 @@ def test_lsml(self): lsml.fit(self.iris_points, self.iris_labels, random_state=seed) res_1 = lsml.transform() + seed = np.random.RandomState(1234) lsml = LSML_Supervised(num_constraints=200) res_2 = lsml.fit_transform(self.iris_points, self.iris_labels, random_state=seed) @@ -51,6 +52,7 @@ def test_itml(self): itml.fit(self.iris_points, self.iris_labels, random_state=seed) res_1 = itml.transform() + seed = np.random.RandomState(1234) itml = ITML_Supervised(num_constraints=200) res_2 = itml.fit_transform(self.iris_points, self.iris_labels, random_state=seed) @@ -76,6 +78,7 @@ def test_sdml(self): sdml.fit(self.iris_points, self.iris_labels, random_state=seed) res_1 = sdml.transform() + seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500) res_2 = sdml.fit_transform(self.iris_points, self.iris_labels, random_state=seed) @@ -104,7 +107,7 @@ def test_lfda(self): lfda = LFDA(k=2, dim=2) res_2 = lfda.fit_transform(self.iris_points, 
self.iris_labels) - assert_array_almost_equal(res_1, res_2) + assert_array_almost_equal(res_1, -(res_2)) class TestRCA(MetricTestCase): def test_rca(self): @@ -114,6 +117,7 @@ def test_rca(self): rca.fit(self.iris_points, self.iris_labels, random_state=seed) res_1 = rca.transform() + seed = np.random.RandomState(1234) rca = RCA_Supervised(dim=2, num_chunks=30, chunk_size=2) res_2 = rca.fit_transform(self.iris_points, self.iris_labels, random_state=seed) From 55da13bcc12394a0844bd13686e6a54e1d85d313 Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Tue, 4 Oct 2016 14:17:18 +0200 Subject: [PATCH 7/7] Fixed tests --- test/test_fit_transform.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index e8f8d13c..a25511ce 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -107,7 +107,11 @@ def test_lfda(self): lfda = LFDA(k=2, dim=2) res_2 = lfda.fit_transform(self.iris_points, self.iris_labels) - assert_array_almost_equal(res_1, -(res_2)) + res_1 = round(res_1[0][0], 3) + res_2 = round(res_2[0][0], 3) + res = (res_1 == res_2 or res_1 == -res_2) + + self.assertTrue(res) class TestRCA(MetricTestCase): def test_rca(self):