From 0208d267a1f947ff395143217273704d5d84624e Mon Sep 17 00:00:00 2001
From: Titouan Vayer <titouan.vayer@gmail.com>
Date: Wed, 30 Oct 2024 10:39:58 +0100
Subject: [PATCH 1/4] Create EMD_wrapper_sparse.cpp

---
 ot/lp/EMD_wrapper_sparse.cpp | 93 ++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 ot/lp/EMD_wrapper_sparse.cpp
diff --git a/ot/lp/EMD_wrapper_sparse.cpp b/ot/lp/EMD_wrapper_sparse.cpp
new file mode 100644
index 000000000..609e476cb
--- /dev/null
+++ b/ot/lp/EMD_wrapper_sparse.cpp
@@ -0,0 +1,93 @@
+/* This file is a c++ wrapper function for computing the transportation cost
+ * between two vectors given a cost matrix.
+ *
+ * It was written by Antoine Rolet (2014) and mainly consists of a wrapper
+ * of the code written by Nicolas Bonneel available on this page
+ *          http://people.seas.harvard.edu/~nbonneel/FastTransport/
+ *
+ * It was then modified to make it more amenable to python inline calling
+ *
+ * Please give relevant credit to the original author (Nicolas Bonneel) if
+ * you use this code for a publication.
+ *
+ */
+
+#include "EMD.h"
+
+int EMD_wrap_all_sparse(int n1, int n2, double *X, double *Y,
+                        long *iD, long *jD, double *D, long nD,
+                        long *iG, long *jG, double *G, long *nG,
+                        double *alpha, double *beta, double *cost, int maxIter)
+{
+    // beware M and C anre strored in row major C style!!!
+
+    // Get the number of non zero coordinates for r and c and vectors
+    int n, m, cur;
+
+    typedef FullBipartiteDigraph Digraph;
+    DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
+
+    n = n1;
+    m = n2;
+
+    // Define the graph
+
+    std::vector<double> weights2(m);
+    Digraph di(n, m);
+    NetworkSimplexSimple<Digraph, double, double, node_id_type> net(di, true, n + m, n * m, maxIter);
+
+    // Set supply and demand, don't account for 0 values (faster)
+
+    // Demand is actually negative supply...
+
+    cur = 0;
+    for (int i = 0; i < n2; i++)
+    {
+        double val = *(Y + i);
+        if (val > 0)
+        {
+            weights2[cur] = -val;
+        }
+    }
+
+    // Define the graph
+    net.supplyMap(X, n, &weights2[0], m);
+
+    // Set the cost of each edge
+    for (int k = 0; k < nD; k++)
+    {
+        int i = iD[k];
+        int j = jD[k];
+        net.setCost(di.arcFromId(i * m + j), D[k]);
+    }
+
+    // Solve the problem with the network simplex algorithm
+
+    int ret = net.run();
+    if (ret == (int)net.OPTIMAL || ret == (int)net.MAX_ITER_REACHED)
+    {
+        *cost = net.totalCost();
+        Arc a;
+        di.first(a);
+        cur = 0;
+        for (; a != INVALID; di.next(a))
+        {
+            int i = di.source(a);
+            int j = di.target(a);
+            double flow = net.flow(a);
+            if (flow > 0)
+            {
+
+                *(G + cur) = flow;
+                *(iG + cur) = i;
+                *(jG + cur) = j - n;
+                *(alpha + i) = -net.potential(i);
+                *(beta + j - n) = net.potential(j);
+                cur++;
+            }
+        }
+        *nG = cur; // nb of value +1 for numpy indexing
+    }
+
+    return ret;
+}
\ No newline at end of file

From d657758a7a68b18d76d3a04bc76c44010daeb111 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 30 Oct 2024 11:29:04 +0100
Subject: [PATCH 2/4] firts shot that compiles

---
 ot/lp/EMD.h                  |  1 +
 ot/lp/EMD_wrapper.cpp        | 80 +++++++++++++++++++++++++++++++
 ot/lp/EMD_wrapper_sparse.cpp | 93 ------------------------------------
 ot/lp/emd_wrap.pyx           | 79 +++++++++++++++++++++++++++++-
 4 files changed, 159 insertions(+), 94 deletions(-)
 delete mode 100644 ot/lp/EMD_wrapper_sparse.cpp

diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h
index b56f0601b..2c8160f38 100644
--- a/ot/lp/EMD.h
+++ b/ot/lp/EMD.h
@@ -31,6 +31,7 @@ enum ProblemType {
 
 int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, uint64_t maxIter);
 int EMD_wrap_omp(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, uint64_t maxIter, int numThreads);
+int EMD_wrap_sparse(int n1, int n2, double *X, double *Y, uint32_t *iD, uint32_t *jD, double *D, uint64_t nD, uint32_t *iG, uint32_t *jG, double *G, uint64_t *nG, double *alpha, double *beta, double *cost, uint64_t maxIter);
 
 
 
diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp
index 4aa5a6e72..4911ebe2a 100644
--- a/ot/lp/EMD_wrapper.cpp
+++ b/ot/lp/EMD_wrapper.cpp
@@ -216,3 +216,83 @@ int EMD_wrap_omp(int n1, int n2, double *X, double *Y, double *D, double *G,
 
     return ret;
 }
+
+
+
+int EMD_wrap_sparse(int n1, int n2, double *X, double *Y,
+                        uint32_t *iD, uint32_t *jD, double *D, uint64_t nD,
+                        uint32_t *iG, uint32_t *jG, double *G, uint64_t *nG,
+                        double *alpha, double *beta, double *cost, uint64_t maxIter)
+    {
+    // beware M and C are stored in row major C style!!!
+
+    using namespace lemon;
+    uint64_t n, m, cur;
+
+    typedef FullBipartiteDigraph Digraph;
+    DIGRAPH_TYPEDEFS(Digraph);
+
+    n = n1;
+    m = n2;
+
+
+    std::vector<double> weights2(m);
+    Digraph di(n, m);
+    NetworkSimplexSimple<Digraph, double, double, node_id_type> net(di, true, n + m, nD, maxIter);
+
+    // Set supply and demand, don't account for 0 values (faster)
+
+    // Demand is actually negative supply...
+
+    cur = 0;
+    for (uint64_t i = 0; i < n2; i++)
+    {
+        double val = *(Y + i);
+        if (val > 0)
+        {
+            weights2[cur] = -val;
+        }
+    }
+
+    // Define the graph
+    net.supplyMap(X, n, &weights2[0], m);
+
+    // Set the cost of each edge
+    for (uint64_t k = 0; k < nD; k++)
+    {
+        int i = iD[k];
+        int j = jD[k];
+        net.setCost(di.arcFromId(i * m + j), D[k]);
+    }
+
+    // Solve the problem with the network simplex algorithm
+
+    int ret = net.run();
+    if (ret == (int)net.OPTIMAL || ret == (int)net.MAX_ITER_REACHED)
+    {
+        *cost = net.totalCost();
+        Arc a;
+        di.first(a);
+        cur = 0;
+        for (; a != INVALID; di.next(a))
+        {
+            int i = di.source(a);
+            int j = di.target(a);
+            double flow = net.flow(a);
+            if (flow > 0)
+            {
+
+                *(G + cur) = flow;
+                *(iG + cur) = i;
+                *(jG + cur) = j - n;
+                *(alpha + i) = -net.potential(i);
+                *(beta + j - n) = net.potential(j);
+                cur++;
+            }
+        }
+        *nG = cur; // nb of value +1 for numpy indexing
+    }
+
+    return ret;
+
+}
diff --git a/ot/lp/EMD_wrapper_sparse.cpp b/ot/lp/EMD_wrapper_sparse.cpp
deleted file mode 100644
index 609e476cb..000000000
--- a/ot/lp/EMD_wrapper_sparse.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-/* This file is a c++ wrapper function for computing the transportation cost
- * between two vectors given a cost matrix.
- *
- * It was written by Antoine Rolet (2014) and mainly consists of a wrapper
- * of the code written by Nicolas Bonneel available on this page
- *          http://people.seas.harvard.edu/~nbonneel/FastTransport/
- *
- * It was then modified to make it more amenable to python inline calling
- *
- * Please give relevant credit to the original author (Nicolas Bonneel) if
- * you use this code for a publication.
- *
- */
-
-#include "EMD.h"
-
-int EMD_wrap_all_sparse(int n1, int n2, double *X, double *Y,
-                        long *iD, long *jD, double *D, long nD,
-                        long *iG, long *jG, double *G, long *nG,
-                        double *alpha, double *beta, double *cost, int maxIter)
-{
-    // beware M and C anre strored in row major C style!!!
-
-    // Get the number of non zero coordinates for r and c and vectors
-    int n, m, cur;
-
-    typedef FullBipartiteDigraph Digraph;
-    DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
-
-    n = n1;
-    m = n2;
-
-    // Define the graph
-
-    std::vector<double> weights2(m);
-    Digraph di(n, m);
-    NetworkSimplexSimple<Digraph, double, double, node_id_type> net(di, true, n + m, n * m, maxIter);
-
-    // Set supply and demand, don't account for 0 values (faster)
-
-    // Demand is actually negative supply...
-
-    cur = 0;
-    for (int i = 0; i < n2; i++)
-    {
-        double val = *(Y + i);
-        if (val > 0)
-        {
-            weights2[cur] = -val;
-        }
-    }
-
-    // Define the graph
-    net.supplyMap(X, n, &weights2[0], m);
-
-    // Set the cost of each edge
-    for (int k = 0; k < nD; k++)
-    {
-        int i = iD[k];
-        int j = jD[k];
-        net.setCost(di.arcFromId(i * m + j), D[k]);
-    }
-
-    // Solve the problem with the network simplex algorithm
-
-    int ret = net.run();
-    if (ret == (int)net.OPTIMAL || ret == (int)net.MAX_ITER_REACHED)
-    {
-        *cost = net.totalCost();
-        Arc a;
-        di.first(a);
-        cur = 0;
-        for (; a != INVALID; di.next(a))
-        {
-            int i = di.source(a);
-            int j = di.target(a);
-            double flow = net.flow(a);
-            if (flow > 0)
-            {
-
-                *(G + cur) = flow;
-                *(iG + cur) = i;
-                *(jG + cur) = j - n;
-                *(alpha + i) = -net.potential(i);
-                *(beta + j - n) = net.potential(j);
-                cur++;
-            }
-        }
-        *nG = cur; // nb of value +1 for numpy indexing
-    }
-
-    return ret;
-}
\ No newline at end of file
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index 53df54fc3..636401de1 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -14,7 +14,7 @@ from ..utils import dist
 
 cimport cython
 cimport libc.math as math
-from libc.stdint cimport uint64_t
+from libc.stdint cimport uint64_t, uint32_t
 
 import warnings
 
@@ -23,6 +23,7 @@ cdef extern from "EMD.h":
     int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, uint64_t maxIter) nogil
     int EMD_wrap_omp(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, uint64_t maxIter, int numThreads) nogil
     cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED
+    int EMD_wrap_sparse(int n1, int n2, double *X, double *Y, uint32_t *iD, uint32_t *jD, double *D, uint64_t nD, uint32_t *iG, uint32_t *jG, double *G, uint64_t *nG, double *alpha, double *beta, double *cost, uint64_t maxIter) nogil
 
 
 def check_result(result_code):
@@ -117,6 +118,82 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
             result_code = EMD_wrap_omp(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter, numThreads)
     return G, cost, alpha, beta, result_code
 
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def emd_c_sparse(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"]  b, np.ndarray[uint32_t, ndim=1, mode="c"] iM, np.ndarray[uint32_t, ndim=1, mode="c"] jM,np.ndarray[double, ndim=1, mode="c"] M, uint64_t max_iter):
+    """
+        Solves the Earth Movers distance problem and returns the optimal transport matrix
+
+        gamm=emd(a,b,M)
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F
+
+        s.t. \gamma 1 = a
+
+             \gamma^T 1= b
+
+             \gamma\geq 0
+    where :
+
+    - M is the metric cost matrix
+    - a and b are the sample weights
+
+    .. warning::
+        Note that the M matrix needs to be a C-order :py.cls:`numpy.array`
+
+    .. warning::
+        The C++ solver discards all samples in the distributions with 
+        zeros weights. This means that while the primal variable (transport 
+        matrix) is exact, the solver only returns feasible dual potentials
+        on the samples with weights different from zero. 
+
+    Parameters
+    ----------
+    a : (ns,) numpy.ndarray, float64
+        source histogram
+    b : (nt,) numpy.ndarray, float64
+        target histogram
+    iM : (n,) numpy.ndarray, uint32
+        row indices of the non zero elements of the loss matrix (COO)
+    jM : (n,) numpy.ndarray, uint32
+        column indices of the non zero elements of the loss matrix (COO)
+    M : (n,) numpy.ndarray, float64
+        loss matrix (COO) 
+    max_iter : uint64_t
+        The maximum number of iterations before stopping the optimization
+        algorithm if it has not converged.
+
+    Returns
+    -------
+    gamma: (ns x nt) numpy.ndarray
+        Optimal transportation matrix for the given parameters
+
+    """
+    cdef int n1= a.shape[0]
+    cdef int n2= b.shape[0]
+    cdef int nmax=n1+n2-1
+    cdef int result_code = 0
+    cdef uint64_t nG=0
+
+    cdef double cost=0
+    cdef np.ndarray[double, ndim=1, mode="c"] alpha=np.zeros(n1)
+    cdef np.ndarray[double, ndim=1, mode="c"] beta=np.zeros(n2)
+
+    cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros(nmax)
+    cdef np.ndarray[uint32_t, ndim=1, mode="c"] iG=np.zeros(nmax, dtype=np.uint32)
+    cdef np.ndarray[uint32_t, ndim=1, mode="c"] jG=np.zeros(nmax, dtype=np.uint32)
+
+    with nogil:
+        result_code = EMD_wrap_sparse(n1, n2, <double*> a.data, <double*> b.data,
+                                        <uint32_t*> iM.data, <uint32_t*> jM.data, <double*> M.data, iM.shape[0],
+                                        <uint32_t*> iG.data, <uint32_t*> jG.data, <double*> G.data, &nG,
+                                        <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
+    return G[:nG], iG[:nG], jG[:nG], cost, alpha, beta, result_code
+
+
+
+
 
 @cython.boundscheck(False)
 @cython.wraparound(False)

From 0b19239fc67abd767ce7b83374bd2d2bb3af1b48 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 30 Oct 2024 15:15:32 +0100
Subject: [PATCH 3/4] debug call

---
 ot/lp/emd_wrap.pyx | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index 636401de1..4933b5b15 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -120,7 +120,7 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def emd_c_sparse(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"]  b, np.ndarray[uint32_t, ndim=1, mode="c"] iM, np.ndarray[uint32_t, ndim=1, mode="c"] jM,np.ndarray[double, ndim=1, mode="c"] M, uint64_t max_iter):
+def emd_c_sparse(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"]  b, np.ndarray[int, ndim=1, mode="c"] iM, np.ndarray[int, ndim=1, mode="c"] jM, np.ndarray[double, ndim=1, mode="c"] M, uint64_t max_iter):
     """
         Solves the Earth Movers distance problem and returns the optimal transport matrix
 
@@ -175,12 +175,13 @@ def emd_c_sparse(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim
     cdef int nmax=n1+n2-1
     cdef int result_code = 0
     cdef uint64_t nG=0
+    cdef uint64_t maxiter = max_iter
 
     cdef double cost=0
     cdef np.ndarray[double, ndim=1, mode="c"] alpha=np.zeros(n1)
     cdef np.ndarray[double, ndim=1, mode="c"] beta=np.zeros(n2)
 
-    cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros(nmax)
+    cdef np.ndarray[double, ndim=1, mode="c"] G=np.zeros(nmax)
     cdef np.ndarray[uint32_t, ndim=1, mode="c"] iG=np.zeros(nmax, dtype=np.uint32)
     cdef np.ndarray[uint32_t, ndim=1, mode="c"] jG=np.zeros(nmax, dtype=np.uint32)
 
@@ -188,7 +189,7 @@ def emd_c_sparse(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim
         result_code = EMD_wrap_sparse(n1, n2, <double*> a.data, <double*> b.data,
                                         <uint32_t*> iM.data, <uint32_t*> jM.data, <double*> M.data, iM.shape[0],
                                         <uint32_t*> iG.data, <uint32_t*> jG.data, <double*> G.data, &nG,
-                                        <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
+                                        <double*> alpha.data, <double*> beta.data, <double*> &cost, maxiter)
     return G[:nG], iG[:nG], jG[:nG], cost, alpha, beta, result_code
 
 

From 182660dd3174a4c32da1df69cd82e5c75b271c15 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Flamary?= <remi.flamary@gmail.com>
Date: Wed, 30 Oct 2024 15:45:05 +0100
Subject: [PATCH 4/4] it works !

---
 ot/lp/EMD_wrapper.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp
index 4911ebe2a..3634fa04c 100644
--- a/ot/lp/EMD_wrapper.cpp
+++ b/ot/lp/EMD_wrapper.cpp
@@ -238,19 +238,18 @@ int EMD_wrap_sparse(int n1, int n2, double *X, double *Y,
 
     std::vector<double> weights2(m);
     Digraph di(n, m);
-    NetworkSimplexSimple<Digraph, double, double, node_id_type> net(di, true, n + m, nD, maxIter);
+    NetworkSimplexSimple<Digraph, double, double, node_id_type> net(di, true, n + m, n*m, maxIter);
 
     // Set supply and demand, don't account for 0 values (faster)
 
     // Demand is actually negative supply...
 
-    cur = 0;
     for (uint64_t i = 0; i < n2; i++)
     {
         double val = *(Y + i);
         if (val > 0)
         {
-            weights2[cur] = -val;
+            weights2[i] = -val;
         }
     }