Update from_string to delegate to read_csv, so that dtype is automatically inferred (fixes #131)

alixdamman · gdementen · commit 78bb1bfabcd1 · 2017-03-07T16:24:41.000+01:00
diff --git a/larray/core.py b/larray/core.py
@@ -112,7 +112,8 @@
 from larray.utils import (table2str, size2str, unique, csv_open, unzip, long,
                           decode, basestring, unicode, bytes, izip, rproduct,
                           ReprString, duplicates, array_lookup2, strip_rows,
-                          skip_comment_cells, find_closing_chr, PY3)
+                          skip_comment_cells, find_closing_chr, StringIO, PY3)
+
 
 def _range_to_slice(seq, length=None):
     """
@@ -9165,7 +9166,7 @@ def from_lists(data, nb_index=None, index_col=None):
     return df_aslarray(df, raw=index_col is None, parse_header=False)
 
 
-def from_string(s, nb_index=None, index_col=None, sep=','):
+def from_string(s, nb_index=None, index_col=None, sep=',', **kwargs):
     """Create an array from a multi-line string.
 
     Parameters
@@ -9179,14 +9180,18 @@ def from_string(s, nb_index=None, index_col=None, sep=','):
         List of columns for the index (ex. [0, 1, 2, 3]). Defaults to None (see nb_index above).
     sep : str
         delimiter used to split each line into cells.
+    \**kwargs
+        Extra keyword arguments, passed on to read_csv. See the arguments of the pandas read_csv function.
 
     Returns
     -------
     LArray
 
     Examples
     --------
-
+    >>> from_string("sex,M,F\\n,0,1")
+    sex | M | F
+        | 0 | 1
     >>> from_string("nat\\sex,M,F\\nBE,0,1\\nFO,2,3")
     nat\sex | M | F
          BE | 0 | 1
@@ -9200,6 +9205,16 @@ def from_string(s, nb_index=None, index_col=None, sep=','):
     nat\sex | M | F
          BE | 0 | 1
          FO | 2 | 3
+    >>> from_string('''age,nat\\sex, M, F
+    ...                0,  BE,       0, 1
+    ...                0,  FO,       2, 3
+    ...                1,  BE,       4, 5
+    ...                1,  FO,       6, 7''')
+    age | nat\sex | M | F
+      0 |      BE | 0 | 1
+      0 |      FO | 2 | 3
+      1 |      BE | 4 | 5
+      1 |      FO | 6 | 7
 
     Empty lines at the beginning or end are ignored, so one can also format the string like this:
 
@@ -9212,9 +9227,8 @@ def from_string(s, nb_index=None, index_col=None, sep=','):
          BE | 0 | 1
          FO | 2 | 3
     """
-    data = [[cell.strip() for cell in line.split(sep)]
-            for line in s.strip().splitlines()]
-    return from_lists(data, nb_index=nb_index, index_col=index_col)
+
+    return read_csv(StringIO(s), nb_index=nb_index, index_col=index_col, sep=sep, skipinitialspace=True, **kwargs)
 
 
 def read_csv(filepath, nb_index=None, index_col=None, sep=',', headersep=None, na=np.nan,
diff --git a/larray/utils.py b/larray/utils.py
@@ -19,7 +19,7 @@
 
 import numpy as np
 
-if sys.version < '3':
+if sys.version_info[0] < 3:
     basestring = basestring
     bytes = str
     unicode = unicode
@@ -33,6 +33,11 @@
     long = int
     PY3 = True
 
+if PY3:
+    from io import StringIO
+else:
+    from StringIO import StringIO
+
 
 def csv_open(filename, mode='r'):
     assert 'b' not in mode and 't' not in mode