From a75db0c8dc6a48883711fd895b1df0648ed0781f Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sat, 7 Dec 2019 15:16:17 -0800
Subject: [PATCH 1/5] Make read_axes return instead of alter self

---
 pandas/io/pytables.py | 77 ++++++++++++++++++++++++++-----------------
 1 file changed, 47 insertions(+), 30 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index a95d7f39ab82c..0cb086efee4a2 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -1999,15 +1999,16 @@ def convert(
             kwargs["name"] = _ensure_decoded(self.index_name)
         # making an Index instance could throw a number of different errors
         try:
-            self.values = Index(values, **kwargs)
+            values = Index(values, **kwargs)
         except ValueError:
             # if the output freq is different that what we recorded,
             # it should be None (see also 'doc example part 2')
             if "freq" in kwargs:
                 kwargs["freq"] = None
-            self.values = Index(values, **kwargs)
+            values = Index(values, **kwargs)
 
-        self.values = _set_tz(self.values, self.tz)
+        out = _set_tz(values, self.tz)
+        return out, out
 
     def take_data(self):
         """ return the values & release the memory """
@@ -2195,7 +2196,8 @@ def convert(
 
         _start = start if start is not None else 0
         _stop = min(stop, self.table.nrows) if stop is not None else self.table.nrows
-        self.values = Int64Index(np.arange(_stop - _start))
+        out = Int64Index(np.arange(_stop - _start))
+        return out, out
 
     def get_attr(self):
         pass
@@ -2444,6 +2446,7 @@ def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
 
         # NB: unlike in the other calls to set_data, self.dtype may not be None here
         self.set_data(values)
+        converted = self.data  # TODO: Setting should not be necessary
 
         # use the meta if needed
         meta = _ensure_decoded(self.meta)
@@ -2456,25 +2459,25 @@ def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
             if dtype == "datetime64":
 
                 # recreate with tz if indicated
-                self.data = _set_tz(self.data, self.tz, coerce=True)
+                converted = _set_tz(converted, self.tz, coerce=True)
 
             elif dtype == "timedelta64":
-                self.data = np.asarray(self.data, dtype="m8[ns]")
+                converted = np.asarray(converted, dtype="m8[ns]")
             elif dtype == "date":
                 try:
-                    self.data = np.asarray(
-                        [date.fromordinal(v) for v in self.data], dtype=object
+                    converted = np.asarray(
+                        [date.fromordinal(v) for v in converted], dtype=object
                     )
                 except ValueError:
-                    self.data = np.asarray(
-                        [date.fromtimestamp(v) for v in self.data], dtype=object
+                    converted = np.asarray(
+                        [date.fromtimestamp(v) for v in converted], dtype=object
                     )
 
             elif meta == "category":
 
                 # we have a categorical
                 categories = self.metadata
-                codes = self.data.ravel()
+                codes = converted.ravel()
 
                 # if we have stored a NaN in the categories
                 # then strip it; in theory we could have BOTH
@@ -2491,23 +2494,25 @@ def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
                         categories = categories[~mask]
                         codes[codes != -1] -= mask.astype(int).cumsum().values
 
-                self.data = Categorical.from_codes(
+                converted = Categorical.from_codes(
                     codes, categories=categories, ordered=self.ordered
                 )
 
             else:
 
                 try:
-                    self.data = self.data.astype(dtype, copy=False)
+                    converted = converted.astype(dtype, copy=False)
                 except TypeError:
-                    self.data = self.data.astype("O", copy=False)
+                    converted = converted.astype("O", copy=False)
 
         # convert nans / decode
         if _ensure_decoded(self.kind) == "string":
-            self.data = _unconvert_string_array(
-                self.data, nan_rep=nan_rep, encoding=encoding, errors=errors
+            converted = _unconvert_string_array(
+                converted, nan_rep=nan_rep, encoding=encoding, errors=errors
             )
 
+        return self.values, converted
+
     def get_attr(self):
         """ get the data for this column """
         self.values = getattr(self.attrs, self.kind_attr, None)
@@ -3608,7 +3613,7 @@ def create_index(self, columns=None, optlevel=None, kind: Optional[str] = None):
                         )
                     v.create_index(**kw)
 
-    def read_axes(
+    def _read_axes(
         self, where, start: Optional[int] = None, stop: Optional[int] = None
     ) -> bool:
         """
@@ -3622,8 +3627,6 @@ def read_axes(
 
         Returns
         -------
-        bool
-            Indicates success.
         """
 
         # validate the version
@@ -3637,10 +3640,11 @@ def read_axes(
         selection = Selection(self, where=where, start=start, stop=stop)
         values = selection.select()
 
+        results = []
         # convert the data
         for a in self.axes:
             a.set_info(self.info)
-            a.convert(
+            res = a.convert(
                 values,
                 nan_rep=self.nan_rep,
                 encoding=self.encoding,
@@ -3648,8 +3652,9 @@ def read_axes(
                 start=start,
                 stop=stop,
             )
+            results.append(res)
 
-        return True
+        return results
 
     def get_object(self, obj, transposed: bool):
         """ return the data for this obj """
@@ -4082,13 +4087,13 @@ def read_column(
                 # column must be an indexable or a data column
                 c = getattr(self.table.cols, column)
                 a.set_info(self.info)
-                a.convert(
+                col_values = a.convert(
                     c[start:stop],
                     nan_rep=self.nan_rep,
                     encoding=self.encoding,
                     errors=self.errors,
                 )
-                return Series(_set_tz(a.take_data(), a.tz), name=column)
+                return Series(_set_tz(col_values[1], a.tz), name=column)
 
         raise KeyError(f"column [{column}] not found in the table")
 
@@ -4372,7 +4377,8 @@ def read(
         stop: Optional[int] = None,
     ):
 
-        if not self.read_axes(where=where, start=start, stop=stop):
+        result = self._read_axes(where=where, start=start, stop=stop)
+        if result is False:
             return None
 
         info = (
@@ -4380,26 +4386,37 @@ def read(
             if len(self.non_index_axes)
             else dict()
         )
-        index = self.index_axes[0].values
+
+        axes = list(self.axes)
+        inds = [i for i in range(len(axes)) if axes[i] is self.index_axes[0]]
+        assert len(inds) == 1
+        ind = inds[0]
+
+        index = result[ind][0]#self.index_axes[0].values
+
         frames = []
-        for a in self.values_axes:
+        for i, a in enumerate(self.axes):
+            if a not in self.values_axes:
+                continue
+            values, cvalues = result[i]
 
             # we could have a multi-index constructor here
             # ensure_index doesn't recognized our list-of-tuples here
             if info.get("type") == "MultiIndex":
-                cols = MultiIndex.from_tuples(a.values)
+                cols = MultiIndex.from_tuples(values)
             else:
-                cols = Index(a.values)
+                cols = Index(values)
+
             names = info.get("names")
             if names is not None:
                 cols.set_names(names, inplace=True)
 
             if self.is_transposed:
-                values = a.cvalues
+                values = cvalues
                 index_ = cols
                 cols_ = Index(index, name=getattr(index, "name", None))
             else:
-                values = a.cvalues.T
+                values = cvalues.T
                 index_ = Index(index, name=getattr(index, "name", None))
                 cols_ = cols
 

From f0dfbb649f7b4291d5efb127f4edc3fcb0283ccf Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 9 Dec 2019 10:56:09 -0800
Subject: [PATCH 2/5] port _get_data_and_dtype_name to avoid calling set_data
 at all

---
 pandas/io/pytables.py | 49 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 14501eed84e19..e9eda31c96eb6 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -2373,24 +2373,28 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
 
         assert self.typ is not None
         if self.dtype is None:
-            self.set_data(values)
+            converted, dtype_name = _get_data_and_dtype_name(values)
+            kind = _dtype_to_kind(dtype_name)
         else:
-            self.data = values
-        converted = self.data  # TODO: Setting should not be necessary
+            converted = values
+            dtype_name = self.dtype
+            kind = self.kind
 
         # use the meta if needed
         meta = _ensure_decoded(self.meta)
+        metadata = self.metadata
+        ordered = self.ordered
+        tz = self.tz
 
-        assert self.dtype is not None
+        assert dtype_name is not None
         # convert to the correct dtype
-        dtype = _ensure_decoded(self.dtype)
-
+        dtype = _ensure_decoded(dtype_name)
 
         # reverse converts
         if dtype == "datetime64":
 
             # recreate with tz if indicated
-            converted = _set_tz(converted, self.tz, coerce=True)
+            converted = _set_tz(converted, tz, coerce=True)
 
         elif dtype == "timedelta64":
             converted = np.asarray(converted, dtype="m8[ns]")
@@ -2407,7 +2411,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
         elif meta == "category":
 
             # we have a categorical
-            categories = self.metadata
+            categories = metadata
             codes = converted.ravel()
 
             # if we have stored a NaN in the categories
@@ -2426,7 +2430,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
                     codes[codes != -1] -= mask.astype(int).cumsum().values
 
             converted = Categorical.from_codes(
-                codes, categories=categories, ordered=self.ordered
+                codes, categories=categories, ordered=ordered
             )
 
         else:
@@ -2436,9 +2440,8 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
             except TypeError:
                 converted = converted.astype("O", copy=False)
 
-
         # convert nans / decode
-        if _ensure_decoded(self.kind) == "string":
+        if _ensure_decoded(kind) == "string":
             converted = _unconvert_string_array(
                 converted, nan_rep=nan_rep, encoding=encoding, errors=errors
             )
@@ -4357,7 +4360,7 @@ def read(
         assert len(inds) == 1
         ind = inds[0]
 
-        index = result[ind][0]#self.index_axes[0].values
+        index = result[ind][0]
 
         frames = []
         for i, a in enumerate(self.axes):
@@ -4979,6 +4982,28 @@ def _dtype_to_kind(dtype_str: str) -> str:
     return kind
 
 
+def _get_data_and_dtype_name(data: Union[np.ndarray, ABCExtensionArray]):
+    """
+    Convert the passed data into a storable ndarray and a dtype-name string.
+    """
+    if is_categorical_dtype(data.dtype):
+        data = data.codes  # categoricals are handled via their codes
+
+    # Drop the "[...]" suffix (the TZ for datetime64tz); needed in tests. TODO: why?
+    dtype_name = data.dtype.name.split("[")[0]
+
+    if data.dtype.kind in ["m", "M"]:
+        data = np.asarray(data.view("i8"))  # reinterpret as "i8" values
+        # TODO: we used to reshape for the dt64tz case, but no longer
+        #  doing that doesn't seem to break anything.  why?
+
+    elif isinstance(data, PeriodIndex):
+        data = data.asi8
+
+    data = np.asarray(data)
+    return data, dtype_name
+
+
 class Selection:
     """
     Carries out a selection operation on a tables.Table object.

From 0e46c4d5b1e20c0030191b2d6553965338186cf0 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 10 Dec 2019 08:25:18 -0800
Subject: [PATCH 3/5] comment

---
 pandas/io/pytables.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index e9eda31c96eb6..8c2218e7ac36f 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -2373,6 +2373,8 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
 
         assert self.typ is not None
         if self.dtype is None:
+            # Note: in tests we never have timedelta64 or datetime64 here,
+            #  so the _get_data_and_dtype_name call may be unnecessary
             converted, dtype_name = _get_data_and_dtype_name(values)
             kind = _dtype_to_kind(dtype_name)
         else:

From e7974f2fca6153f8c5f36b1733d720612e91a508 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 10 Dec 2019 11:27:49 -0800
Subject: [PATCH 4/5] DOC: improve docstrings

---
 pandas/io/pytables.py | 47 +++++++++++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 401f5b9e8422d..8456a4b6015e4 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -1974,7 +1974,9 @@ def is_indexed(self) -> bool:
         return getattr(self.table.cols, self.cname).is_indexed  # type: ignore
 
     def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
-        """ set the values from this selection: take = take ownership """
+        """
+        Convert the data from this selection to the appropriate pandas type.
+        """
         assert isinstance(values, np.ndarray), type(values)
 
         # values is a recarray
@@ -2158,7 +2160,7 @@ def is_indexed(self) -> bool:
 
     def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
         """
-        Set the values from this selection.
+        Convert the data from this selection to the appropriate pandas type.
 
         Parameters
         ----------
@@ -2356,8 +2358,20 @@ def validate_attr(self, append):
                 )
 
     def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
-        """set the data from this selection (and convert to the correct dtype
-        if we can)
+        """
+        Convert the data from this selection to the appropriate pandas type.
+
+        Parameters
+        ----------
+        values : np.ndarray
+        nan_rep :
+        encoding : str
+        errors : str
+
+        Returns
+        -------
+        index : listlike to become an Index
+        data : ndarraylike to become a column
         """
         assert isinstance(values, np.ndarray), type(values)
 
@@ -3570,7 +3584,7 @@ def create_index(self, columns=None, optlevel=None, kind: Optional[str] = None):
 
     def _read_axes(
         self, where, start: Optional[int] = None, stop: Optional[int] = None
-    ) -> bool:
+    ) -> List[Tuple[ArrayLike, ArrayLike]]:
         """
         Create the axes sniffed from the table.
 
@@ -3582,15 +3596,9 @@ def _read_axes(
 
         Returns
         -------
+        List[Tuple[index_values, column_values]]
         """
 
-        # validate the version
-        self.validate_version(where)
-
-        # infer the data kind
-        if not self.infer_axes():
-            return False
-
         # create the selection
         selection = Selection(self, where=where, start=start, stop=stop)
         values = selection.select()
@@ -4341,10 +4349,15 @@ def read(
         stop: Optional[int] = None,
     ):
 
-        result = self._read_axes(where=where, start=start, stop=stop)
-        if result is False:
+        # validate the version
+        self.validate_version(where)
+
+        # infer the data kind
+        if not self.infer_axes():
             return None
 
+        result = self._read_axes(where=where, start=start, stop=stop)
+
         info = (
             self.info.get(self.non_index_axes[0][0], dict())
             if len(self.non_index_axes)
@@ -4362,14 +4375,14 @@ def read(
         for i, a in enumerate(self.axes):
             if a not in self.values_axes:
                 continue
-            values, cvalues = result[i]
+            index_vals, cvalues = result[i]
 
             # we could have a multi-index constructor here
             # ensure_index doesn't recognized our list-of-tuples here
             if info.get("type") == "MultiIndex":
-                cols = MultiIndex.from_tuples(values)
+                cols = MultiIndex.from_tuples(index_vals)
             else:
-                cols = Index(values)
+                cols = Index(index_vals)
 
             names = info.get("names")
             if names is not None:

From 648c86410d5d5f474142eb6e6251e80c29661b1f Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 11 Dec 2019 08:43:32 -0800
Subject: [PATCH 5/5] inline

---
 pandas/io/pytables.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index bd5ed4fa53456..3d4e4252adfdd 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -4360,8 +4360,7 @@ def read(
             else dict()
         )
 
-        axes = list(self.axes)
-        inds = [i for i in range(len(axes)) if axes[i] is self.index_axes[0]]
+        inds = [i for i, ax in enumerate(self.axes) if ax is self.index_axes[0]]
         assert len(inds) == 1
         ind = inds[0]