From 4db9ccb49480426c93f78a5e6fe970d543fdf19c Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Mon, 19 Apr 2021 10:50:05 -0400
Subject: [PATCH 01/17] ENH: option to export df to Stata dataset with value
 labels

GH38454
---
 pandas/io/stata.py            | 125 ++++++++++++++++++++++++++++++++--
 pandas/tests/io/test_stata.py |  36 ++++++++++
 2 files changed, 157 insertions(+), 4 deletions(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 1deaa634ce3ae..faef07e9a1d88 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -748,6 +748,71 @@ def generate_value_label(self, byteorder: str) -> bytes:
         return bio.getvalue()
 
 
+class StataNonCatValueLabel(StataValueLabel):
+    """
+    Prepare formatted version of value labels
+
+    Parameters
+    ----------
+    labname : str
+        Value label name
+    value_labels: Dictionary
+        Mapping of values to labels
+    encoding : {"latin-1", "utf-8"}
+        Encoding to use for value labels.
+    """
+
+    def __init__(
+        self,
+        labname: str,
+        value_labels: dict[float | int, str],
+        encoding: str = "latin-1",
+    ):
+
+        if encoding not in ("latin-1", "utf-8"):
+            raise ValueError("Only latin-1 and utf-8 are supported.")
+
+        self.labname = labname
+        self._encoding = encoding
+        self.value_labels = [(val, lab) for val, lab in value_labels.items()]
+        self.value_labels.sort(key=lambda x: x[0])
+
+        self.text_len = 0
+        self.txt: list[bytes] = []
+        self.n = 0
+
+        # Compute lengths and setup lists of offsets and labels
+        offsets: list[int] = []
+        values: list[int] = []
+        for vl in self.value_labels:
+            category = vl[1]
+            if not isinstance(category, str):
+                category = str(category)
+                warnings.warn(
+                    value_label_mismatch_doc.format(labname),
+                    ValueLabelTypeMismatch,
+                )
+            category = category.encode(encoding)
+            offsets.append(self.text_len)
+            self.text_len += len(category) + 1  # +1 for the padding
+            values.append(vl[0])
+            self.txt.append(category)
+            self.n += 1
+
+        if self.text_len > 32000:
+            raise ValueError(
+                "Stata value labels for a single variable must "
+                "have a combined length less than 32,000 characters."
+            )
+
+        # Ensure int32
+        self.off = np.array(offsets, dtype=np.int32)
+        self.val = np.array(values, dtype=np.int32)
+
+        # Total length
+        self.len = 4 + 4 + 4 * self.n + 4 * self.n + self.text_len
+
+
 class StataMissingValue:
     """
     An observation's missing value.
@@ -2159,6 +2224,10 @@ class StataWriter(StataParser):
     variable_labels : dict
         Dictionary containing columns as keys and variable labels as values.
         Each label must be 80 characters or smaller.
+    value_labels : dict
+        Dictionary containing columns as keys and dictionaries of column value
+        to labels as values. Labels for a single variable must be 32,000
+        characters or smaller.
     compression : str or dict, default 'infer'
         For on-the-fly compression of the output dta. If string, specifies
         compression mode. If dict, value at key 'method' specifies compression
@@ -2223,6 +2292,7 @@ def __init__(
         time_stamp: datetime.datetime | None = None,
         data_label: str | None = None,
         variable_labels: dict[Hashable, str] | None = None,
+        value_labels: dict[str, dict[float | int, str]] | None = None,
         compression: CompressionOptions = "infer",
         storage_options: StorageOptions = None,
     ):
@@ -2232,6 +2302,8 @@ def __init__(
         self._time_stamp = time_stamp
         self._data_label = data_label
         self._variable_labels = variable_labels
+        self._non_cat_value_labels = value_labels
+        self._value_labels: list[StataValueLabel] = []
         self._compression = compression
         self._output_file: Buffer | None = None
         # attach nobs, nvars, data, varlist, typlist
@@ -2259,17 +2331,47 @@ def _write_bytes(self, value: bytes) -> None:
         """
         self.handles.handle.write(value)  # type: ignore[arg-type]
 
+    def _prepare_non_cat_value_labels(self, data: DataFrame) -> None:
+        """
+        Validate value labels given for non-categorical columns and store the formatted
+        labels
+        """
+        self._has_value_labels = np.repeat(False, data.shape[1])
+        if self._non_cat_value_labels is None:
+            return
+
+        for labname, labels in self._non_cat_value_labels.items():
+            if labname not in data.columns:
+                # Value label should apply to a column
+                raise ValueError(
+                    f"Can't create value labels for {labname}, it wasn't "
+                    "found in the dataset."
+                )
+            if is_categorical_dtype(data[labname].dtype):
+                # Labels should not be passed explicitly for categorical
+                # columns that will be converted to int
+                raise ValueError(
+                    f"Can't create value labels for {labname}, a categorical "
+                    "column. Value labels are created automatically before "
+                    "writing categorical columns."
+                )
+            svl = StataNonCatValueLabel(labname, labels)
+            self._value_labels.append(svl)
+
+        has_non_cat_val_labels = data.columns.isin(self._non_cat_value_labels.keys())
+        self._has_value_labels |= has_non_cat_val_labels
+
     def _prepare_categoricals(self, data: DataFrame) -> DataFrame:
         """
         Check for categorical columns, retain categorical information for
         Stata file and convert categorical data to int
         """
         is_cat = [is_categorical_dtype(data[col].dtype) for col in data]
-        self._is_col_cat = is_cat
-        self._value_labels: list[StataValueLabel] = []
         if not any(is_cat):
             return data
 
+        self._has_value_labels |= np.array(is_cat)
+
         get_base_missing_value = StataMissingValue.get_base_missing_value
         data_formatted = []
         for col, col_is_cat in zip(data, is_cat):
@@ -2449,6 +2551,9 @@ def _prepare_pandas(self, data: DataFrame) -> None:
         # Replace NaNs with Stata missing values
         data = self._replace_nans(data)
 
+        # Create value labels for non-categorical data
+        self._prepare_non_cat_value_labels(data)
+
         # Convert categoricals to int data, and strip labels
         data = self._prepare_categoricals(data)
 
@@ -2688,7 +2793,7 @@ def _write_value_label_names(self) -> None:
         # lbllist, 33*nvar, char array
         for i in range(self.nvar):
             # Use variable name when categorical
-            if self._is_col_cat[i]:
+            if self._has_value_labels[i]:
                 name = self.varlist[i]
                 name = self._null_terminate_str(name)
                 name = _pad_bytes(name[:32], 33)
@@ -3041,6 +3146,10 @@ class StataWriter117(StataWriter):
     variable_labels : dict
         Dictionary containing columns as keys and variable labels as values.
         Each label must be 80 characters or smaller.
+    value_labels : dict
+        Dictionary containing columns as keys and dictionaries of column value
+        to labels as values. Labels for a single variable must be 32,000
+        characters or smaller.
     convert_strl : list
         List of columns names to convert to Stata StrL format.  Columns with
         more than 2045 characters are automatically written as StrL.
@@ -3109,6 +3218,7 @@ def __init__(
         time_stamp: datetime.datetime | None = None,
         data_label: str | None = None,
         variable_labels: dict[Hashable, str] | None = None,
+        value_labels: dict[str, dict[float | int, str]] | None = None,
         convert_strl: Sequence[Hashable] | None = None,
         compression: CompressionOptions = "infer",
         storage_options: StorageOptions = None,
@@ -3127,6 +3237,7 @@ def __init__(
             time_stamp=time_stamp,
             data_label=data_label,
             variable_labels=variable_labels,
+            value_labels=value_labels,
             compression=compression,
             storage_options=storage_options,
         )
@@ -3272,7 +3383,7 @@ def _write_value_label_names(self) -> None:
         for i in range(self.nvar):
             # Use variable name when categorical
             name = ""  # default name
-            if self._is_col_cat[i]:
+            if self._has_value_labels[i]:
                 name = self.varlist[i]
             name = self._null_terminate_str(name)
             encoded_name = _pad_bytes_new(name[:32].encode(self._encoding), vl_len + 1)
@@ -3427,6 +3538,10 @@ class StataWriterUTF8(StataWriter117):
     variable_labels : dict, default None
         Dictionary containing columns as keys and variable labels as values.
         Each label must be 80 characters or smaller.
+    value_labels : dict
+        Dictionary containing columns as keys and dictionaries of column value
+        to labels as values. Labels for a single variable must be 32,000
+        characters or smaller.
     convert_strl : list, default None
         List of columns names to convert to Stata StrL format.  Columns with
         more than 2045 characters are automatically written as StrL.
@@ -3501,6 +3616,7 @@ def __init__(
         time_stamp: datetime.datetime | None = None,
         data_label: str | None = None,
         variable_labels: dict[Hashable, str] | None = None,
+        value_labels: dict[str, dict[float | int, str]] | None = None,
         convert_strl: Sequence[Hashable] | None = None,
         version: int | None = None,
         compression: CompressionOptions = "infer",
@@ -3525,6 +3641,7 @@ def __init__(
             time_stamp=time_stamp,
             data_label=data_label,
             variable_labels=variable_labels,
+            value_labels=value_labels,
             convert_strl=convert_strl,
             compression=compression,
             storage_options=storage_options,
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 6bf8d23f61937..270ff9ded89b1 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -29,6 +29,7 @@
     PossiblePrecisionLoss,
     StataMissingValue,
     StataReader,
+    StataWriter,
     StataWriterUTF8,
     ValueLabelTypeMismatch,
     read_stata,
@@ -2048,3 +2049,38 @@ def test_stata_compression(compression_only, read_infer, to_infer):
         df.to_stata(path, compression=to_compression)
         result = read_stata(path, compression=read_compression, index_col="index")
         tm.assert_frame_equal(result, df)
+
+
+def test_non_categorical_value_labels():
+    data = DataFrame(
+        {
+            "X": [1, 2, 3, 4, 1],
+            "Y": [7, 7, 9, 8, 10],
+            "Z": pd.Categorical(["j", "k", "l", "k", "j"]),
+        }
+    )
+
+    with tm.ensure_clean() as path:
+        value_labels = {"X": {1: "one", 2: "two", 4: "four"}}
+        expected = {**value_labels, "Z": {0: "j", 1: "k", 2: "l"}}
+
+        writer = StataWriter(path, data, value_labels=value_labels)
+        writer.write_file()
+
+        reader = StataReader(path)
+        reader_value_labels = reader.value_labels()
+        assert reader_value_labels == expected
+
+        msg = "Can't create value labels for notY, it wasn't found in the dataset."
+        with pytest.raises(ValueError, match=msg):
+            value_labels = {"notY": {7: "label1", 8: "label2"}}
+            writer = StataWriter(path, data, value_labels=value_labels)
+
+        msg = (
+            "Can't create value labels for Z, a categorical "
+            "column. Value labels are created automatically before "
+            "writing categorical columns."
+        )
+        with pytest.raises(ValueError, match=msg):
+            value_labels = {"Z": {1: "a", 2: "k", 3: "j", 4: "i"}}
+            writer = StataWriter(path, data, value_labels=value_labels)

From 5a3d6d9915ea0a17183f7f939aa9c584baf63c74 Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Mon, 19 Apr 2021 12:42:39 -0400
Subject: [PATCH 02/17] Removing unnecessary list comprehension, flake8;
 restrict value labels to numeric columns

---
 pandas/io/stata.py            | 10 +++++-----
 pandas/tests/io/test_stata.py |  5 ++---
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index faef07e9a1d88..83f6d66334377 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -46,6 +46,7 @@
     ensure_object,
     is_categorical_dtype,
     is_datetime64_dtype,
+    is_numeric_dtype,
 )
 
 from pandas import (
@@ -774,7 +775,7 @@ def __init__(
 
         self.labname = labname
         self._encoding = encoding
-        self.value_labels = [(val, lab) for val, lab in value_labels.items()]
+        self.value_labels = list(value_labels.items())
         self.value_labels.sort(key=lambda x: x[0])
 
         self.text_len = 0
@@ -2347,13 +2348,12 @@ def _prepare_non_cat_value_labels(self, data: DataFrame) -> None:
                     f"Can't create value labels for {labname}, it wasn't "
                     "found in the dataset."
                 )
-            if is_categorical_dtype(data[labname].dtype):
+            if not is_numeric_dtype(data[labname].dtype):
                 # Labels should not be passed explicitly for categorical
                 # columns that will be converted to int
                 raise ValueError(
-                    f"Can't create value labels for {labname}, a categorical "
-                    "column. Value labels are created automatically before "
-                    "writing categorical columns."
+                    f"Can't create value labels for {labname}, value labels "
+                    "can only be applied to numeric columns."
                 )
             svl = StataNonCatValueLabel(labname, labels)
             self._value_labels.append(svl)
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 270ff9ded89b1..22e7677d60764 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -2077,9 +2077,8 @@ def test_non_categorical_value_labels():
             writer = StataWriter(path, data, value_labels=value_labels)
 
         msg = (
-            "Can't create value labels for Z, a categorical "
-            "column. Value labels are created automatically before "
-            "writing categorical columns."
+            "Can't create value labels for Z, value labels "
+            "can only be applied to numeric columns."
         )
         with pytest.raises(ValueError, match=msg):
             value_labels = {"Z": {1: "a", 2: "k", 3: "j", 4: "i"}}

From bbb43f8512acd0c19a42c33368b0158ad8640eb5 Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Mon, 19 Apr 2021 14:44:40 -0400
Subject: [PATCH 03/17] Adding value_labels argument to DataFrame to_stata
 method

---
 pandas/core/frame.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index db12129a15ef9..79e0602bc73f4 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2378,6 +2378,7 @@ def to_stata(
         time_stamp: datetime.datetime | None = None,
         data_label: str | None = None,
         variable_labels: dict[Hashable, str] | None = None,
+        value_labels: dict[str, dict[float | int, str]] | None = None,
         version: int | None = 114,
         convert_strl: Sequence[Hashable] | None = None,
         compression: CompressionOptions = "infer",
@@ -2420,6 +2421,10 @@ def to_stata(
         variable_labels : dict
             Dictionary containing columns as keys and variable labels as
             values. Each label must be 80 characters or smaller.
+        value_labels : dict
+            Dictionary containing columns as keys and dictionaries of column value
+            to labels as values. Labels for a single variable must be 32,000
+            characters or smaller.
         version : {{114, 117, 118, 119, None}}, default 114
             Version to use in the output dta file. Set to None to let pandas
             decide between 118 or 119 formats depending on the number of
@@ -2522,6 +2527,7 @@ def to_stata(
             data_label=data_label,
             write_index=write_index,
             variable_labels=variable_labels,
+            value_labels=value_labels,
             compression=compression,
             storage_options=storage_options,
             **kwargs,

From 533a3d5a94fad499eb8c9c45a0677347dd82b5bf Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Mon, 26 Apr 2021 10:34:48 -0400
Subject: [PATCH 04/17] Updating types and changing ValueError to KeyError for
 missing column

---
 pandas/core/frame.py | 4 ++--
 pandas/io/stata.py   | 7 ++++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 79e0602bc73f4..21dd893f9484a 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2378,7 +2378,7 @@ def to_stata(
         time_stamp: datetime.datetime | None = None,
         data_label: str | None = None,
         variable_labels: dict[Hashable, str] | None = None,
-        value_labels: dict[str, dict[float | int, str]] | None = None,
+        value_labels: dict[Hashable, dict[float | int, str]] | None = None,
         version: int | None = 114,
         convert_strl: Sequence[Hashable] | None = None,
         compression: CompressionOptions = "infer",
@@ -2421,7 +2421,7 @@ def to_stata(
         variable_labels : dict
             Dictionary containing columns as keys and variable labels as
             values. Each label must be 80 characters or smaller.
-        value_labels : dict
+        value_labels : dict of dicts
             Dictionary containing columns as keys and dictionaries of column value
             to labels as values. Labels for a single variable must be 32,000
             characters or smaller.
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 83f6d66334377..147983cc3330c 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -21,6 +21,7 @@
     Any,
     AnyStr,
     Hashable,
+    Literal,
     Sequence,
     cast,
 )
@@ -767,7 +768,7 @@ def __init__(
         self,
         labname: str,
         value_labels: dict[float | int, str],
-        encoding: str = "latin-1",
+        encoding: Literal["latin-1", "utf-8"] = "latin-1",
     ):
 
         if encoding not in ("latin-1", "utf-8"):
@@ -2344,7 +2345,7 @@ def _prepare_non_cat_value_labels(self, data: DataFrame) -> None:
         for labname, labels in self._non_cat_value_labels.items():
             if labname not in data.columns:
                 # Value label should apply to a column
-                raise ValueError(
+                raise KeyError(
                     f"Can't create value labels for {labname}, it wasn't "
                     "found in the dataset."
                 )
@@ -3616,7 +3617,7 @@ def __init__(
         time_stamp: datetime.datetime | None = None,
         data_label: str | None = None,
         variable_labels: dict[Hashable, str] | None = None,
-        value_labels: dict[str, dict[float | int, str]] | None = None,
+        value_labels: dict[Hashable, dict[float | int, str]] | None = None,
         convert_strl: Sequence[Hashable] | None = None,
         version: int | None = None,
         compression: CompressionOptions = "infer",

From ed73d69f205fc5f87a11403a06422569657528e8 Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Fri, 30 Apr 2021 14:41:21 -0400
Subject: [PATCH 05/17] Using converted names for invalid Stata variable names

---
 pandas/io/stata.py | 54 ++++++++++++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 147983cc3330c..eea7217093a90 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -2226,10 +2226,6 @@ class StataWriter(StataParser):
     variable_labels : dict
         Dictionary containing columns as keys and variable labels as values.
         Each label must be 80 characters or smaller.
-    value_labels : dict
-        Dictionary containing columns as keys and dictionaries of column value
-        to labels as values. Labels for a single variable must be 32,000
-        characters or smaller.
     compression : str or dict, default 'infer'
         For on-the-fly compression of the output dta. If string, specifies
         compression mode. If dict, value at key 'method' specifies compression
@@ -2246,6 +2242,11 @@ class StataWriter(StataParser):
 
         .. versionadded:: 1.2.0
 
+    value_labels : dict of dicts
+        Dictionary containing columns as keys and dictionaries of column value
+        to labels as values. Labels for a single variable must be 32,000
+        characters or smaller.
+
     Returns
     -------
     writer : StataWriter instance
@@ -2294,9 +2295,10 @@ def __init__(
         time_stamp: datetime.datetime | None = None,
         data_label: str | None = None,
         variable_labels: dict[Hashable, str] | None = None,
-        value_labels: dict[str, dict[float | int, str]] | None = None,
         compression: CompressionOptions = "infer",
         storage_options: StorageOptions = None,
+        *,
+        value_labels: dict[Hashable, dict[float | int, str]] | None = None,
     ):
         super().__init__()
         self._convert_dates = {} if convert_dates is None else convert_dates
@@ -2308,6 +2310,7 @@ def __init__(
         self._value_labels: list[StataValueLabel] = []
         self._compression = compression
         self._output_file: Buffer | None = None
+        self._converted_names: dict[Hashable, str] = {}
         # attach nobs, nvars, data, varlist, typlist
         self._prepare_pandas(data)
         self.storage_options = storage_options
@@ -2317,7 +2320,6 @@ def __init__(
         self._byteorder = _set_endianness(byteorder)
         self._fname = fname
         self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8}
-        self._converted_names: dict[Hashable, str] = {}
 
     def _write(self, to_write: str) -> None:
         """
@@ -2339,27 +2341,33 @@ def _prepare_non_cat_value_labels(self, data: DataFrame) -> None:
         labels
         """
         self._has_value_labels = np.repeat(False, data.shape[1])
+        labelled_columns = []
         if self._non_cat_value_labels is None:
             return
 
         for labname, labels in self._non_cat_value_labels.items():
-            if labname not in data.columns:
-                # Value label should apply to a column
+            if labname in self._converted_names:
+                colname = self._converted_names[labname]
+            elif labname in data.columns:
+                colname = labname
+            else:
                 raise KeyError(
                     f"Can't create value labels for {labname}, it wasn't "
                     "found in the dataset."
                 )
-            if not is_numeric_dtype(data[labname].dtype):
+
+            if not is_numeric_dtype(data[colname].dtype):
                 # Labels should not be passed explicitly for categorical
                 # columns that will be converted to int
                 raise ValueError(
                     f"Can't create value labels for {labname}, value labels "
                     "can only be applied to numeric columns."
                 )
-            svl = StataNonCatValueLabel(labname, labels)
+            svl = StataNonCatValueLabel(colname, labels)
             self._value_labels.append(svl)
+            labelled_columns.append(colname)
 
-        has_non_cat_val_labels = data.columns.isin(self._non_cat_value_labels.keys())
+        has_non_cat_val_labels = data.columns.isin(labelled_columns)
         self._has_value_labels |= has_non_cat_val_labels
 
     def _prepare_categoricals(self, data: DataFrame) -> DataFrame:
@@ -3147,10 +3155,6 @@ class StataWriter117(StataWriter):
     variable_labels : dict
         Dictionary containing columns as keys and variable labels as values.
         Each label must be 80 characters or smaller.
-    value_labels : dict
-        Dictionary containing columns as keys and dictionaries of column value
-        to labels as values. Labels for a single variable must be 32,000
-        characters or smaller.
     convert_strl : list
         List of columns names to convert to Stata StrL format.  Columns with
         more than 2045 characters are automatically written as StrL.
@@ -3169,6 +3173,11 @@ class StataWriter117(StataWriter):
 
         .. versionadded:: 1.1.0
 
+    value_labels : dict of dicts
+        Dictionary containing columns as keys and dictionaries of column value
+        to labels as values. Labels for a single variable must be 32,000
+        characters or smaller.
+
     Returns
     -------
     writer : StataWriter117 instance
@@ -3219,10 +3228,11 @@ def __init__(
         time_stamp: datetime.datetime | None = None,
         data_label: str | None = None,
         variable_labels: dict[Hashable, str] | None = None,
-        value_labels: dict[str, dict[float | int, str]] | None = None,
         convert_strl: Sequence[Hashable] | None = None,
         compression: CompressionOptions = "infer",
         storage_options: StorageOptions = None,
+        *,
+        value_labels: dict[Hashable, dict[float | int, str]] | None = None,
     ):
         # Copy to new list since convert_strl might be modified later
         self._convert_strl: list[Hashable] = []
@@ -3539,10 +3549,6 @@ class StataWriterUTF8(StataWriter117):
     variable_labels : dict, default None
         Dictionary containing columns as keys and variable labels as values.
         Each label must be 80 characters or smaller.
-    value_labels : dict
-        Dictionary containing columns as keys and dictionaries of column value
-        to labels as values. Labels for a single variable must be 32,000
-        characters or smaller.
     convert_strl : list, default None
         List of columns names to convert to Stata StrL format.  Columns with
         more than 2045 characters are automatically written as StrL.
@@ -3565,6 +3571,11 @@ class StataWriterUTF8(StataWriter117):
 
         .. versionadded:: 1.1.0
 
+    value_labels : dict of dicts
+        Dictionary containing columns as keys and dictionaries of column value
+        to labels as values. Labels for a single variable must be 32,000
+        characters or smaller.
+
     Returns
     -------
     StataWriterUTF8
@@ -3617,11 +3628,12 @@ def __init__(
         time_stamp: datetime.datetime | None = None,
         data_label: str | None = None,
         variable_labels: dict[Hashable, str] | None = None,
-        value_labels: dict[Hashable, dict[float | int, str]] | None = None,
         convert_strl: Sequence[Hashable] | None = None,
         version: int | None = None,
         compression: CompressionOptions = "infer",
         storage_options: StorageOptions = None,
+        *,
+        value_labels: dict[Hashable, dict[float | int, str]] | None = None,
     ):
         if version is None:
             version = 118 if data.shape[1] <= 32767 else 119

From ae4dca7e7bc6ef4e657bc89b96dcc13fd32e4e09 Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Fri, 30 Apr 2021 14:42:01 -0400
Subject: [PATCH 06/17] Moving value_labels to key word only for to_stata

---
 pandas/core/frame.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 21dd893f9484a..d811cb329ea9d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2378,11 +2378,12 @@ def to_stata(
         time_stamp: datetime.datetime | None = None,
         data_label: str | None = None,
         variable_labels: dict[Hashable, str] | None = None,
-        value_labels: dict[Hashable, dict[float | int, str]] | None = None,
         version: int | None = 114,
         convert_strl: Sequence[Hashable] | None = None,
         compression: CompressionOptions = "infer",
         storage_options: StorageOptions = None,
+        *,
+        value_labels: dict[Hashable, dict[float | int, str]] | None = None,
     ) -> None:
         """
         Export DataFrame object to Stata dta format.
@@ -2421,10 +2422,6 @@ def to_stata(
         variable_labels : dict
             Dictionary containing columns as keys and variable labels as
             values. Each label must be 80 characters or smaller.
-        value_labels : dict of dicts
-            Dictionary containing columns as keys and dictionaries of column value
-            to labels as values. Labels for a single variable must be 32,000
-            characters or smaller.
         version : {{114, 117, 118, 119, None}}, default 114
             Version to use in the output dta file. Set to None to let pandas
             decide between 118 or 119 formats depending on the number of
@@ -2468,6 +2465,11 @@ def to_stata(
 
             .. versionadded:: 1.2.0
 
+        value_labels : dict of dicts
+            Dictionary containing columns as keys and dictionaries of column value
+            to labels as values. Labels for a single variable must be 32,000
+            characters or smaller.
+
         Raises
         ------
         NotImplementedError
@@ -2527,9 +2529,9 @@ def to_stata(
             data_label=data_label,
             write_index=write_index,
             variable_labels=variable_labels,
-            value_labels=value_labels,
             compression=compression,
             storage_options=storage_options,
+            value_labels=value_labels,
             **kwargs,
         )
         writer.write_file()

From 8e57e46043166b210f1180698737acb2f68e2783 Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Fri, 30 Apr 2021 18:42:31 -0400
Subject: [PATCH 07/17] Adding tests for invalid Stata names and repeated value
 labels

---
 pandas/tests/io/test_stata.py | 85 +++++++++++++++++++++++++++++++++--
 1 file changed, 82 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 22e7677d60764..02cf478c61583 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -2054,14 +2054,18 @@ def test_stata_compression(compression_only, read_infer, to_infer):
 def test_non_categorical_value_labels():
     data = DataFrame(
         {
-            "X": [1, 2, 3, 4, 1],
+            "fully_labelled": [1, 2, 3, 3, 1],
+            "partially_labelled": [1.0, 2.0, np.nan, 9.0, np.nan],
             "Y": [7, 7, 9, 8, 10],
             "Z": pd.Categorical(["j", "k", "l", "k", "j"]),
         }
     )
 
     with tm.ensure_clean() as path:
-        value_labels = {"X": {1: "one", 2: "two", 4: "four"}}
+        value_labels = {
+            "fully_labelled": {1: "one", 2: "two", 3: "three"},
+            "partially_labelled": {1.0: "one", 2.0: "two"},
+        }
         expected = {**value_labels, "Z": {0: "j", 1: "k", 2: "l"}}
 
         writer = StataWriter(path, data, value_labels=value_labels)
@@ -2072,7 +2076,7 @@ def test_non_categorical_value_labels():
         assert reader_value_labels == expected
 
         msg = "Can't create value labels for notY, it wasn't found in the dataset."
-        with pytest.raises(ValueError, match=msg):
+        with pytest.raises(KeyError, match=msg):
             value_labels = {"notY": {7: "label1", 8: "label2"}}
             writer = StataWriter(path, data, value_labels=value_labels)
 
@@ -2083,3 +2087,78 @@ def test_non_categorical_value_labels():
         with pytest.raises(ValueError, match=msg):
             value_labels = {"Z": {1: "a", 2: "k", 3: "j", 4: "i"}}
             writer = StataWriter(path, data, value_labels=value_labels)
+
+
+def test_non_categorical_value_label_name_conversion():
+    # Check conversion of invalid variable names
+    data = DataFrame(
+        {
+            "invalid~!": [1, 1, 2, 3, 5, 8],  # Only alphanumeric and _
+            "6_invalid": [1, 1, 2, 3, 5, 8],  # Must start with letter or _
+            "invalid_name_longer_than_32_characters": [8, 8, 9, 9, 8, 8],  # Too long
+            "aggregate": [2, 5, 5, 6, 6, 9],  # Reserved words
+            (1, 2): [1, 2, 3, 4, 5, 6],  # Hashable non-string
+        }
+    )
+
+    value_labels = {
+        "invalid~!": {1: "label1", 2: "label2"},
+        "6_invalid": {1: "label1", 2: "label2"},
+        "invalid_name_longer_than_32_characters": {8: "eight", 9: "nine"},
+        "aggregate": {5: "five"},
+        (1, 2): {3: "three"},
+    }
+
+    expected = {
+        "invalid__": {1: "label1", 2: "label2"},
+        "_6_invalid": {1: "label1", 2: "label2"},
+        "invalid_name_longer_than_32_char": {8: "eight", 9: "nine"},
+        "_aggregate": {5: "five"},
+        "_1__2_": {3: "three"},
+    }
+
+    with tm.ensure_clean() as path:
+        with tm.assert_produces_warning(InvalidColumnName):
+            data.to_stata(path, value_labels=value_labels)
+
+        reader = StataReader(path)
+        reader_value_labels = reader.value_labels()
+        assert reader_value_labels == expected
+
+
+def test_non_categorical_value_label_convert_categoricals_error():
+    # Mapping more than one value to the same label is valid for Stata
+    # labels, but can't be read with convert_categoricals=True
+    value_labels = {
+        "repeated_labels": {10: "Ten", 20: "More than ten", 40: "More than ten"}
+    }
+
+    data = DataFrame(
+        {
+            "repeated_labels": [10, 10, 20, 20, 40, 40],
+        }
+    )
+
+    with tm.ensure_clean() as path:
+        data.to_stata(path, value_labels=value_labels)
+
+        reader = StataReader(path, convert_categoricals=False)
+        reader_value_labels = reader.value_labels()
+        assert reader_value_labels == value_labels
+
+        col = "repeated_labels"
+        repeats = "-" * 80 + "\n" + "\n".join(["More than ten"])
+
+        msg = f"""
+Value labels for column {col} are not unique. These cannot be converted to
+pandas categoricals.
+
+Either read the file with `convert_categoricals` set to False or use the
+low level interface in `StataReader` to separately read the values and the
+value_labels.
+
+The repeated labels are:
+{repeats}
+"""
+        with pytest.raises(ValueError, match=msg):
+            read_stata(path, convert_categoricals=True)

From 70dc88b99584d6a5b19cbf7c9dd67cfb20f54c04 Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Fri, 30 Apr 2021 19:03:30 -0400
Subject: [PATCH 08/17] Fixing Literal import

---
 pandas/io/stata.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index eea7217093a90..d310c6bcffc82 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -18,10 +18,10 @@
 import struct
 import sys
 from typing import (
+    TYPE_CHECKING,
     Any,
     AnyStr,
     Hashable,
-    Literal,
     Sequence,
     cast,
 )
@@ -66,6 +66,9 @@
 
 from pandas.io.common import get_handle
 
+if TYPE_CHECKING:
+    from typing import Literal
+
 _version_error = (
     "Version of given Stata file is {version}. pandas supports importing "
     "versions 105, 108, 111 (Stata 7SE), 113 (Stata 8/9), "

From 2796d1f660023380a5e933aa550124a675872160 Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Mon, 3 May 2021 14:52:34 -0400
Subject: [PATCH 09/17] Moving label encoding to method

---
 pandas/io/stata.py | 46 ++++++++++++++--------------------------------
 1 file changed, 14 insertions(+), 32 deletions(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index d310c6bcffc82..d661efb360815 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -665,10 +665,18 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
         categories = catarray.cat.categories
         self.value_labels = list(zip(np.arange(len(categories)), categories))
         self.value_labels.sort(key=lambda x: x[0])
+
         self.text_len = 0
         self.txt: list[bytes] = []
         self.n = 0
+        self.off = np.array([])
+        self.val = np.array([])
+        self.len = 0
+
+        self._prepare_value_labels()
 
+    def _prepare_value_labels(self):
+        """ Encode value labels. """
         # Compute lengths and setup lists of offsets and labels
         offsets: list[int] = []
         values: list[int] = []
@@ -677,10 +685,10 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
             if not isinstance(category, str):
                 category = str(category)
                 warnings.warn(
-                    value_label_mismatch_doc.format(catarray.name),
+                    value_label_mismatch_doc.format(self.labname),
                     ValueLabelTypeMismatch,
                 )
-            category = category.encode(encoding)
+            category = category.encode(self._encoding)
             offsets.append(self.text_len)
             self.text_len += len(category) + 1  # +1 for the padding
             values.append(vl[0])
@@ -785,37 +793,11 @@ def __init__(
         self.text_len = 0
         self.txt: list[bytes] = []
         self.n = 0
+        self.off = np.array([])
+        self.val = np.array([])
+        self.len = 0
 
-        # Compute lengths and setup lists of offsets and labels
-        offsets: list[int] = []
-        values: list[int] = []
-        for vl in self.value_labels:
-            category = vl[1]
-            if not isinstance(category, str):
-                category = str(category)
-                warnings.warn(
-                    value_label_mismatch_doc.format(labname),
-                    ValueLabelTypeMismatch,
-                )
-            category = category.encode(encoding)
-            offsets.append(self.text_len)
-            self.text_len += len(category) + 1  # +1 for the padding
-            values.append(vl[0])
-            self.txt.append(category)
-            self.n += 1
-
-        if self.text_len > 32000:
-            raise ValueError(
-                "Stata value labels for a single variable must "
-                "have a combined length less than 32,000 characters."
-            )
-
-        # Ensure int32
-        self.off = np.array(offsets, dtype=np.int32)
-        self.val = np.array(values, dtype=np.int32)
-
-        # Total length
-        self.len = 4 + 4 + 4 * self.n + 4 * self.n + self.text_len
+        self._prepare_value_labels()
 
 
 class StataMissingValue:

From 277896afbf94d8cdc3252efcce39898a639264c2 Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Thu, 6 May 2021 16:38:34 -0400
Subject: [PATCH 10/17] Updates from review: typing, documentation

---
 pandas/core/frame.py | 2 ++
 pandas/io/stata.py   | 9 ++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d811cb329ea9d..85cd908efe0dc 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2470,6 +2470,8 @@ def to_stata(
             to labels as values. Labels for a single variable must be 32,000
             characters or smaller.
 
+            .. versionadded:: 1.3.0
+
         Raises
         ------
         NotImplementedError
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index d661efb360815..ed00f624e3ef3 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -663,7 +663,9 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
         self.labname = catarray.name
         self._encoding = encoding
         categories = catarray.cat.categories
-        self.value_labels = list(zip(np.arange(len(categories)), categories))
+        self.value_labels: list[tuple[int | float, str], ...] = list(
+            zip(np.arange(len(categories)), categories)
+        )
         self.value_labels.sort(key=lambda x: x[0])
 
         self.text_len = 0
@@ -787,8 +789,9 @@ def __init__(
 
         self.labname = labname
         self._encoding = encoding
-        self.value_labels = list(value_labels.items())
-        self.value_labels.sort(key=lambda x: x[0])
+        self.value_labels: list[tuple[int | float, str], ...] = sorted(
+            value_labels.items(), key=lambda x: x[0]
+        )
 
         self.text_len = 0
         self.txt: list[bytes] = []

From c31034d24c97d81074100d2e6241394a02428627 Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Mon, 10 May 2021 21:40:03 -0400
Subject: [PATCH 11/17] Fixing mypy errors

---
 pandas/io/stata.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index ed00f624e3ef3..4dc564e07be82 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -663,7 +663,7 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
         self.labname = catarray.name
         self._encoding = encoding
         categories = catarray.cat.categories
-        self.value_labels: list[tuple[int | float, str], ...] = list(
+        self.value_labels: list[tuple[int | float, str]] = list(
             zip(np.arange(len(categories)), categories)
         )
         self.value_labels.sort(key=lambda x: x[0])
@@ -681,9 +681,9 @@ def _prepare_value_labels(self):
         """ Encode value labels. """
         # Compute lengths and setup lists of offsets and labels
         offsets: list[int] = []
-        values: list[int] = []
+        values: list[int | float] = []
         for vl in self.value_labels:
-            category = vl[1]
+            category: str | bytes = vl[1]
             if not isinstance(category, str):
                 category = str(category)
                 warnings.warn(
@@ -789,7 +789,7 @@ def __init__(
 
         self.labname = labname
         self._encoding = encoding
-        self.value_labels: list[tuple[int | float, str], ...] = sorted(
+        self.value_labels: list[tuple[int | float, str]] = sorted(
             value_labels.items(), key=lambda x: x[0]
         )
 
@@ -2337,7 +2337,7 @@ def _prepare_non_cat_value_labels(self, data: DataFrame) -> None:
             if labname in self._converted_names:
                 colname = self._converted_names[labname]
             elif labname in data.columns:
-                colname = labname
+                colname = str(labname)
             else:
                 raise KeyError(
                     f"Can't create value labels for {labname}, it wasn't "

From 85374fd0508c510ed1479db15ba5da83fcfdc106 Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Thu, 27 May 2021 09:16:38 -0400
Subject: [PATCH 12/17] Clarifying comment on label length

---
 pandas/io/stata.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 4dc564e07be82..ab5b4ed685657 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -2232,8 +2232,8 @@ class StataWriter(StataParser):
 
     value_labels : dict of dicts
         Dictionary containing columns as keys and dictionaries of column value
-        to labels as values. Labels for a single variable must be 32,000
-        characters or smaller.
+        to labels as values. The combined length of all labels for a single
+        variable must be 32,000 characters or smaller.
 
     Returns
     -------
@@ -3163,8 +3163,8 @@ class StataWriter117(StataWriter):
 
     value_labels : dict of dicts
         Dictionary containing columns as keys and dictionaries of column value
-        to labels as values. Labels for a single variable must be 32,000
-        characters or smaller.
+        to labels as values. The combined length of all labels for a single
+        variable must be 32,000 characters or smaller.
 
     Returns
     -------
@@ -3561,8 +3561,8 @@ class StataWriterUTF8(StataWriter117):
 
     value_labels : dict of dicts
         Dictionary containing columns as keys and dictionaries of column value
-        to labels as values. Labels for a single variable must be 32,000
-        characters or smaller.
+        to labels as values. The combined length of all labels for a single
+        variable must be 32,000 characters or smaller.
 
     Returns
     -------

From 4ac27dbcd1d4ee77fbd3286b9cc8f6dfab2c1132 Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Wed, 16 Jun 2021 09:23:12 -0400
Subject: [PATCH 13/17] Removing duplication in value label class and returning
 labels from prepare_non_cat

---
 pandas/io/stata.py | 46 ++++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index ab5b4ed685657..f5ad01fa99902 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -668,17 +668,20 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
         )
         self.value_labels.sort(key=lambda x: x[0])
 
+        self._prepare_value_labels()
+
+    def _prepare_value_labels(self):
+        """ Encode value labels. """
+
         self.text_len = 0
         self.txt: list[bytes] = []
         self.n = 0
+        # Offsets (byte position where each label starts), converted to int32
         self.off = np.array([])
+        # Values, converted to int32
         self.val = np.array([])
         self.len = 0
 
-        self._prepare_value_labels()
-
-    def _prepare_value_labels(self):
-        """ Encode value labels. """
         # Compute lengths and setup lists of offsets and labels
         offsets: list[int] = []
         values: list[int | float] = []
@@ -792,14 +795,6 @@ def __init__(
         self.value_labels: list[tuple[int | float, str]] = sorted(
             value_labels.items(), key=lambda x: x[0]
         )
-
-        self.text_len = 0
-        self.txt: list[bytes] = []
-        self.n = 0
-        self.off = np.array([])
-        self.val = np.array([])
-        self.len = 0
-
         self._prepare_value_labels()
 
 
@@ -2296,6 +2291,7 @@ def __init__(
         self._variable_labels = variable_labels
         self._non_cat_value_labels = value_labels
         self._value_labels: list[StataValueLabel] = []
+        self._has_value_labels = np.array([], dtype=bool)
         self._compression = compression
         self._output_file: Buffer | None = None
         self._converted_names: dict[Hashable, str] = {}
@@ -2323,15 +2319,16 @@ def _write_bytes(self, value: bytes) -> None:
         """
         self.handles.handle.write(value)  # type: ignore[arg-type]
 
-    def _prepare_non_cat_value_labels(self, data: DataFrame) -> None:
+    def _prepare_non_cat_value_labels(
+        self, data: DataFrame
+    ) -> list[StataNonCatValueLabel]:
         """
         Check for value labels provided for non-categorical columns. Value
         labels
         """
-        self._has_value_labels = np.repeat(False, data.shape[1])
-        labelled_columns = []
+        non_cat_value_labels: list[StataNonCatValueLabel] = []
         if self._non_cat_value_labels is None:
-            return
+            return non_cat_value_labels
 
         for labname, labels in self._non_cat_value_labels.items():
             if labname in self._converted_names:
@@ -2352,11 +2349,8 @@ def _prepare_non_cat_value_labels(self, data: DataFrame) -> None:
                     "can only be applied to numeric columns."
                 )
             svl = StataNonCatValueLabel(colname, labels)
-            self._value_labels.append(svl)
-            labelled_columns.append(colname)
-
-        has_non_cat_val_labels = data.columns.isin(labelled_columns)
-        self._has_value_labels |= has_non_cat_val_labels
+            non_cat_value_labels.append(svl)
+        return non_cat_value_labels
 
     def _prepare_categoricals(self, data: DataFrame) -> DataFrame:
         """
@@ -2548,8 +2542,16 @@ def _prepare_pandas(self, data: DataFrame) -> None:
         # Replace NaNs with Stata missing values
         data = self._replace_nans(data)
 
+        # Set all columns to initially unlabelled
+        self._has_value_labels = np.repeat(False, data.shape[1])
+
         # Create value labels for non-categorical data
-        self._prepare_non_cat_value_labels(data)
+        non_cat_value_labels = self._prepare_non_cat_value_labels(data)
+
+        non_cat_columns = [svl.labname for svl in non_cat_value_labels]
+        has_non_cat_val_labels = data.columns.isin(non_cat_columns)
+        self._has_value_labels |= has_non_cat_val_labels
+        self._value_labels.extend(non_cat_value_labels)
 
         # Convert categoricals to int data, and strip labels
         data = self._prepare_categoricals(data)

From d2a55840859055505f889dd12341869b783b1554 Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Mon, 21 Jun 2021 08:07:49 -0400
Subject: [PATCH 14/17] Adding versionaddeds

---
 pandas/core/frame.py | 2 +-
 pandas/io/stata.py   | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 85cd908efe0dc..c1a3f4d9298b4 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2470,7 +2470,7 @@ def to_stata(
             to labels as values. Labels for a single variable must be 32,000
             characters or smaller.
 
-            .. versionadded:: 1.3.0
+            .. versionadded:: 1.4.0
 
         Raises
         ------
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index f5ad01fa99902..35d826c038e62 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -2230,6 +2230,8 @@ class StataWriter(StataParser):
         to labels as values. The combined length of all labels for a single
         variable must be 32,000 characters or smaller.
 
+        .. versionadded:: 1.4.0
+
     Returns
     -------
     writer : StataWriter instance
@@ -3168,6 +3170,8 @@ class StataWriter117(StataWriter):
         to labels as values. The combined length of all labels for a single
         variable must be 32,000 characters or smaller.
 
+        .. versionadded:: 1.4.0
+
     Returns
     -------
     writer : StataWriter117 instance
@@ -3566,6 +3570,8 @@ class StataWriterUTF8(StataWriter117):
         to labels as values. The combined length of all labels for a single
         variable must be 32,000 characters or smaller.
 
+        .. versionadded:: 1.4.0
+
     Returns
     -------
     StataWriterUTF8

From d30d1fe2e5645a3fc4fa92ed6f059759dbce9c31 Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Mon, 21 Jun 2021 08:10:06 -0400
Subject: [PATCH 15/17] Typo in spacing of docstring

---
 pandas/io/stata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 35d826c038e62..125e669178e69 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -671,7 +671,7 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
         self._prepare_value_labels()
 
     def _prepare_value_labels(self):
-        """ Encode value labels. """
+        """Encode value labels."""
 
         self.text_len = 0
         self.txt: list[bytes] = []

From 05d4d74aa2cf09496103eada127689985861dd42 Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Mon, 19 Jul 2021 09:32:40 -0400
Subject: [PATCH 16/17] Adding release note

---
 doc/source/whatsnew/v1.4.0.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 2f8cb346935a9..d20ca1a4c6e00 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -101,6 +101,8 @@ Other enhancements
 - :meth:`Series.ewm`, :meth:`DataFrame.ewm`, now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview <window.overview>` for performance and functional benefits (:issue:`42273`)
 - :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` now support the argument ``skipna`` (:issue:`34047`)
 - :meth:`read_table` now supports the argument ``storage_options`` (:issue:`39167`)
+- :meth:`DataFrame.to_stata` and :class:`~pandas.io.stata.StataWriter` now accept the keyword-only argument ``value_labels`` to save labels for non-categorical columns (:issue:`38454`)
+-
 
 .. ---------------------------------------------------------------------------
 

From 03111426cf4dc25bec4ea81af99081bc8f7b8eea Mon Sep 17 00:00:00 2001
From: lmcindewar <l.mcindewar@fraym.io>
Date: Wed, 28 Jul 2021 14:05:49 -0400
Subject: [PATCH 17/17] Setting data in StataWriter init

---
 pandas/io/stata.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 125e669178e69..11b9e8f7009c4 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -2286,6 +2286,7 @@ def __init__(
         value_labels: dict[Hashable, dict[float | int, str]] | None = None,
     ):
         super().__init__()
+        self.data = data
         self._convert_dates = {} if convert_dates is None else convert_dates
         self._write_index = write_index
         self._time_stamp = time_stamp