pandas-dev
diff --git a/‎.github/workflows/ubuntu.yml
Lines changed: 3 additions & 9 deletions b/‎.github/workflows/ubuntu.yml
Lines changed: 3 additions & 9 deletions
diff --git a/‎doc/source/user_guide/io.rst
Lines changed: 12 additions & 0 deletions b/‎doc/source/user_guide/io.rst
Lines changed: 12 additions & 0 deletions
diff --git a/‎doc/source/user_guide/style.ipynb
Lines changed: 188 additions & 104 deletions b/‎doc/source/user_guide/style.ipynb
Lines changed: 188 additions & 104 deletions
diff --git a/‎doc/source/whatsnew/v2.0.0.rst
Lines changed: 18 additions & 2 deletions b/‎doc/source/whatsnew/v2.0.0.rst
Lines changed: 18 additions & 2 deletions
diff --git a/‎pandas/_typing.py
Lines changed: 3 additions & 0 deletions b/‎pandas/_typing.py
Lines changed: 3 additions & 0 deletions
diff --git a/‎pandas/core/config_init.py
Lines changed: 0 additions & 52 deletions b/‎pandas/core/config_init.py
Lines changed: 0 additions & 52 deletions
diff --git a/‎pandas/core/generic.py
Lines changed: 41 additions & 15 deletions b/‎pandas/core/generic.py
Lines changed: 41 additions & 15 deletions
diff --git a/‎pandas/core/groupby/generic.py
Lines changed: 19 additions & 22 deletions b/‎pandas/core/groupby/generic.py
Lines changed: 19 additions & 22 deletions
@@ -29,7 +29,7 @@ jobs:
       matrix:
         env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml]
         pattern: ["not single_cpu", "single_cpu"]
-        pyarrow_version: ["7", "8", "9", "10"]
+        pyarrow_version: ["8", "9", "10"]
         include:
           - name: "Downstream Compat"
             env_file: actions-38-downstream_compat.yaml
@@ -79,23 +79,17 @@ jobs:
             test_args: "-W error::DeprecationWarning -W error::FutureWarning"
             error_on_warnings: "0"
         exclude:
-          - env_file: actions-38.yaml
-            pyarrow_version: "7"
           - env_file: actions-38.yaml
             pyarrow_version: "8"
           - env_file: actions-38.yaml
             pyarrow_version: "9"
-          - env_file: actions-39.yaml
-            pyarrow_version: "7"
           - env_file: actions-39.yaml
             pyarrow_version: "8"
           - env_file: actions-39.yaml
             pyarrow_version: "9"
-          - env_file: actions-311.yaml
-            pyarrow_version: "7"
-          - env_file: actions-311.yaml
+          - env_file: actions-310.yaml
             pyarrow_version: "8"
-          - env_file: actions-311.yaml
+          - env_file: actions-310.yaml
             pyarrow_version: "9"
       fail-fast: false
     name: ${{ matrix.name || format('{0} pyarrow={1} {2}', matrix.env_file, matrix.pyarrow_version, matrix.pattern) }}
 
@@ -2069,6 +2069,8 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series``
 * ``lines`` : reads file as one json object per line.
 * ``encoding`` : The encoding to use to decode py3 bytes.
 * ``chunksize`` : when used in combination with ``lines=True``, return a JsonReader which reads in ``chunksize`` lines per iteration.
+* ``engine``: Either ``"ujson"``, the built-in JSON parser, or ``"pyarrow"`` which dispatches to pyarrow's ``pyarrow.json.read_json``.
+  The ``"pyarrow"`` engine is only available when ``lines=True``.
 
 The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON is not parseable.
 
@@ -2250,6 +2252,16 @@ For line-delimited json files, pandas can also return an iterator which reads in
       for chunk in reader:
           print(chunk)
 
+Line-delimited json can also be read using the pyarrow reader by specifying ``engine="pyarrow"``.
+
+.. ipython:: python
+
+   from io import BytesIO
+   df = pd.read_json(BytesIO(jsonl.encode()), lines=True, engine="pyarrow")
+   df
+
+.. versionadded:: 2.0.0
+
 .. _io.table_schema:
 
 Table schema
 
@@ -223,9 +223,12 @@ Copy-on-Write improvements
   - :meth:`DataFrame.to_period` / :meth:`Series.to_period`
   - :meth:`DataFrame.truncate`
   - :meth:`DataFrame.tz_convert` / :meth:`Series.tz_localize`
-  - :meth:`DataFrame.fillna` / :meth:`Series.fillna`
+  - :meth:`DataFrame.interpolate` / :meth:`Series.interpolate`
+  - :meth:`DataFrame.ffill` / :meth:`Series.ffill`
+  - :meth:`DataFrame.bfill` / :meth:`Series.bfill`
   - :meth:`DataFrame.infer_objects` / :meth:`Series.infer_objects`
   - :meth:`DataFrame.astype` / :meth:`Series.astype`
+  - :meth:`DataFrame.convert_dtypes` / :meth:`Series.convert_dtypes`
   - :func:`concat`
 
   These methods return views when Copy-on-Write is enabled, which provides a significant
@@ -248,6 +251,9 @@ Copy-on-Write improvements
   can never update the original Series or DataFrame. Therefore, an informative
   error is raised to the user instead of silently doing nothing (:issue:`49467`)
 
+- :meth:`DataFrame.replace` will now respect the Copy-on-Write mechanism
+  when ``inplace=True``.
+
 Copy-on-Write can be enabled through one of
 
 .. code-block:: python
@@ -299,6 +305,7 @@ Other enhancements
 - Added :meth:`DatetimeIndex.as_unit` and :meth:`TimedeltaIndex.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`50616`)
 - Added :meth:`Series.dt.unit` and :meth:`Series.dt.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`51223`)
 - Added new argument ``dtype`` to :func:`read_sql` to be consistent with :func:`read_sql_query` (:issue:`50797`)
+- Added new argument ``engine`` to :func:`read_json` to support parsing JSON with pyarrow by specifying ``engine="pyarrow"`` (:issue:`48893`)
 - Added support for SQLAlchemy 2.0 (:issue:`40686`)
 -
 
@@ -630,7 +637,9 @@ The arguments signature is similar, albeit ``col_space`` has been removed since
 it is ignored by LaTeX engines. This render engine also requires ``jinja2`` as a
 dependency which needs to be installed, since rendering is based upon jinja2 templates.
 
-The pandas options below are no longer used and will be removed in future releases.
+The pandas latex options below are no longer used and have been removed. The generic
+max rows and columns arguments remain, but for this functionality they should be
+replaced by the Styler equivalents.
 The alternative options giving similar functionality are indicated below:
 
 - ``display.latex.escape``: replaced with ``styler.format.escape``,
@@ -644,6 +653,13 @@ The alternative options giving similar functionality are indicated below:
   ``styler.render.max_rows``, ``styler.render.max_columns`` and
   ``styler.render.max_elements``.
 
+Note that due to this change some defaults have also changed:
+
+- ``multirow`` now defaults to *True*.
+- ``multirow_align`` defaults to *"r"* instead of *"l"*.
+- ``multicol_align`` defaults to *"r"* instead of *"l"*.
+- ``escape`` now defaults to *False*.
+
 Note that the behaviour of ``_repr_latex_`` is also changed. Previously
 setting ``display.latex.repr`` would generate LaTeX only when using nbconvert for a
 JupyterNotebook, and not when the user is running the notebook. Now the
 
@@ -324,6 +324,9 @@ def closed(self) -> bool:
 # read_csv engines
 CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"]
 
+# read_json engines
+JSONEngine = Literal["ujson", "pyarrow"]
+
 # read_xml parsers
 XMLParsers = Literal["lxml", "etree"]
 
 
@@ -210,13 +210,6 @@ def use_numba_cb(key) -> None:
     (default: False)
 """
 
-pc_latex_repr_doc = """
-: boolean
-    Whether to produce a latex DataFrame representation for jupyter
-    environments that support it.
-    (default: False)
-"""
-
 pc_table_schema_doc = """
 : boolean
     Whether to publish a Table Schema representation for frontends
@@ -292,41 +285,6 @@ def use_numba_cb(key) -> None:
     df.info() is called. Valid values True,False,'deep'
 """
 
-pc_latex_escape = """
-: bool
-    This specifies if the to_latex method of a Dataframe uses escapes special
-    characters.
-    Valid values: False,True
-"""
-
-pc_latex_longtable = """
-:bool
-    This specifies if the to_latex method of a Dataframe uses the longtable
-    format.
-    Valid values: False,True
-"""
-
-pc_latex_multicolumn = """
-: bool
-    This specifies if the to_latex method of a Dataframe uses multicolumns
-    to pretty-print MultiIndex columns.
-    Valid values: False,True
-"""
-
-pc_latex_multicolumn_format = """
-: string
-    This specifies the format for multicolumn headers.
-    Can be surrounded with '|'.
-    Valid values: 'l', 'c', 'r', 'p{<width>}'
-"""
-
-pc_latex_multirow = """
-: bool
-    This specifies if the to_latex method of a Dataframe uses multirows
-    to pretty-print MultiIndex rows.
-    Valid values: False,True
-"""
-
 
 def table_schema_cb(key) -> None:
     from pandas.io.formats.printing import enable_data_resource_formatter
@@ -425,16 +383,6 @@ def is_terminal() -> bool:
     cf.register_option(
         "unicode.ambiguous_as_wide", False, pc_east_asian_width_doc, validator=is_bool
     )
-    cf.register_option("latex.repr", False, pc_latex_repr_doc, validator=is_bool)
-    cf.register_option("latex.escape", True, pc_latex_escape, validator=is_bool)
-    cf.register_option("latex.longtable", False, pc_latex_longtable, validator=is_bool)
-    cf.register_option(
-        "latex.multicolumn", True, pc_latex_multicolumn, validator=is_bool
-    )
-    cf.register_option(
-        "latex.multicolumn_format", "l", pc_latex_multicolumn, validator=is_text
-    )
-    cf.register_option("latex.multirow", False, pc_latex_multirow, validator=is_bool)
     cf.register_option(
         "html.table_schema",
         False,
 
@@ -3233,30 +3233,54 @@ def to_latex(
             columns. By default, 'l' will be used for all columns except
             columns of numbers, which default to 'r'.
         longtable : bool, optional
-            By default, the value will be read from the pandas config
-            module. Use a longtable environment instead of tabular. Requires
+            Use a longtable environment instead of tabular. Requires
             adding a \usepackage{{longtable}} to your LaTeX preamble.
+            By default, the value will be read from the pandas config
+            module, and set to `True` if the option ``styler.latex.environment`` is
+            `"longtable"`.
+
+            .. versionchanged:: 2.0.0
+               The pandas option affecting this argument has changed.
         escape : bool, optional
             By default, the value will be read from the pandas config
-            module. When set to False prevents from escaping latex special
+            module and set to `True` if the option ``styler.format.escape`` is
+            `"latex"`. When set to False prevents from escaping latex special
             characters in column names.
+
+            .. versionchanged:: 2.0.0
+               The pandas option affecting this argument has changed, as has the
+               default value to `False`.
         encoding : str, optional
             A string representing the encoding to use in the output file,
             defaults to 'utf-8'.
         decimal : str, default '.'
             Character recognized as decimal separator, e.g. ',' in Europe.
         multicolumn : bool, default True
             Use \multicolumn to enhance MultiIndex columns.
-            The default will be read from the config module.
-        multicolumn_format : str, default 'l'
+            The default will be read from the config module, and is set
+            as the option ``styler.sparse.columns``.
+
+            .. versionchanged:: 2.0.0
+               The pandas option affecting this argument has changed.
+        multicolumn_format : str, default 'r'
             The alignment for multicolumns, similar to `column_format`
-            The default will be read from the config module.
-        multirow : bool, default False
+            The default will be read from the config module, and is set as the option
+            ``styler.latex.multicol_align``.
+
+            .. versionchanged:: 2.0.0
+               The pandas option affecting this argument has changed, as has the
+               default value to "r".
+        multirow : bool, default True
             Use \multirow to enhance MultiIndex rows. Requires adding a
             \usepackage{{multirow}} to your LaTeX preamble. Will print
             centered labels (instead of top-aligned) across the contained
             rows, separating groups via clines. The default will be read
-            from the pandas config module.
+            from the pandas config module, and is set as the option
+            ``styler.sparse.index``.
+
+            .. versionchanged:: 2.0.0
+               The pandas option affecting this argument has changed, as has the
+               default value to `True`.
         caption : str or tuple, optional
             Tuple (full_caption, short_caption),
             which results in ``\caption[short_caption]{{full_caption}}``;
@@ -3324,15 +3348,15 @@ def to_latex(
         if self.ndim == 1:
             self = self.to_frame()
         if longtable is None:
-            longtable = config.get_option("display.latex.longtable")
+            longtable = config.get_option("styler.latex.environment") == "longtable"
         if escape is None:
-            escape = config.get_option("display.latex.escape")
+            escape = config.get_option("styler.format.escape") == "latex"
         if multicolumn is None:
-            multicolumn = config.get_option("display.latex.multicolumn")
+            multicolumn = config.get_option("styler.sparse.columns")
         if multicolumn_format is None:
-            multicolumn_format = config.get_option("display.latex.multicolumn_format")
+            multicolumn_format = config.get_option("styler.latex.multicol_align")
         if multirow is None:
-            multirow = config.get_option("display.latex.multirow")
+            multirow = config.get_option("styler.sparse.index")
 
         if column_format is not None and not isinstance(column_format, str):
             raise ValueError("`column_format` must be str or unicode")
@@ -3418,7 +3442,9 @@ def _wrap(x, alt_format_):
             "label": label,
             "position": position,
             "column_format": column_format,
-            "clines": "skip-last;data" if multirow else None,
+            "clines": "skip-last;data"
+            if (multirow and isinstance(self.index, MultiIndex))
+            else None,
             "bold_rows": bold_rows,
         }
 
@@ -6647,7 +6673,7 @@ def convert_dtypes(
                 # https://github.com/python/mypy/issues/8354
                 return cast(NDFrameT, result)
             else:
-                return self.copy()
+                return self.copy(deep=None)
 
     # ----------------------------------------------------------------------
     # Filling NA's
 
@@ -219,16 +219,9 @@ def apply(self, func, *args, **kwargs) -> Series:
     def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):
 
         if maybe_use_numba(engine):
-            data = self._obj_with_exclusions
-            result = self._aggregate_with_numba(
-                data.to_frame(), func, *args, engine_kwargs=engine_kwargs, **kwargs
+            return self._aggregate_with_numba(
+                func, *args, engine_kwargs=engine_kwargs, **kwargs
             )
-            index = self.grouper.result_index
-            result = self.obj._constructor(result.ravel(), index=index, name=data.name)
-            if not self.as_index:
-                result = self._insert_inaxis_grouper(result)
-                result.index = default_index(len(result))
-            return result
 
         relabeling = func is None
         columns = None
@@ -1261,16 +1254,9 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
     def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):
 
         if maybe_use_numba(engine):
-            data = self._obj_with_exclusions
-            result = self._aggregate_with_numba(
-                data, func, *args, engine_kwargs=engine_kwargs, **kwargs
+            return self._aggregate_with_numba(
+                func, *args, engine_kwargs=engine_kwargs, **kwargs
             )
-            index = self.grouper.result_index
-            result = self.obj._constructor(result, index=index, columns=data.columns)
-            if not self.as_index:
-                result = self._insert_inaxis_grouper(result)
-                result.index = default_index(len(result))
-            return result
 
         relabeling, func, columns, order = reconstruct_func(func, **kwargs)
         func = maybe_mangle_lambdas(func)
@@ -1283,7 +1269,12 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
             # this should be the only (non-raising) case with relabeling
             # used reordered index of columns
             result = result.iloc[:, order]
-            result.columns = columns
+            result = cast(DataFrame, result)
+            # error: Incompatible types in assignment (expression has type
+            # "Optional[List[str]]", variable has type
+            # "Union[Union[Union[ExtensionArray, ndarray[Any, Any]],
+            # Index, Series], Sequence[Any]]")
+            result.columns = columns  # type: ignore[assignment]
 
         if result is None:
 
@@ -1312,11 +1303,18 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                 except ValueError as err:
                     if "No objects to concatenate" not in str(err):
                         raise
+                    # _aggregate_frame can fail with e.g. func=Series.mode,
+                    # where it expects 1D values but would be getting 2D values
+                    # In other tests, using aggregate_frame instead of GroupByApply
+                    #  would give correct values but incorrect dtypes
+                    #  object vs float64 in test_cython_agg_empty_buckets
+                    #  float64 vs int64 in test_category_order_apply
                     result = self._aggregate_frame(func)
 
                 else:
                     # GH#32040, GH#35246
                     # e.g. test_groupby_as_index_select_column_sum_empty_df
+                    result = cast(DataFrame, result)
                     result.columns = self._obj_with_exclusions.columns.copy()
 
         if not self.as_index:
@@ -1502,8 +1500,7 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike:
         res_mgr.set_axis(1, mgr.axes[1])
 
         res_df = self.obj._constructor(res_mgr)
-        if self.axis == 1:
-            res_df = res_df.T
+        res_df = self._maybe_transpose_result(res_df)
         return res_df
 
     def _transform_general(self, func, *args, **kwargs):
@@ -1830,7 +1827,7 @@ def _iterate_column_groupbys(self, obj: DataFrame | Series):
                 observed=self.observed,
             )
 
-    def _apply_to_column_groupbys(self, func, obj: DataFrame | Series) -> DataFrame:
+    def _apply_to_column_groupbys(self, func, obj: DataFrame) -> DataFrame:
         from pandas.core.reshape.concat import concat
 
         columns = obj.columns