diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index ad5af5df710ba..9f7aff0a30bd3 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -238,6 +238,8 @@ Other enhancements
 - :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`)
 -
 - Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`)
+- Calling a NumPy ufunc on a ``DataFrame`` with extension types now preserves the extension types when possible (:issue:`23743`).
+- Calling a binary-input NumPy ufunc on multiple ``DataFrame`` objects now aligns the inputs first, matching the behavior of binary operations and ufuncs on ``Series`` (:issue:`23743`).
 - Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`)
 - :meth:`DataFrame.to_parquet` now supports :class:`MultiIndex` for columns in parquet format (:issue:`34777`)
 - Added :meth:`Rolling.sem()` and :meth:`Expanding.sem()` to compute the standard error of mean (:issue:`26476`).
@@ -470,6 +472,7 @@ Deprecations
 - The default value of ``regex`` for :meth:`Series.str.replace` will change from ``True`` to ``False`` in a future release. In addition, single character regular expressions will *not* be treated as literal strings when ``regex=True`` is set. (:issue:`24804`)
 - Deprecated automatic alignment on comparison operations between :class:`DataFrame` and :class:`Series`, do ``frame, ser = frame.align(ser, axis=1, copy=False)`` before e.g. ``frame == ser`` (:issue:`28759`)
 - :meth:`Rolling.count` with ``min_periods=None`` will default to the size of the window in a future version (:issue:`31302`)
+- Using the ``outer`` method of a NumPy ufunc on a :class:`DataFrame`, which returns a 4d ndarray, is now deprecated. Convert the :class:`DataFrame` to an ndarray first (:issue:`23743`)
 - Deprecated slice-indexing on timezone-aware :class:`DatetimeIndex` with naive ``datetime`` objects, to match scalar indexing behavior (:issue:`36148`)
 - :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`)
 - Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`)
@@ -750,6 +753,7 @@ Other
 
 - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
 - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`)
+- Fixed metadata propagation in :meth:`Series.abs` and ufuncs called on Series and DataFrames (:issue:`28283`)
 - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly casting from ``PeriodDtype`` to object dtype (:issue:`34871`)
 - Fixed bug in metadata propagation incorrectly copying DataFrame columns as metadata when the column name overlaps with the metadata name (:issue:`37037`)
 - Fixed metadata propagation in the :class:`Series.dt`, :class:`Series.str` accessors, :class:`DataFrame.duplicated`, :class:`DataFrame.stack`, :class:`DataFrame.unstack`, :class:`DataFrame.pivot`, :class:`DataFrame.append`, :class:`DataFrame.diff`, :class:`DataFrame.applymap` and :class:`DataFrame.update` methods (:issue:`28283`) (:issue:`37381`)
diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py
index da366c9abf0a4..6b28f8f135769 100644
--- a/pandas/core/arraylike.py
+++ b/pandas/core/arraylike.py
@@ -5,8 +5,15 @@
     ExtensionArray
 """
 import operator
+from typing import Any, Callable
+import warnings
 
-from pandas.core.ops import roperator
+import numpy as np
+
+from pandas._libs import lib
+
+from pandas.core.construction import extract_array
+from pandas.core.ops import maybe_dispatch_ufunc_to_dunder_op, roperator
 from pandas.core.ops.common import unpack_zerodim_and_defer
 
 
@@ -140,3 +147,174 @@ def __pow__(self, other):
     @unpack_zerodim_and_defer("__rpow__")
     def __rpow__(self, other):
         return self._arith_method(other, roperator.rpow)
+
+
+def array_ufunc(self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any):
+    """
+    Compatibility with numpy ufuncs.
+
+    Backs the ``__array_ufunc__`` method defined in ``pandas.core.generic``.
+
+    Parameters
+    ----------
+    self : NDFrame
+        The pandas object whose ``__array_ufunc__`` was invoked.
+    ufunc : Callable
+        The NumPy ufunc being applied.
+    method : str
+        Name of the ufunc method to call, e.g. ``"__call__"`` or ``"outer"``.
+    *inputs : Any
+        Positional operands of the ufunc call.
+    **kwargs : Any
+        Keyword arguments of the ufunc call, forwarded to the ufunc.
+
+    Returns
+    -------
+    scalar, ndarray, NDFrame, tuple or NotImplemented
+        ``NotImplemented`` is returned when another input should handle the
+        operation; a tuple is returned for ufuncs with more than one output.
+
+    Raises
+    ------
+    NotImplementedError
+        For mixed DataFrame / Series inputs, and for ``outer`` on objects
+        that are not two-dimensional when the result's ndim differs.
+
+    See Also
+    --------
+    numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
+    """
+    from pandas.core.generic import NDFrame
+    from pandas.core.internals import BlockManager
+
+    cls = type(self)
+
+    # for binary ops, use our custom dunder methods
+    result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
+    if result is not NotImplemented:
+        return result
+
+    # Determine if we should defer.
+    no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__)
+
+    for item in inputs:
+        higher_priority = (
+            hasattr(item, "__array_priority__")
+            and item.__array_priority__ > self.__array_priority__
+        )
+        has_array_ufunc = (
+            hasattr(item, "__array_ufunc__")
+            and type(item).__array_ufunc__ not in no_defer
+            and not isinstance(item, self._HANDLED_TYPES)
+        )
+        if higher_priority or has_array_ufunc:
+            return NotImplemented
+
+    # align all the inputs.
+    types = tuple(type(x) for x in inputs)
+    alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]
+
+    if len(alignable) > 1:
+        # This triggers alignment.
+        # At the moment, there aren't any ufuncs with more than two inputs
+        # so this ends up just being x1.index | x2.index, but we write
+        # it to handle *args.
+
+        if len(set(types)) > 1:
+            # We currently don't handle ufunc(DataFrame, Series)
+            # well. Previously this raised an internal ValueError. We might
+            # support it someday, so raise a NotImplementedError.
+            raise NotImplementedError(
+                "Cannot apply ufunc {} to mixed DataFrame and Series "
+                "inputs.".format(ufunc)
+            )
+        # work on a copy; entries are replaced in place below
+        axes = list(self.axes)
+        for obj in alignable[1:]:
+            # this relies on the fact that we aren't handling mixed
+            # series / frame ufuncs.
+            for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
+                axes[i] = ax1.union(ax2)
+
+        reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
+        inputs = tuple(
+            x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
+            for x, t in zip(inputs, types)
+        )
+    else:
+        reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))
+
+    if self.ndim == 1:
+        names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
+        name = names[0] if len(set(names)) == 1 else None
+        reconstruct_kwargs = {"name": name}
+    else:
+        reconstruct_kwargs = {}
+
+    def reconstruct(result):
+        # Wrap a raw ufunc result back into the caller's pandas type.
+        if lib.is_scalar(result):
+            return result
+        if result.ndim != self.ndim:
+            if method == "outer":
+                if self.ndim == 2:
+                    # we already deprecated for Series
+                    msg = (
+                        "outer method for ufunc {} is not implemented on "
+                        "pandas objects. Returning an ndarray, but in the "
+                        "future this will raise a 'NotImplementedError'. "
+                        "Consider explicitly converting the DataFrame "
+                        "to an array with '.to_numpy()' first."
+                    )
+                    warnings.warn(msg.format(ufunc), FutureWarning, stacklevel=4)
+                    return result
+                raise NotImplementedError
+            return result
+        if isinstance(result, BlockManager):
+            # we went through BlockManager.apply
+            result = self._constructor(result, **reconstruct_kwargs, copy=False)
+        else:
+            # we converted an array, lost our axes
+            result = self._constructor(
+                result, **reconstruct_axes, **reconstruct_kwargs, copy=False
+            )
+        # TODO: When we support multiple values in __finalize__, this
+        # should pass alignable to `__finalize__` instead of self.
+        # Then `np.add(a, b)` would consider attrs from both a and b
+        # when a and b are NDFrames.
+        if len(alignable) == 1:
+            result = result.__finalize__(self)
+        return result
+
+    if self.ndim == 1:
+        # ufunc(series, ...)
+        inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
+        result = getattr(ufunc, method)(*inputs, **kwargs)
+    elif (
+        len(inputs) > 1
+        or ufunc.nout > 1  # type: ignore[attr-defined]
+        or method != "__call__"
+        or kwargs
+    ):
+        # Just give up on preserving types in the complex case.
+        # In theory we could preserve them for them.
+        # * nout>1 is doable if BlockManager.apply took nout and
+        #   returned a Tuple[BlockManager].
+        # * len(inputs) > 1 is doable when we know that we have
+        #   aligned blocks / dtypes.
+        # * other ufunc methods (reduce, accumulate, ...) and keyword
+        #   arguments (axis, where, ...) apply to the whole 2D array and
+        #   must not be silently dropped or evaluated block-by-block.
+        inputs = tuple(np.asarray(x) for x in inputs)
+        result = getattr(ufunc, method)(*inputs, **kwargs)
+    else:
+        # ufunc(dataframe): plain call without kwargs, applied through
+        # the manager (the BlockManager case of ``reconstruct``)
+        mgr = inputs[0]._mgr
+        result = mgr.apply(getattr(ufunc, method))
+
+    if ufunc.nout > 1:  # type: ignore[attr-defined]
+        result = tuple(reconstruct(x) for x in result)
+    else:
+        result = reconstruct(result)
+    return result
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5b87c4ea8b9cc..9c70f3557e339 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -434,6 +434,7 @@ class DataFrame(NDFrame, OpsMixin):
 
     _internal_names_set = {"columns", "index"} | NDFrame._internal_names_set
     _typ = "dataframe"
+    _HANDLED_TYPES = (Series, Index, ExtensionArray, np.ndarray)
 
     @property
     def _constructor(self) -> Type[DataFrame]:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 7e8012d76fe1b..e866314f00639 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -87,7 +87,7 @@
 from pandas.core.dtypes.missing import isna, notna
 
 import pandas as pd
-from pandas.core import indexing, missing, nanops
+from pandas.core import arraylike, indexing, missing, nanops
 import pandas.core.algorithms as algos
 from pandas.core.base import PandasObject, SelectionMixin
 import pandas.core.common as com
@@ -1927,6 +1927,14 @@ def __array_wrap__(
             self, method="__array_wrap__"
         )
 
+    def __array_ufunc__(
+        self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any
+    ):
+        """
+        Delegate NumPy ufunc calls to :func:`pandas.core.arraylike.array_ufunc`.
+        """
+        return arraylike.array_ufunc(self, ufunc, method, *inputs, **kwargs)
+
     # ideally we would define this to avoid the getattr checks, but
     # is slower
     # @property
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 4c3ad38c8a922..1e4c0e07de403 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -176,6 +176,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
     """
 
     _typ = "series"
+    _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)
 
     _name: Label
     _metadata: List[str] = ["name"]
@@ -683,81 +684,6 @@ def view(self, dtype=None) -> "Series":
     # NDArray Compat
     _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)
 
-    def __array_ufunc__(
-        self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any
-    ):
-        # TODO: handle DataFrame
-        cls = type(self)
-
-        # for binary ops, use our custom dunder methods
-        result = ops.maybe_dispatch_ufunc_to_dunder_op(
-            self, ufunc, method, *inputs, **kwargs
-        )
-        if result is not NotImplemented:
-            return result
-
-        # Determine if we should defer.
-        no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__)
-
-        for item in inputs:
-            higher_priority = (
-                hasattr(item, "__array_priority__")
-                and item.__array_priority__ > self.__array_priority__
-            )
-            has_array_ufunc = (
-                hasattr(item, "__array_ufunc__")
-                and type(item).__array_ufunc__ not in no_defer
-                and not isinstance(item, self._HANDLED_TYPES)
-            )
-            if higher_priority or has_array_ufunc:
-                return NotImplemented
-
-        # align all the inputs.
-        names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
-        types = tuple(type(x) for x in inputs)
-        # TODO: dataframe
-        alignable = [x for x, t in zip(inputs, types) if issubclass(t, Series)]
-
-        if len(alignable) > 1:
-            # This triggers alignment.
-            # At the moment, there aren't any ufuncs with more than two inputs
-            # so this ends up just being x1.index | x2.index, but we write
-            # it to handle *args.
-            index = alignable[0].index
-            for s in alignable[1:]:
-                index = index.union(s.index)
-            inputs = tuple(
-                x.reindex(index) if issubclass(t, Series) else x
-                for x, t in zip(inputs, types)
-            )
-        else:
-            index = self.index
-
-        inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
-        result = getattr(ufunc, method)(*inputs, **kwargs)
-
-        name = names[0] if len(set(names)) == 1 else None
-
-        def construct_return(result):
-            if lib.is_scalar(result):
-                return result
-            elif result.ndim > 1:
-                # e.g. np.subtract.outer
-                if method == "outer":
-                    # GH#27198
-                    raise NotImplementedError
-                return result
-            return self._constructor(result, index=index, name=name, copy=False)
-
-        if type(result) is tuple:
-            # multiple return values
-            return tuple(construct_return(x) for x in result)
-        elif method == "at":
-            # no return value
-            return None
-        else:
-            return construct_return(result)
-
     def __array__(self, dtype=None) -> np.ndarray:
         """
         Return the values as a NumPy array.
diff --git a/pandas/tests/frame/test_ufunc.py b/pandas/tests/frame/test_ufunc.py
new file mode 100644
index 0000000000000..7bc9aa29af3b4
--- /dev/null
+++ b/pandas/tests/frame/test_ufunc.py
@@ -0,0 +1,117 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+# dtype specs passed to ``DataFrame.astype``: numpy, extension and per-column
+dtypes = [
+    "int64",
+    "Int64",
+    dict(A="int64", B="Int64"),
+]
+
+
+@pytest.mark.parametrize("dtype", dtypes)
+def test_unary_unary(dtype):
+    # unary input, unary output
+    values = np.array([[-1, -1], [1, 1]], dtype="int64")
+    df = pd.DataFrame(values, columns=["A", "B"], index=["a", "b"]).astype(dtype=dtype)
+    result = np.positive(df)
+    expected = pd.DataFrame(
+        np.positive(values), index=df.index, columns=df.columns
+    ).astype(dtype)
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("dtype", dtypes)
+def test_unary_binary(dtype):
+    # unary input, binary output
+    if pd.api.types.is_extension_array_dtype(dtype) or isinstance(dtype, dict):
+        pytest.xfail(reason="Extension / mixed with multiple outputs not implemented.")
+
+    values = np.array([[-1, -1], [1, 1]], dtype="int64")
+    df = pd.DataFrame(values, columns=["A", "B"], index=["a", "b"]).astype(dtype=dtype)
+    result_pandas = np.modf(df)
+    assert isinstance(result_pandas, tuple)
+    assert len(result_pandas) == 2
+    expected_numpy = np.modf(values)
+
+    for result, b in zip(result_pandas, expected_numpy):
+        expected = pd.DataFrame(b, index=df.index, columns=df.columns)
+        tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("dtype", dtypes)
+def test_binary_input_dispatch_binop(dtype):
+    # binop ufuncs are dispatched to our dunder methods.
+    values = np.array([[-1, -1], [1, 1]], dtype="int64")
+    df = pd.DataFrame(values, columns=["A", "B"], index=["a", "b"]).astype(dtype=dtype)
+    result = np.add(df, df)
+    expected = pd.DataFrame(
+        np.add(values, values), index=df.index, columns=df.columns
+    ).astype(dtype)
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("dtype_a", dtypes)
+@pytest.mark.parametrize("dtype_b", dtypes)
+def test_binary_input_aligns_columns(dtype_a, dtype_b):
+    # binary input: the operands only share column "A"
+    if (
+        pd.api.types.is_extension_array_dtype(dtype_a)
+        or isinstance(dtype_a, dict)
+        or pd.api.types.is_extension_array_dtype(dtype_b)
+        or isinstance(dtype_b, dict)
+    ):
+        pytest.xfail(reason="Extension / mixed with multiple inputs not implemented.")
+
+    df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}).astype(dtype_a)
+
+    if isinstance(dtype_a, dict) and isinstance(dtype_b, dict):
+        # build a new mapping: ``dtype_b`` is the same object as the entry in
+        # the module-level ``dtypes`` list used to parametrize the other tests
+        dtype_b = {"A": dtype_b["A"], "C": dtype_b["B"]}
+
+    df2 = pd.DataFrame({"A": [1, 2], "C": [3, 4]}).astype(dtype_b)
+    result = np.heaviside(df1, df2)
+    expected = np.heaviside(
+        np.array([[1, 3, np.nan], [2, 4, np.nan]]),
+        np.array([[1, np.nan, 3], [2, np.nan, 4]]),
+    )
+    expected = pd.DataFrame(expected, index=[0, 1], columns=["A", "B", "C"])
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("dtype", dtypes)
+def test_binary_input_aligns_index(dtype):
+    # binary input: the operands only share index label "a"
+    if pd.api.types.is_extension_array_dtype(dtype) or isinstance(dtype, dict):
+        pytest.xfail(reason="Extension / mixed with multiple inputs not implemented.")
+    df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).astype(dtype)
+    df2 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "c"]).astype(dtype)
+    result = np.heaviside(df1, df2)
+    expected = np.heaviside(
+        np.array([[1, 3], [2, 4], [np.nan, np.nan]]),
+        np.array([[1, 3], [np.nan, np.nan], [3, 4]]),
+    )
+    # TODO(FloatArray): this will be Float64Dtype.
+    expected = pd.DataFrame(expected, index=["a", "b", "c"], columns=["A", "B"])
+    tm.assert_frame_equal(result, expected)
+
+
+def test_binary_frame_series_raises():
+    # We don't currently implement ufunc(DataFrame, Series)
+    df = pd.DataFrame({"A": [1, 2]})
+    with pytest.raises(NotImplementedError, match="logaddexp"):
+        np.logaddexp(df, df["A"])
+
+    with pytest.raises(NotImplementedError, match="logaddexp"):
+        np.logaddexp(df["A"], df)
+
+
+def test_frame_outer_deprecated():
+    # GH-23743: ``ufunc.outer`` on DataFrames emits a FutureWarning
+    df = pd.DataFrame({"A": [1, 2]})
+    with tm.assert_produces_warning(FutureWarning):
+        np.subtract.outer(df, df)
diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py
index 3f7bebd86e983..300f4cd72573a 100644
--- a/pandas/tests/generic/test_duplicate_labels.py
+++ b/pandas/tests/generic/test_duplicate_labels.py
@@ -37,8 +37,8 @@ def test_construction_ok(self, cls, data):
             operator.methodcaller("add", 1),
             operator.methodcaller("rename", str.upper),
             operator.methodcaller("rename", "name"),
-            pytest.param(operator.methodcaller("abs"), marks=not_implemented),
-            # TODO: test np.abs
+            operator.methodcaller("abs"),
+            np.abs,
         ],
     )
     def test_preserved_series(self, func):
diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py
index ecd70bb415334..4974d3fff1df4 100644
--- a/pandas/tests/generic/test_finalize.py
+++ b/pandas/tests/generic/test_finalize.py
@@ -302,7 +302,7 @@
     (pd.DataFrame, frame_data, operator.inv),
     (pd.Series, [1], operator.inv),
     (pd.DataFrame, frame_data, abs),
-    pytest.param((pd.Series, [1], abs), marks=not_implemented_mark),
+    (pd.Series, [1], abs),
     pytest.param((pd.DataFrame, frame_data, round), marks=not_implemented_mark),
     (pd.Series, [1], round),
     (pd.DataFrame, frame_data, operator.methodcaller("take", [0, 0])),