diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index 86a255321f827..0b9e56fd19556 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -1077,7 +1077,7 @@ def fill_binop(left, right, fill_value):
     return left, right
 
 
-def mask_cmp_op(x, y, op, allowed_types):
+def mask_cmp_op(x, y, op):
     """
     Apply the function `op` to only non-null points in x and y.
 
@@ -1086,16 +1086,14 @@ def mask_cmp_op(x, y, op, allowed_types):
     x : array-like
     y : array-like
     op : binary operation
-    allowed_types : class or tuple of classes
 
     Returns
     -------
     result : ndarray[bool]
     """
-    # TODO: Can we make the allowed_types arg unnecessary?
     xrav = x.ravel()
     result = np.empty(x.size, dtype=bool)
-    if isinstance(y, allowed_types):
+    if isinstance(y, (np.ndarray, ABCSeries)):
         yrav = y.ravel()
         mask = notna(xrav) & notna(yrav)
         result[mask] = op(np.array(list(xrav[mask])),
@@ -1633,39 +1631,38 @@ def _arith_method_SERIES(cls, op, special):
                         if op in [divmod, rdivmod] else _construct_result)
 
     def na_op(x, y):
-        import pandas.core.computation.expressions as expressions
-        try:
-            result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
-        except TypeError:
-            result = masked_arith_op(x, y, op)
-
-        result = missing.fill_zeros(result, x, y, op_name, fill_zeros)
-        return result
-
-    def safe_na_op(lvalues, rvalues):
         """
-        return the result of evaluating na_op on the passed in values
+        Return the result of evaluating op on the passed in values.
 
-        try coercion to object type if the native types are not compatible
+        If native types are not compatible, try coercion to object dtype.
 
         Parameters
         ----------
-        lvalues : array-like
-        rvalues : array-like
+        x : array-like
+        y : array-like or scalar
+
+        Returns
+        -------
+        array-like
 
         Raises
         ------
-        TypeError: invalid operation
+        TypeError : invalid operation
         """
+        import pandas.core.computation.expressions as expressions
         try:
-            with np.errstate(all='ignore'):
-                return na_op(lvalues, rvalues)
-        except Exception:
-            if is_object_dtype(lvalues):
-                return libalgos.arrmap_object(lvalues,
-                                              lambda x: op(x, rvalues))
+            result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
+        except TypeError:
+            result = masked_arith_op(x, y, op)
+        except Exception:  # TODO: more specific?
+            if is_object_dtype(x):
+                return libalgos.arrmap_object(x,
+                                              lambda val: op(val, y))
             raise
 
+        result = missing.fill_zeros(result, x, y, op_name, fill_zeros)
+        return result
+
     def wrapper(left, right):
         if isinstance(right, ABCDataFrame):
             return NotImplemented
@@ -1713,7 +1710,8 @@ def wrapper(left, right):
         if isinstance(rvalues, ABCSeries):
             rvalues = rvalues.values
 
-        result = safe_na_op(lvalues, rvalues)
+        with np.errstate(all='ignore'):
+            result = na_op(lvalues, rvalues)
         return construct_result(left, result,
                                 index=left.index, name=res_name, dtype=None)
 
@@ -2136,7 +2134,6 @@ def na_op(x, y):
             result = masked_arith_op(x, y, op)
 
         result = missing.fill_zeros(result, x, y, op_name, fill_zeros)
-
         return result
 
     if op_name in _op_descriptions:
@@ -2183,7 +2180,7 @@ def na_op(x, y):
             with np.errstate(invalid='ignore'):
                 result = op(x, y)
         except TypeError:
-            result = mask_cmp_op(x, y, op, (np.ndarray, ABCSeries))
+            result = mask_cmp_op(x, y, op)
         return result
 
     doc = _flex_comp_doc_FRAME.format(op_name=op_name,
diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py
index afd29852fea7e..64b4e162483f1 100644
--- a/pandas/tests/arithmetic/test_datetime64.py
+++ b/pandas/tests/arithmetic/test_datetime64.py
@@ -37,6 +37,27 @@ def assert_all(obj):
 # ------------------------------------------------------------------
 # Comparisons
 
+class TestDatetime64ArrayLikeComparisons:
+    # Comparison tests for datetime64 vectors fully parametrized over
+    #  DataFrame/Series/DatetimeIndex/DatetimeArray.  Ideally all comparison
+    #  tests will eventually end up here.
+
+    def test_compare_zerodim(self, tz_naive_fixture, box_with_array):
+        # Test comparison with zero-dimensional array is unboxed
+        tz = tz_naive_fixture
+        box = box_with_array
+        xbox = box_with_array if box_with_array is not pd.Index else np.ndarray
+        dti = date_range('20130101', periods=3, tz=tz)
+
+        other = np.array(dti.to_numpy()[0])
+
+        # FIXME: ValueError with transpose on tzaware
+        dtarr = tm.box_expected(dti, box, transpose=False)
+        result = dtarr <= other
+        expected = np.array([True, False, False])
+        expected = tm.box_expected(expected, xbox, transpose=False)
+        tm.assert_equal(result, expected)
+
 
 class TestDatetime64DataFrameComparison:
     @pytest.mark.parametrize('timestamps', [
@@ -339,17 +360,6 @@ def test_comparison_tzawareness_compat(self, op):
 
 class TestDatetimeIndexComparisons:
 
-    # TODO: parametrize over box
-    def test_compare_zerodim(self, tz_naive_fixture):
-        # Test comparison with zero-dimensional array is unboxed
-        tz = tz_naive_fixture
-        dti = date_range('20130101', periods=3, tz=tz)
-
-        other = np.array(dti.to_numpy()[0])
-        result = dti <= other
-        expected = np.array([True, False, False])
-        tm.assert_numpy_array_equal(result, expected)
-
     # TODO: moved from tests.indexes.test_base; parametrize and de-duplicate
     @pytest.mark.parametrize("op", [
         operator.eq, operator.ne, operator.gt, operator.lt,
diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py
index bc1b78bf944d1..413d58d9429e7 100644
--- a/pandas/tests/arithmetic/test_period.py
+++ b/pandas/tests/arithmetic/test_period.py
@@ -20,17 +20,27 @@
 # Comparisons
 
 
-class TestPeriodIndexComparisons:
+class TestPeriodArrayLikeComparisons:
+    # Comparison tests for PeriodDtype vectors fully parametrized over
+    #  DataFrame/Series/PeriodIndex/PeriodArray.  Ideally all comparison
+    #  tests will eventually end up here.
 
-    # TODO: parameterize over boxes
-    def test_compare_zerodim(self):
+    def test_compare_zerodim(self, box_with_array):
         # GH#26689 make sure we unbox zero-dimensional arrays
+        xbox = box_with_array if box_with_array is not pd.Index else np.ndarray
+
         pi = pd.period_range('2000', periods=4)
         other = np.array(pi.to_numpy()[0])
 
+        pi = tm.box_expected(pi, box_with_array)
         result = pi <= other
         expected = np.array([True, False, False, False])
-        tm.assert_numpy_array_equal(result, expected)
+        expected = tm.box_expected(expected, xbox)
+        tm.assert_equal(result, expected)
+
+
+class TestPeriodIndexComparisons:
+    # TODO: parameterize over boxes
 
     @pytest.mark.parametrize("other", ["2017", 2017])
     def test_eq(self, other):
diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py
index 047900c3d7586..22b5fd452d661 100644
--- a/pandas/tests/arithmetic/test_timedelta64.py
+++ b/pandas/tests/arithmetic/test_timedelta64.py
@@ -31,22 +31,33 @@ def get_upcast_box(box, vector):
 # ------------------------------------------------------------------
 # Timedelta64[ns] dtype Comparisons
 
-class TestTimedelta64ArrayComparisons:
-    # TODO: All of these need to be parametrized over box
+class TestTimedelta64ArrayLikeComparisons:
+    # Comparison tests for timedelta64[ns] vectors fully parametrized over
+    #  DataFrame/Series/TimedeltaIndex/TimedeltaArray.  Ideally all comparison
+    #  tests will eventually end up here.
 
-    def test_compare_timedelta64_zerodim(self):
+    def test_compare_timedelta64_zerodim(self, box_with_array):
         # GH#26689 should unbox when comparing with zerodim array
+        box = box_with_array
+        xbox = box_with_array if box_with_array is not pd.Index else np.ndarray
+
         tdi = pd.timedelta_range('2H', periods=4)
         other = np.array(tdi.to_numpy()[0])
 
+        tdi = tm.box_expected(tdi, box)
         res = tdi <= other
         expected = np.array([True, False, False, False])
-        tm.assert_numpy_array_equal(res, expected)
+        expected = tm.box_expected(expected, xbox)
+        tm.assert_equal(res, expected)
 
         with pytest.raises(TypeError):
             # zero-dim of wrong dtype should still raise
             tdi >= np.array(4)
 
+
+class TestTimedelta64ArrayComparisons:
+    # TODO: All of these need to be parametrized over box
+
     def test_compare_timedelta_series(self):
         # regression test for GH#5963
         s = pd.Series([timedelta(days=1), timedelta(days=2)])
diff --git a/pandas/tests/tseries/offsets/test_offsets_properties.py b/pandas/tests/tseries/offsets/test_offsets_properties.py
index 50be2deca4d30..271f4ceef5f49 100644
--- a/pandas/tests/tseries/offsets/test_offsets_properties.py
+++ b/pandas/tests/tseries/offsets/test_offsets_properties.py
@@ -71,7 +71,10 @@ def test_on_offset_implementations(dt, offset):
     assert offset.onOffset(dt) == (compare == dt)
 
 
-@pytest.mark.xfail
+@pytest.mark.xfail(reason="res_v2 below is incorrect, needs to use the "
+                          "commented-out version with tz_localize.  "
+                          "But with that fix in place, hypothesis then "
+                          "has errors in timezone generation.")
 @given(gen_yqm_offset, gen_date_range)
 def test_apply_index_implementations(offset, rng):
     # offset.apply_index(dti)[i] should match dti[i] + offset
@@ -82,6 +85,7 @@ def test_apply_index_implementations(offset, rng):
 
     res = rng + offset
     res_v2 = offset.apply_index(rng)
+    # res_v2 = offset.apply_index(rng.tz_localize(None)).tz_localize(rng.tz)
     assert (res == res_v2).all()
 
     assert res[0] == rng[0] + offset
@@ -93,7 +97,7 @@ def test_apply_index_implementations(offset, rng):
     # TODO: Check randomly assorted entries, not just first/last
 
 
-@pytest.mark.xfail
+@pytest.mark.xfail  # TODO: reason?
 @given(gen_yqm_offset)
 def test_shift_across_dst(offset):
     # GH#18319 check that 1) timezone is correctly normalized and