diff --git a/asv_bench/benchmarks/tslibs/tz_convert.py b/asv_bench/benchmarks/tslibs/tz_convert.py
new file mode 100644
index 0000000000000..2a1f559bdf6d4
--- /dev/null
+++ b/asv_bench/benchmarks/tslibs/tz_convert.py
@@ -0,0 +1,30 @@
+import numpy as np
+from pytz import UTC
+
+from pandas._libs.tslibs.tzconversion import tz_convert, tz_localize_to_utc
+
+from .tslib import _sizes, _tzs
+
+
+class TimeTZConvert:  # times tz_convert and tz_localize_to_utc on int64 data
+    params = (  # candidate values for the (size, tz) arguments used below
+        _sizes,  # array lengths; passed to setup as `size`
+        [x for x in _tzs if x is not None],  # skip the None entries of _tzs
+    )
+    param_names = ["size", "tz"]  # labels for the two entries of params, in order
+
+    def setup(self, size, tz):  # tz is accepted but not used when building data
+        arr = np.random.randint(0, 10, size=size, dtype="i8")  # int64 in [0, 10)
+        self.i8data = arr  # read by both time_* methods
+
+    def time_tz_convert_from_utc(self, size, tz):
+        # Benchmarks converting self.i8data from UTC to `tz`; effectively:
+        #  dti = DatetimeIndex(self.i8data, tz=tz)
+        #  dti.tz_localize(None)
+        tz_convert(self.i8data, UTC, tz)
+
+    def time_tz_localize_to_utc(self, size, tz):
+        # Benchmarks localizing self.i8data in `tz` (result in UTC); effectively:
+        #  dti = DatetimeIndex(self.i8data)
+        #  dti.tz_localize(tz, ambiguous="NaT", nonexistent="NaT")
+        tz_localize_to_utc(self.i8data, tz, ambiguous="NaT", nonexistent="NaT")
diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx
index d1d6bc40ef288..9bd935940dc7b 100644
--- a/pandas/_libs/tslibs/tzconversion.pyx
+++ b/pandas/_libs/tslibs/tzconversion.pyx
@@ -551,29 +551,48 @@ cdef int64_t[:] _tz_convert_dst(
         int64_t[:] result = np.empty(n, dtype=np.int64)
         ndarray[int64_t] trans
         int64_t[:] deltas
-        int64_t v
+        int64_t v, delta
+        str typ
 
     # tz is assumed _not_ to be tzlocal; that should go
     #  through _tz_convert_tzlocal_utc
 
-    trans, deltas, _ = get_dst_info(tz)
-    if not to_utc:
-        # We add `offset` below instead of subtracting it
-        deltas = -1 * np.array(deltas, dtype='i8')
+    trans, deltas, typ = get_dst_info(tz)
 
-    # Previously, this search was done pointwise to try and benefit
-    # from getting to skip searches for iNaTs. However, it seems call
-    # overhead dominates the search time so doing it once in bulk
-    # is substantially faster (GH#24603)
-    pos = trans.searchsorted(values, side='right') - 1
+    if typ not in ["pytz", "dateutil"]:
+        # FixedOffset, we know len(deltas) == 1
+        delta = deltas[0]
 
-    for i in range(n):
-        v = values[i]
-        if v == NPY_NAT:
-            result[i] = v
-        else:
-            if pos[i] < 0:
-                raise ValueError('First time before start of DST info')
-            result[i] = v - deltas[pos[i]]
+        for i in range(n):
+            v = values[i]
+            if v == NPY_NAT:
+                result[i] = v
+            else:
+                if to_utc:
+                    result[i] = v - delta
+                else:
+                    result[i] = v + delta
+
+    else:
+        # Previously, this search was done pointwise to try and benefit
+        # from getting to skip searches for iNaTs. However, it seems call
+        # overhead dominates the search time so doing it once in bulk
+        # is substantially faster (GH#24603)
+        pos = trans.searchsorted(values, side="right") - 1
+
+        for i in range(n):
+            v = values[i]
+            if v == NPY_NAT:
+                result[i] = v
+            else:
+                if pos[i] < 0:
+                    # TODO: How is this reached?  Should we be checking for
+                    #  it elsewhere?
+                    raise ValueError("First time before start of DST info")
+
+                if to_utc:
+                    result[i] = v - deltas[pos[i]]
+                else:
+                    result[i] = v + deltas[pos[i]]
 
     return result