Skip to content

Commit 4d1710f

Browse files
committed
refactor perf3
1 parent 892f474 commit 4d1710f

File tree

1 file changed

+38
-29
lines changed

1 file changed

+38
-29
lines changed

pandas/core/strings.py

Lines changed: 38 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -36,30 +36,28 @@
3636
_shared_docs = dict()
3737

3838

39-
def str_cat_core(array, sep):
39+
def interleave_sep(all_cols, sep):
4040
'''
4141
Auxiliary function for :meth:`str.cat`
4242
4343
Parameters
4444
----------
45-
array : ndarray
46-
Array containing the vectors to be concatenated. These vectors must be
47-
of object type and may not contain any nulls!
45+
all_cols : list of numpy arrays
46+
List of arrays to be concatenated with sep
4847
sep : string
4948
The separator string for concatenating the columns
5049
5150
Returns
5251
-------
53-
concatenated
54-
the vector of concatenated results
52+
list
53+
The list of arrays interleaved with sep; to be fed to np.sum
5554
'''
5655
if sep == '':
57-
return array.sum(axis=1)
58-
else:
59-
res = array[:, 0]
60-
for i in range(1, array.shape[1]):
61-
res += sep + array[:, i]
62-
return res
56+
# no need to add empty strings
57+
return all_cols
58+
result = [sep] * (2 * len(all_cols) - 1)
59+
result[::2] = all_cols
60+
return result
6361

6462

6563
def _na_map(f, arr, na_result=np.nan, dtype=object):
@@ -2098,11 +2096,13 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
20982096

20992097
# concatenate Series/Index with itself if no "others"
21002098
if others is None:
2101-
if na_rep is None:
2102-
data = data.dropna()
2103-
else:
2104-
data = data.fillna(na_rep)
2105-
return sep.join(data.values)
2099+
data = data.astype(object).values
2100+
mask = isna(data)
2101+
if mask.any():
2102+
if na_rep is None:
2103+
return sep.join(data[~mask])
2104+
return sep.join(np.where(mask, na_rep, data))
2105+
return sep.join(data)
21062106

21072107
try:
21082108
# turn anything in "others" into lists of Series
@@ -2145,18 +2145,27 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
21452145
join=(join if join == 'inner' else 'outer'),
21462146
keys=range(len(others)), copy=False)
21472147
data, others = data.align(others, join=join)
2148-
df = concat([data, others], axis=1, copy=False).astype('object')
2149-
else:
2150-
df = concat([data] + others, axis=1, copy=False).astype('object')
2151-
2152-
# calculate in numpy using str_cat_core; result is 1-dim np.ndarray
2153-
if na_rep is None:
2154-
mask = df.isna().values.any(axis=1)
2155-
result = np.full(len(data), fill_value=np.nan, dtype='object')
2156-
result[~mask] = str_cat_core(df.values[~mask], sep)
2157-
else:
2158-
df = df.fillna(na_rep)
2159-
result = str_cat_core(df.values, sep)
2148+
others = [others[x] for x in others] # again list of Series
2149+
2150+
all_cols = [x.astype(object).values for x in [data] + others]
2151+
masks = np.array([isna(x) for x in all_cols])
2152+
union_mask = np.logical_or.reduce(masks, axis=0)
2153+
2154+
if na_rep is None and union_mask.any():
2155+
result = np.empty(len(data), dtype=object)
2156+
np.putmask(result, union_mask, np.nan)
2157+
2158+
not_masked = ~union_mask
2159+
all_cols = interleave_sep([x[not_masked] for x in all_cols], sep)
2160+
2161+
result[not_masked] = np.sum(all_cols, axis=0)
2162+
elif na_rep is not None and union_mask.any():
2163+
# fill NaNs
2164+
all_cols = [np.where(masks[i], na_rep, all_cols[i])
2165+
for i in range(len(all_cols))]
2166+
result = np.sum(interleave_sep(all_cols, sep), axis=0)
2167+
else: # no NaNs
2168+
result = np.sum(interleave_sep(all_cols, sep), axis=0)
21602169

21612170
if isinstance(self._orig, Index):
21622171
result = Index(result, name=self._orig.name)

0 commit comments

Comments
 (0)