Skip to content

Commit 6663f5f

Browse files
Fixed #890 Corrected the number of contiguous windows in snippet (#894)
* fix n windows in naive snippet
* fix n windows in performant snippet and minor bugs
* improve reproducibility and update notebook
* set k=10 and add some explanation
* minor changes
* Avoid numpy divide-by-zero warning to avoid unnecessary context
1 parent d4c6a21 commit 6663f5f

File tree

3 files changed

+74
-82
lines changed

3 files changed

+74
-82
lines changed

docs/Tutorial_Time_Series_Snippets.ipynb

Lines changed: 66 additions & 68 deletions
Large diffs are not rendered by default.

stumpy/snippets.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ def _get_all_profiles(
102102

103103
right_pad = 0
104104
T_subseq_isconstant = core.process_isconstant(T, s, mpdist_T_subseq_isconstant)
105+
n_contiguous_windows = int(T.shape[0] // m)
105106
if T.shape[0] % m != 0:
106107
right_pad = int(m * np.ceil(T.shape[0] / m) - T.shape[0])
107108
pad_width = (0, right_pad)
@@ -111,12 +112,12 @@ def _get_all_profiles(
111112
)
112113

113114
n_padded = T.shape[0]
114-
D = np.empty(((n_padded // m) - 1, n_padded - m + 1), dtype=np.float64)
115+
D = np.empty((n_contiguous_windows, n_padded - m + 1), dtype=np.float64)
115116

116117
M_T, Σ_T = core.compute_mean_std(T, s)
117118

118119
# Iterate over non-overlapping subsequences, see Definition 3
119-
for i in range((n_padded // m) - 1):
120+
for i in range(n_contiguous_windows):
120121
start = i * m
121122
stop = (i + 1) * m
122123
S_i = T[start:stop]
@@ -290,17 +291,13 @@ def snippets(
290291
mpdist_T_subseq_isconstant=mpdist_T_subseq_isconstant,
291292
)
292293

293-
pad_width = (0, int(m * np.ceil(T.shape[0] / m) - T.shape[0]))
294-
T_padded = np.pad(T, pad_width, mode="constant", constant_values=np.nan)
295-
n_padded = T_padded.shape[0]
296-
297294
snippets = np.empty((k, m), dtype=np.float64)
298295
snippets_indices = np.empty(k, dtype=np.int64)
299296
snippets_profiles = np.empty((k, D.shape[-1]), dtype=np.float64)
300297
snippets_fractions = np.empty(k, dtype=np.float64)
301298
snippets_areas = np.empty(k, dtype=np.float64)
302299
Q = np.full(D.shape[-1], np.inf, dtype=np.float64)
303-
indices = np.arange(0, n_padded - m, m, dtype=np.int64)
300+
indices = np.arange(D.shape[0], dtype=np.int64) * m
304301
snippets_regimes_list = []
305302

306303
for i in range(k):

tests/naive.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1548,6 +1548,7 @@ def get_all_mpdist_profiles(
15481548

15491549
T_subseq_isconstant = rolling_isconstant(T, s, mpdist_T_subseq_isconstant)
15501550
right_pad = 0
1551+
n_contiguous_windows = int(T.shape[0] // m)
15511552
if T.shape[0] % m != 0:
15521553
right_pad = int(m * np.ceil(T.shape[0] / m) - T.shape[0])
15531554
pad_width = (0, right_pad)
@@ -1557,10 +1558,10 @@ def get_all_mpdist_profiles(
15571558
)
15581559

15591560
n_padded = T.shape[0]
1560-
D = np.empty(((n_padded // m) - 1, n_padded - m + 1))
1561+
D = np.empty((n_contiguous_windows, n_padded - m + 1))
15611562

15621563
# Iterate over non-overlapping subsequences, see Definition 3
1563-
for i in range((n_padded // m) - 1):
1564+
for i in range(n_contiguous_windows):
15641565
start = i * m
15651566
stop = (i + 1) * m
15661567
S_i = T[start:stop]
@@ -1601,17 +1602,13 @@ def mpdist_snippets(
16011602
mpdist_T_subseq_isconstant=mpdist_T_subseq_isconstant,
16021603
)
16031604

1604-
pad_width = (0, int(m * np.ceil(T.shape[0] / m) - T.shape[0]))
1605-
T_padded = np.pad(T, pad_width, mode="constant", constant_values=np.nan)
1606-
n_padded = T_padded.shape[0]
1607-
16081605
snippets = np.empty((k, m))
16091606
snippets_indices = np.empty(k, dtype=np.int64)
16101607
snippets_profiles = np.empty((k, D.shape[-1]))
16111608
snippets_fractions = np.empty(k)
16121609
snippets_areas = np.empty(k)
16131610
Q = np.inf
1614-
indices = np.arange(0, n_padded - m, m)
1611+
indices = np.arange(0, D.shape[0] * m, m)
16151612
snippets_regimes_list = []
16161613

16171614
for snippet_idx in range(k):

0 commit comments

Comments
 (0)