Skip to content

Commit e682996

Browse files
committed
Add tests for sh.hs.frontier/jobsmeta
1 parent bfddca9 commit e682996

File tree

3 files changed

+196
-198
lines changed

3 files changed

+196
-198
lines changed

tests/hubstorage/conftest.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,20 @@ def setup_test(hsclient, hsproject, hscollection, request, vcr_instance):
6363
_set_testbotgroup(hsproject)
6464
_remove_all_jobs(hsproject)
6565
_clean_collection(hscollection)
66+
_delete_frontier_slot(hsproject)
6667
yield
6768
_remove_all_jobs(hsproject)
69+
_clean_collection(hscollection)
70+
_delete_frontier_slot(hsproject)
6871
_unset_testbotgroup(hsproject)
6972

7073
# ----------------------------------------------------------------------------
7174

75+
def _delete_frontier_slot(hsproject):
    """Delete the frontier slot used by the tests.

    Calls ``delete_slot`` on ``hsproject.frontier`` with the module-level
    ``TEST_FRONTIER_NAME`` and ``TEST_FRONTIER_SLOT`` constants. The
    ``setup_test`` fixture invokes it both before and after each test.
    """
    frontier = hsproject.frontier
    frontier.delete_slot(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT)
78+
79+
7280
def _clean_collection(hscollection):
    """Delete every item currently returned by ``hscollection``.

    Walks ``hscollection.iter_values()`` and issues one ``delete`` call per
    item, keyed by the item's ``'_key'`` field.
    """
    for item in hscollection.iter_values():
        hscollection.delete(item['_key'])

tests/hubstorage/test_frontier.py

Lines changed: 84 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1,128 +1,114 @@
11
"""
22
Test Frontier
33
"""
4-
from .hstestcase import HSTestCase
4+
from .conftest import TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT
55

66

7-
class FrontierTest(HSTestCase):
7+
def _get_urls(batch):
    """Return the first element of each entry in ``batch['requests']``.

    The tests in this module compare the returned values against the
    ``'fp'`` values they previously added to the frontier.
    """
    return [r[0] for r in batch['requests']]
89

9-
def setUp(self):
10-
self._delete_slot()
1110

12-
def tearDown(self):
13-
self._delete_slot()
11+
def test_add_read(hsproject):
    """Fingerprints added in two calls are read back as one batch."""
    frontier = hsproject.frontier

    fps = [{'fp': '/'}]
    frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps)
    fps = [{'fp': '/index.html'}, {'fp': '/index2.html'}]
    frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps)
    # Nothing is asserted until the pending additions have been flushed.
    frontier.flush()

    urls = [_get_urls(batch) for batch
            in frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT)]
    expected_urls = [[u'/', u'/index.html', u'/index2.html']]
    assert urls == expected_urls
2324

24-
def _get_urls(self, batch):
25-
return [r[0] for r in batch['requests']]
2625

27-
def test_add_read(self):
28-
frontier = self.project.frontier
26+
def test_add_multiple_chunks(hsproject):
    """Add three chunks of 50 fingerprints, then read and delete in batches.

    Also re-adds an already-stored chunk and checks that ``newcount`` does
    not grow as a result.
    """
    frontier = hsproject.frontier
    old_count = frontier.newcount

    batch_size = 50
    fps1 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)]
    frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps1)

    fps2 = [{'fp': '/index_%s.html' % fp}
            for fp in range(batch_size, batch_size * 2)]
    frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps2)

    fps3 = [{'fp': '/index_%s.html' % fp}
            for fp in range(batch_size * 2, batch_size * 3)]
    frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps3)
    frontier.flush()

    assert frontier.newcount == 150 + old_count

    # insert repeated fingerprints: fps3 is already stored, so adding it
    # again must not be counted as new. (An unused ``fps4`` list that was
    # built here but never sent has been dropped; the request is unchanged.)
    frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps3)
    frontier.flush()

    # new count is the same
    assert frontier.newcount == 150 + old_count

    # get first 100
    batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT,
                                 mincount=100))
    urls = [_get_urls(batch) for batch in batches]
    expected_urls = [[fp['fp'] for fp in fps1 + fps2]]
    assert urls == expected_urls

    # delete first 100
    ids = [batch['id'] for batch in batches]
    frontier.delete(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, ids)

    # get remaining 50
    batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT))
    urls = [_get_urls(batch) for batch in batches]
    expected_urls = [[fp['fp'] for fp in fps3]]
    assert urls == expected_urls
6566

66-
# get first 100
67-
batches = list(frontier.read(self.frontier, self.slot, mincount=100))
68-
urls = [self._get_urls(batch) for batch in batches]
69-
expected_urls = [[fp['fp'] for fp in fps1 + fps2]]
70-
self.assertEqual(urls, expected_urls)
7167

72-
# delete first 100
73-
ids = [batch['id'] for batch in batches]
74-
frontier.delete(self.frontier, self.slot, ids)
68+
def test_add_big_chunk(hsproject):
    """Add 300 fingerprints in one call and read them back 100 at a time.

    Each batch that has been read is deleted before the next read, so the
    following read is expected to return the next 100 fingerprints.
    """
    frontier = hsproject.frontier

    batch_size = 300
    fps1 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)]
    frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps1)
    frontier.flush()

    # get first 100
    batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT,
                                 mincount=100))
    urls = [_get_urls(batch) for batch in batches]
    expected_urls = [[fp['fp'] for fp in fps1[:100]]]
    assert urls == expected_urls

    # delete first 100
    ids = [batch['id'] for batch in batches]
    frontier.delete(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, ids)

    # get next 100
    batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT,
                                 mincount=100))
    urls = [_get_urls(batch) for batch in batches]
    expected_urls = [[fp['fp'] for fp in fps1[100:200]]]
    assert urls == expected_urls

    # delete next 100
    ids = [batch['id'] for batch in batches]
    frontier.delete(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, ids)

    # get last 100
    batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT,
                                 mincount=100))
    urls = [_get_urls(batch) for batch in batches]
    expected_urls = [[fp['fp'] for fp in fps1[200:300]]]
    assert urls == expected_urls
105101

106-
# delete next 100
107-
ids = [batch['id'] for batch in batches]
108-
frontier.delete(self.frontier, self.slot, ids)
109-
110-
# get next 100
111-
batches = list(frontier.read(self.frontier, self.slot, mincount=100))
112-
urls = [self._get_urls(batch) for batch in batches]
113-
expected_urls = [[fp['fp'] for fp in fps1[200:300]]]
114-
self.assertEqual(urls, expected_urls)
115-
116-
def test_add_extra_params(self):
117-
frontier = self.project.frontier
118-
119-
qdata = {"a": 1, "b": 2, "c": 3}
120-
fps = [{'fp': '/', "qdata": qdata}]
121-
frontier.add(self.frontier, self.slot, fps)
122-
frontier.flush()
123-
124-
expected_request = [[u'/', {u'a': 1, u'c': 3, u'b': 2}]]
125-
batches = list(frontier.read(self.frontier, self.slot))
126-
request = batches[0]['requests']
127-
self.assertEqual(request, expected_request)
128102

103+
def test_add_extra_params(hsproject):
    """``qdata`` added with a fingerprint is returned next to it on read."""
    frontier = hsproject.frontier

    qdata = {"a": 1, "b": 2, "c": 3}
    fps = [{'fp': '/', "qdata": qdata}]
    frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps)
    frontier.flush()

    # Each request is expected back as a [fingerprint, qdata] pair.
    expected_request = [[u'/', {u'a': 1, u'c': 3, u'b': 2}]]
    batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT))
    request = batches[0]['requests']
    assert request == expected_request

0 commit comments

Comments
 (0)