|
1 | 1 | """
|
2 | 2 | Test Frontier
|
3 | 3 | """
|
4 |
| -from .hstestcase import HSTestCase |
| 4 | +from .conftest import TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT |
5 | 5 |
|
6 | 6 |
|
7 |
| -class FrontierTest(HSTestCase): |
| 7 | +def _get_urls(batch): |
| 8 | + return [r[0] for r in batch['requests']] |
8 | 9 |
|
9 |
| - def setUp(self): |
10 |
| - self._delete_slot() |
11 | 10 |
|
12 |
| - def tearDown(self): |
13 |
| - self._delete_slot() |
| 11 | +def test_add_read(hsproject): |
| 12 | + frontier = hsproject.frontier |
14 | 13 |
|
15 |
| - def _delete_slot(self): |
16 |
| - frontier = self.project.frontier |
17 |
| - frontier.delete_slot(self.frontier, self.slot) |
| 14 | + fps = [{'fp': '/'}] |
| 15 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps) |
| 16 | + fps = [{'fp': '/index.html'}, {'fp': '/index2.html'}] |
| 17 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps) |
| 18 | + frontier.flush() |
18 | 19 |
|
19 |
| - def _remove_all_ids(self): |
20 |
| - frontier = self.project.frontier |
21 |
| - ids = [batch['id'] for batch in frontier.read(self.frontier, self.slot)] |
22 |
| - frontier.delete(self.frontier, self.slot, ids) |
| 20 | + urls = [_get_urls(batch) for batch |
| 21 | + in frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT)] |
| 22 | + expected_urls = [[u'/', u'/index.html', u'/index2.html']] |
| 23 | + assert urls == expected_urls |
23 | 24 |
|
24 |
| - def _get_urls(self, batch): |
25 |
| - return [r[0] for r in batch['requests']] |
26 | 25 |
|
27 |
| - def test_add_read(self): |
28 |
| - frontier = self.project.frontier |
| 26 | +def test_add_multiple_chunks(hsproject): |
| 27 | + frontier = hsproject.frontier |
| 28 | + old_count = frontier.newcount |
29 | 29 |
|
30 |
| - fps = [{'fp': '/'}] |
31 |
| - frontier.add(self.frontier, self.slot, fps) |
32 |
| - fps = [{'fp': '/index.html'}, {'fp': '/index2.html'}] |
33 |
| - frontier.add(self.frontier, self.slot, fps) |
34 |
| - frontier.flush() |
| 30 | + batch_size = 50 |
| 31 | + fps1 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)] |
| 32 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps1) |
35 | 33 |
|
36 |
| - urls = [self._get_urls(batch) for batch |
37 |
| - in frontier.read(self.frontier, self.slot)] |
38 |
| - expected_urls = [[u'/', u'/index.html', u'/index2.html']] |
39 |
| - self.assertEqual(urls, expected_urls) |
| 34 | + fps2 = [{'fp': '/index_%s.html' % fp} for fp in range(batch_size, batch_size * 2)] |
| 35 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps2) |
40 | 36 |
|
41 |
| - def test_add_multiple_chunks(self): |
42 |
| - frontier = self.project.frontier |
43 |
| - old_count = frontier.newcount |
| 37 | + fps3 = [{'fp': '/index_%s.html' % fp} for fp in range(batch_size * 2, batch_size * 3)] |
| 38 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps3) |
| 39 | + frontier.flush() |
44 | 40 |
|
45 |
| - batch_size = 50 |
46 |
| - fps1 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)] |
47 |
| - frontier.add(self.frontier, self.slot, fps1) |
| 41 | + assert frontier.newcount == 150 + old_count |
48 | 42 |
|
49 |
| - fps2 = [{'fp': '/index_%s.html' % fp} for fp in range(batch_size, batch_size * 2)] |
50 |
| - frontier.add(self.frontier, self.slot, fps2) |
| 43 | + # insert repeated fingerprints |
| 44 | + fps4 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)] |
| 45 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps3) |
| 46 | + frontier.flush() |
51 | 47 |
|
52 |
| - fps3 = [{'fp': '/index_%s.html' % fp} for fp in range(batch_size * 2, batch_size * 3)] |
53 |
| - frontier.add(self.frontier, self.slot, fps3) |
54 |
| - frontier.flush() |
| 48 | + # new count is the same |
| 49 | + assert frontier.newcount == 150 + old_count |
55 | 50 |
|
56 |
| - self.assertEqual(frontier.newcount, 150 + old_count) |
| 51 | + # get first 100 |
| 52 | + batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, mincount=100)) |
| 53 | + urls = [_get_urls(batch) for batch in batches] |
| 54 | + expected_urls = [[fp['fp'] for fp in fps1 + fps2]] |
| 55 | + assert urls == expected_urls |
57 | 56 |
|
58 |
| - # insert repeated fingerprints |
59 |
| - fps4 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)] |
60 |
| - frontier.add(self.frontier, self.slot, fps3) |
61 |
| - frontier.flush() |
| 57 | + # delete first 100 |
| 58 | + ids = [batch['id'] for batch in batches] |
| 59 | + frontier.delete(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, ids) |
62 | 60 |
|
63 |
| - # new count is the same |
64 |
| - self.assertEqual(frontier.newcount, 150 + old_count) |
| 61 | + # get remaining 50 |
| 62 | + batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT)) |
| 63 | + urls = [_get_urls(batch) for batch in batches] |
| 64 | + expected_urls = [[fp['fp'] for fp in fps3]] |
| 65 | + assert urls == expected_urls |
65 | 66 |
|
66 |
| - # get first 100 |
67 |
| - batches = list(frontier.read(self.frontier, self.slot, mincount=100)) |
68 |
| - urls = [self._get_urls(batch) for batch in batches] |
69 |
| - expected_urls = [[fp['fp'] for fp in fps1 + fps2]] |
70 |
| - self.assertEqual(urls, expected_urls) |
71 | 67 |
|
72 |
| - # delete first 100 |
73 |
| - ids = [batch['id'] for batch in batches] |
74 |
| - frontier.delete(self.frontier, self.slot, ids) |
| 68 | +def test_add_big_chunk(hsproject): |
| 69 | + frontier = hsproject.frontier |
75 | 70 |
|
76 |
| - # get remaining 50 |
77 |
| - batches = list(frontier.read(self.frontier, self.slot)) |
78 |
| - urls = [self._get_urls(batch) for batch in batches] |
79 |
| - expected_urls = [[fp['fp'] for fp in fps3]] |
80 |
| - self.assertEqual(urls, expected_urls) |
| 71 | + batch_size = 300 |
| 72 | + fps1 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)] |
| 73 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps1) |
| 74 | + frontier.flush() |
81 | 75 |
|
82 |
| - def test_add_big_chunk(self): |
83 |
| - frontier = self.project.frontier |
| 76 | + # get first 100 |
| 77 | + batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, mincount=100)) |
| 78 | + urls = [_get_urls(batch) for batch in batches] |
| 79 | + expected_urls = [[fp['fp'] for fp in fps1[:100]]] |
| 80 | + assert urls == expected_urls |
84 | 81 |
|
85 |
| - batch_size = 300 |
86 |
| - fps1 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)] |
87 |
| - frontier.add(self.frontier, self.slot, fps1) |
88 |
| - frontier.flush() |
| 82 | + # delete first 100 |
| 83 | + ids = [batch['id'] for batch in batches] |
| 84 | + frontier.delete(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, ids) |
89 | 85 |
|
90 |
| - # get first 100 |
91 |
| - batches = list(frontier.read(self.frontier, self.slot, mincount=100)) |
92 |
| - urls = [self._get_urls(batch) for batch in batches] |
93 |
| - expected_urls = [[fp['fp'] for fp in fps1[:100]]] |
94 |
| - self.assertEqual(urls, expected_urls) |
| 86 | + # get next 100 |
| 87 | + batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, mincount=100)) |
| 88 | + urls = [_get_urls(batch) for batch in batches] |
| 89 | + expected_urls = [[fp['fp'] for fp in fps1[100:200]]] |
| 90 | + assert urls == expected_urls |
95 | 91 |
|
96 |
| - # delete first 100 |
97 |
| - ids = [batch['id'] for batch in batches] |
98 |
| - frontier.delete(self.frontier, self.slot, ids) |
| 92 | + # delete next 100 |
| 93 | + ids = [batch['id'] for batch in batches] |
| 94 | + frontier.delete(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, ids) |
99 | 95 |
|
100 |
| - # get next 100 |
101 |
| - batches = list(frontier.read(self.frontier, self.slot, mincount=100)) |
102 |
| - urls = [self._get_urls(batch) for batch in batches] |
103 |
| - expected_urls = [[fp['fp'] for fp in fps1[100:200]]] |
104 |
| - self.assertEqual(urls, expected_urls) |
| 96 | + # get next 100 |
| 97 | + batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, mincount=100)) |
| 98 | + urls = [_get_urls(batch) for batch in batches] |
| 99 | + expected_urls = [[fp['fp'] for fp in fps1[200:300]]] |
| 100 | + assert urls == expected_urls |
105 | 101 |
|
106 |
| - # delete next 100 |
107 |
| - ids = [batch['id'] for batch in batches] |
108 |
| - frontier.delete(self.frontier, self.slot, ids) |
109 |
| - |
110 |
| - # get next 100 |
111 |
| - batches = list(frontier.read(self.frontier, self.slot, mincount=100)) |
112 |
| - urls = [self._get_urls(batch) for batch in batches] |
113 |
| - expected_urls = [[fp['fp'] for fp in fps1[200:300]]] |
114 |
| - self.assertEqual(urls, expected_urls) |
115 |
| - |
116 |
| - def test_add_extra_params(self): |
117 |
| - frontier = self.project.frontier |
118 |
| - |
119 |
| - qdata = {"a": 1, "b": 2, "c": 3} |
120 |
| - fps = [{'fp': '/', "qdata": qdata}] |
121 |
| - frontier.add(self.frontier, self.slot, fps) |
122 |
| - frontier.flush() |
123 |
| - |
124 |
| - expected_request = [[u'/', {u'a': 1, u'c': 3, u'b': 2}]] |
125 |
| - batches = list(frontier.read(self.frontier, self.slot)) |
126 |
| - request = batches[0]['requests'] |
127 |
| - self.assertEqual(request, expected_request) |
128 | 102 |
|
| 103 | +def test_add_extra_params(hsproject): |
| 104 | + frontier = hsproject.frontier |
| 105 | + |
| 106 | + qdata = {"a": 1, "b": 2, "c": 3} |
| 107 | + fps = [{'fp': '/', "qdata": qdata}] |
| 108 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps) |
| 109 | + frontier.flush() |
| 110 | + |
| 111 | + expected_request = [[u'/', {u'a': 1, u'c': 3, u'b': 2}]] |
| 112 | + batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT)) |
| 113 | + request = batches[0]['requests'] |
| 114 | + assert request == expected_request |
0 commit comments