diff --git a/hubstorage/frontier.py b/hubstorage/frontier.py
index 6f74451..b37cacc 100644
--- a/hubstorage/frontier.py
+++ b/hubstorage/frontier.py
@@ -63,3 +63,34 @@ def delete(self, frontier, slot, ids):
 
     def delete_slot(self, frontier, slot):
         self.apidelete((frontier, 's', slot))
+
+    def count_slot(self, frontier, slot):
+        """Return the summed ``count`` and ``scanned`` values of a slot.
+
+        The ``q/count`` resource is requested repeatedly. The first item
+        of every response adds its ``count`` and ``scanned`` values to
+        the totals, and its ``nextstart`` value is sent as the ``start``
+        parameter of the next request. The loop ends once a response
+        has no truthy ``nextstart`` or contains no items at all.
+        """
+        total = {
+            'count': 0,
+            'scanned': 0,
+        }
+        start = None
+        while True:
+            ret = list(self.apiget(
+                (frontier, 's', slot, 'q/count'),
+                params={'start': start}
+            ))
+            if not ret:
+                # An empty response has neither counters nor a
+                # continuation marker: stop with the totals gathered so
+                # far instead of failing on ret[0] with an IndexError.
+                break
+            total['count'] += ret[0]['count']
+            total['scanned'] += ret[0]['scanned']
+            start = ret[0].get('nextstart')
+            if not start:
+                break
+        return total
diff --git a/tests/test_frontier.py b/tests/test_frontier.py
index e781377..b0d173c 100644
--- a/tests/test_frontier.py
+++ b/tests/test_frontier.py
@@ -41,6 +41,7 @@ def test_add_read(self):
     def test_add_multiple_chunks(self):
         frontier = self.project.frontier
         old_count = frontier.newcount
+        initial_count = frontier.count_slot(self.frontier, self.slot)
 
         batch_size = 50
         fps1 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)]
@@ -55,6 +56,10 @@ def test_add_multiple_chunks(self):
 
         self.assertEqual(frontier.newcount, 150 + old_count)
 
+        # test count_slot
+        count = frontier.count_slot(self.frontier, self.slot)
+        self.assertEqual(count['count'] - initial_count['count'], 150)
+
         # insert repeated fingerprints
         fps4 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)]
         frontier.add(self.frontier, self.slot, fps3)
@@ -73,6 +78,10 @@ def test_add_multiple_chunks(self):
         ids = [batch['id'] for batch in batches]
         frontier.delete(self.frontier, self.slot, ids)
 
+        # test count_slot again
+        count = frontier.count_slot(self.frontier, self.slot)
+        self.assertEqual(count['count'] - initial_count['count'], 150 - 100)
+
         # get remaining 50
         batches = list(frontier.read(self.frontier, self.slot))
         urls = [self._get_urls(batch) for batch in batches]