|
1 | 1 | import random
|
2 | 2 | from contextlib import closing
|
3 | 3 |
|
| 4 | +import pytest |
4 | 5 | from six.moves import range
|
5 | 6 |
|
6 | 7 | from scrapinghub import HubstorageClient
|
7 | 8 | from scrapinghub.hubstorage.utils import millitime
|
8 | 9 |
|
9 |
| -from .hstestcase import HSTestCase |
10 |
| - |
11 |
| - |
12 |
| -class SystemTest(HSTestCase): |
13 |
| - |
14 |
| - MAGICN = 1211 |
15 |
| - |
16 |
| - def setUp(self): |
17 |
| - super(HSTestCase, self).setUp() |
18 |
| - self.endpoint = self.hsclient.endpoint |
19 |
| - # Panel - no client auth, only project auth using user auth token |
20 |
| - self.panelclient = HubstorageClient(endpoint=self.endpoint) |
21 |
| - self.panelproject = self.panelclient.get_project(self.projectid, auth=self.auth) |
22 |
| - |
23 |
| - def tearDown(self): |
24 |
| - super(HSTestCase, self).tearDown() |
25 |
| - self.panelclient.close() |
26 |
| - |
27 |
| - def test_succeed_with_close_reason(self): |
28 |
| - self._do_test_success('all-good', 'all-good') |
29 |
| - |
30 |
| - def test_succeed_without_close_reason(self): |
31 |
| - self._do_test_success(None, 'no_reason') |
32 |
| - |
33 |
| - def test_scraper_failure(self): |
34 |
| - job = self._do_test_job(IOError('no more resources, ha!'), 'failed') |
35 |
| - # MAGICN per log level messages plus one of last failure |
36 |
| - stats = job.logs.stats() |
37 |
| - self.assertTrue(stats) |
38 |
| - self.assertEqual(stats['totals']['input_values'], self.MAGICN * 4 + 1) |
39 |
| - |
40 |
| - def _do_test_success(self, job_close_reason, expected_close_reason): |
41 |
| - job = self._do_test_job(job_close_reason, expected_close_reason) |
42 |
| - self.assertEqual(job.items.stats()['totals']['input_values'], self.MAGICN) |
43 |
| - self.assertEqual(job.logs.stats()['totals']['input_values'], self.MAGICN * 4) |
44 |
| - self.assertEqual(job.requests.stats()['totals']['input_values'], self.MAGICN) |
45 |
| - |
46 |
| - def _do_test_job(self, job_close_reason, expected_close_reason): |
47 |
| - p = self.panelproject |
48 |
| - pushed = p.jobq.push(self.spidername) |
49 |
| - # check pending state |
50 |
| - job = p.get_job(pushed['key']) |
51 |
| - self.assertEqual(job.metadata.get('state'), 'pending') |
52 |
| - # consume msg from runner |
53 |
| - self._run_runner(pushed, close_reason=job_close_reason) |
54 |
| - # query again from panel |
55 |
| - job = p.get_job(pushed['key']) |
56 |
| - self.assertEqual(job.metadata.get('state'), 'finished') |
57 |
| - self.assertEqual(job.metadata.get('close_reason'), expected_close_reason) |
58 |
| - return job |
59 |
| - |
60 |
| - def _run_runner(self, pushed, close_reason): |
61 |
| - client = HubstorageClient(endpoint=self.endpoint, auth=self.auth) |
62 |
| - with closing(client) as runnerclient: |
63 |
| - job = self.start_job() |
64 |
| - self.assertFalse(job.metadata.get('stop_requested')) |
65 |
| - job.metadata.update(host='localhost', slot=1) |
66 |
| - self.assertEqual(job.metadata.get('state'), 'running') |
67 |
| - # run scraper |
68 |
| - try: |
69 |
| - self._run_scraper(job.key, job.jobauth, close_reason=close_reason) |
70 |
| - except Exception as exc: |
71 |
| - job.logs.error(message=str(exc), appendmode=True) |
72 |
| - job.close_writers() |
73 |
| - job.jobq.finish(job, close_reason='failed') |
74 |
| - # logging from runner must append and never remove messages logged |
75 |
| - # by scraper |
76 |
| - self.assertTrue(job.logs.batch_append) |
77 |
| - else: |
78 |
| - job.jobq.finish(job, close_reason=close_reason or 'no_reason') |
79 |
| - |
80 |
| - def _run_scraper(self, jobkey, jobauth, close_reason=None): |
81 |
| - httpmethods = 'GET PUT POST DELETE HEAD OPTIONS TRACE CONNECT'.split() |
82 |
| - # Scraper - uses job level auth, no global or project auth available |
83 |
| - client = HubstorageClient(endpoint=self.endpoint) |
84 |
| - with closing(client) as scraperclient: |
85 |
| - job = scraperclient.get_job(jobkey, auth=jobauth) |
86 |
| - for idx in range(self.MAGICN): |
87 |
| - iid = job.items.write({'uuid': idx}) |
88 |
| - job.logs.debug('log debug %s' % idx, idx=idx) |
89 |
| - job.logs.info('log info %s' % idx, idx=idx) |
90 |
| - job.logs.warn('log warn %s' % idx, idx=idx) |
91 |
| - job.logs.error('log error %s' % idx, idx=idx) |
92 |
| - sid = job.samples.write([idx, idx, idx]) |
93 |
| - rid = job.requests.add( |
94 |
| - url='http://test.com/%d' % idx, |
95 |
| - status=random.randint(100, 1000), |
96 |
| - method=random.choice(httpmethods), |
97 |
| - rs=random.randint(0, 100000), |
98 |
| - duration=random.randint(0, 1000), |
99 |
| - parent=random.randrange(0, idx + 1) if idx > 10 else None, |
100 |
| - ts=millitime() + random.randint(100, 100000), |
101 |
| - ) |
102 |
| - self.assertEqual(iid, idx) |
103 |
| - self.assertEqual(sid, idx) |
104 |
| - self.assertEqual(rid, idx) |
105 |
| - |
106 |
| - if isinstance(close_reason, Exception): |
107 |
| - raise close_reason |
108 |
| - |
109 |
| - if close_reason: |
110 |
| - job.metadata['close_reason'] = close_reason |
111 |
| - |
112 |
| - job.metadata.save() |
| 10 | +from .conftest import TEST_ENDPOINT, TEST_SPIDER_NAME |
| 11 | +from .conftest import TEST_PROJECT_ID, TEST_AUTH |
| 12 | +from .conftest import start_job |
| 13 | + |
| 14 | + |
# Iteration count for the fake scraper run: each iteration writes one item,
# one sample, one request and four log lines (debug/info/warn/error), which
# the stats assertions below rely on (MAGICN and MAGICN * 4).
MAGICN = 1211
| 16 | + |
| 17 | + |
@pytest.fixture
def panelclient():
    """Panel-side client: carries no client-level auth of its own.

    Project access is authorized later with the user auth token
    (see the ``panelproject`` fixture).
    """
    client = HubstorageClient(endpoint=TEST_ENDPOINT)
    return client
| 22 | + |
| 23 | + |
@pytest.fixture
def panelproject(panelclient):
    """Project handle obtained through the panel client with user auth."""
    return panelclient.get_project(TEST_PROJECT_ID, auth=TEST_AUTH)
| 27 | + |
| 28 | + |
@pytest.fixture(autouse=True)
def close_panelclient(panelclient):
    """Automatically close the panel client after each test in this module."""
    yield
    panelclient.close()
| 33 | + |
| 34 | + |
def test_succeed_with_close_reason(hsproject, panelproject):
    """An explicitly supplied close_reason must be stored verbatim."""
    _do_test_success(hsproject, panelproject, 'all-good', 'all-good')
| 37 | + |
| 38 | + |
def test_succeed_without_close_reason(hsproject, panelproject):
    """Without a close_reason, the job must finish as 'no_reason'."""
    _do_test_success(hsproject, panelproject, None, 'no_reason')
| 41 | + |
| 42 | + |
def _do_test_success(hsproject, panelproject, job_close_reason,
                     expected_close_reason):
    """Run a job via ``_do_test_job`` with additional volume checks.

    Beyond the state/close_reason checks done by ``_do_test_job``, verify
    that the expected number of items, log entries and requests were
    written: one item and one request per loop iteration, and four log
    lines per iteration (debug/info/warn/error).
    """
    # Explicit parameters (instead of the previous opaque *args) keep the
    # call sites self-documenting; existing positional callers are
    # unaffected. Also fixes the "additonal" docstring typo.
    job = _do_test_job(hsproject, panelproject, job_close_reason,
                       expected_close_reason)
    assert job.items.stats()['totals']['input_values'] == MAGICN
    assert job.logs.stats()['totals']['input_values'] == MAGICN * 4
    assert job.requests.stats()['totals']['input_values'] == MAGICN
| 49 | + |
| 50 | + |
def test_scraper_failure(hsproject, panelproject):
    """A scraper that raises must leave the job closed as 'failed'."""
    scraper_error = IOError('no more resources, ha!')
    job = _do_test_job(hsproject, panelproject, scraper_error, 'failed')
    # Expect MAGICN messages per log level plus the single final
    # failure entry written by the runner.
    log_stats = job.logs.stats()
    assert log_stats
    assert log_stats['totals']['input_values'] == MAGICN * 4 + 1
| 62 | + |
| 63 | + |
def _do_test_job(hsproject, panelproject,
                 job_close_reason, expected_close_reason):
    """Push a job from the panel side, run it, and verify its final state.

    Returns the finished job as re-read through the panel project.
    """
    pushed = panelproject.jobq.push(TEST_SPIDER_NAME)
    key = pushed['key']
    # Freshly pushed jobs must be pending until the runner picks them up.
    assert panelproject.get_job(key).metadata.get('state') == 'pending'
    # Let the simulated runner consume and execute the job.
    _run_runner(hsproject, pushed, close_reason=job_close_reason)
    # Re-query from the panel and check the terminal state.
    job = panelproject.get_job(key)
    assert job.metadata.get('state') == 'finished'
    assert job.metadata.get('close_reason') == expected_close_reason
    return job
| 77 | + |
def _run_runner(hsproject, pushed, close_reason):
    """Simulate the job runner: start the job, run the scraper, finish it.

    ``pushed`` is accepted for signature symmetry with the jobq message
    but is not read here; the job object comes from ``start_job``.
    """
    runner = HubstorageClient(endpoint=TEST_ENDPOINT, auth=TEST_AUTH)
    with closing(runner):
        job = start_job(hsproject)
        assert not job.metadata.get('stop_requested')
        job.metadata.update(host='localhost', slot=1)
        assert job.metadata.get('state') == 'running'
        # Run the scraper; any failure is recorded and closes the job
        # as 'failed' (the broad except is deliberate — see
        # test_scraper_failure).
        try:
            _run_scraper(job.key, job.jobauth, close_reason=close_reason)
        except Exception as exc:
            job.logs.error(message=str(exc), appendmode=True)
            job.close_writers()
            job.jobq.finish(job, close_reason='failed')
            # Runner logging must append to — never replace — messages
            # already logged by the scraper.
            assert job.logs.batch_append
        else:
            job.jobq.finish(job, close_reason=close_reason or 'no_reason')
| 97 | + |
| 98 | + |
def _run_scraper(jobkey, jobauth, close_reason=None):
    """Simulate a scraper run: write items, logs, samples and requests.

    Authenticates with job-level auth only — no global or project
    credentials are available to a scraper. If ``close_reason`` is an
    Exception instance it is raised to emulate a crash; otherwise, when
    truthy, it is written into the job metadata before saving.
    """
    methods = 'GET PUT POST DELETE HEAD OPTIONS TRACE CONNECT'.split()
    # Fixed timestamp standing in for "now" so request timestamps are
    # reproducible across runs.
    now_ts = 1476803148638
    scraper = HubstorageClient(endpoint=TEST_ENDPOINT)
    with closing(scraper):
        job = scraper.get_job(jobkey, auth=jobauth)
        for idx in range(MAGICN):
            item_id = job.items.write({'uuid': idx})
            job.logs.debug('log debug %s' % idx, idx=idx)
            job.logs.info('log info %s' % idx, idx=idx)
            job.logs.warn('log warn %s' % idx, idx=idx)
            job.logs.error('log error %s' % idx, idx=idx)
            sample_id = job.samples.write([idx, idx, idx])
            request_id = job.requests.add(
                url='http://test.com/%d' % idx,
                status=random.randint(100, 1000),
                method=random.choice(methods),
                rs=random.randint(0, 100000),
                duration=random.randint(0, 1000),
                parent=random.randrange(0, idx + 1) if idx > 10 else None,
                ts=now_ts + 100 + idx,
            )
            # Each writer is expected to hand back sequential ids.
            assert item_id == idx
            assert sample_id == idx
            assert request_id == idx

        if isinstance(close_reason, Exception):
            raise close_reason

        if close_reason:
            job.metadata['close_reason'] = close_reason

        job.metadata.save()
0 commit comments