Skip to content

It adds the ability to cancel multiple jobs #125

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 23, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ docs/_build

.DS_Store
pytestdebug.log
.idea
7 changes: 7 additions & 0 deletions scrapinghub/client/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ class Unauthorized(ScrapinghubAPIError):
"""Request lacks valid authentication credentials for the target resource."""


class Forbidden(ScrapinghubAPIError):
    """You don't have the permission to access the requested resource.

    It is either read-protected or not readable by the server.
    Raised when a request fails with HTTP status code 403.
    """


class NotFound(ScrapinghubAPIError):
    """Entity doesn't exist (e.g. spider or project).

    Raised when a request fails with HTTP status code 404.
    """

Expand Down Expand Up @@ -68,6 +73,8 @@ def wrapped(*args, **kwargs):
raise BadRequest(http_error=exc)
elif status_code == 401:
raise Unauthorized(http_error=exc)
elif status_code == 403:
raise Forbidden(http_error=exc)
elif status_code == 404:
raise NotFound(http_error=exc)
elif status_code == 413:
Expand Down
60 changes: 60 additions & 0 deletions scrapinghub/client/jobs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import absolute_import

import json

from ..hubstorage.job import JobMeta as _JobMeta
from ..hubstorage.job import Items as _Items
from ..hubstorage.job import Logs as _Logs
Expand Down Expand Up @@ -77,6 +79,64 @@ def count(self, spider=None, state=None, has_tag=None, lacks_tag=None,
params['spider'] = self.spider.name
return next(self._project.jobq.apiget(('count',), params=params))

def cancel_jobs(self, keys=None, count=None, **params):
    """Cancel jobs, either by an explicit list of keys or by amount.

    Exactly one of ``keys`` and ``count`` has to be provided.

    :param keys: (optional) a list of strings containing the job keys in
        the format: <project>/<spider>/<job_id>.
    :param count: (optional) it requires admin access. Used for admins
        to bulk cancel an amount of ``count`` jobs.

    :return: a dict with the amount of jobs cancelled.
    :rtype: :class:`dict`

    :raises ValueError: if both or none of ``keys``/``count`` are given,
        if ``keys`` is not a list, if ``count`` is not an int, or if any
        key belongs to a project other than this one.

    Usage:

    - cancel jobs 123 and 321 from project 111 and spiders 222 and 333::

        >>> project.jobs.cancel_jobs(['111/222/123', '111/333/321'])
        {'count': 2}

    - cancel 100 jobs asynchronously::

        >>> project.jobs.cancel_jobs(count=100)
        {'count': 100}
    """
    update_kwargs(params, count=count, keys=keys)
    keys = params.get('keys')
    count = params.get('count')

    if keys and count:
        raise ValueError("keys and count can't be defined simultaneously")
    if not keys and not count:
        raise ValueError("keys or count should be defined")

    if keys:
        if not isinstance(keys, list):
            raise ValueError("keys should be a list")

        # parse_job_key is expected to raise ValueError on a malformed key
        parsed_keys = [parse_job_key(k) for k in keys]

        if any(key.project_id != self.project_id for key in parsed_keys):
            raise ValueError(
                "all keys should belong to project: %s" % self.project_id
            )

        # change it to the format in which JobQ expects.
        data = [{"key": str(k)} for k in parsed_keys]

        # may raise BadRequest if JobQ doesn't validate
        response = self._project.jobq.apipost("cancel",
                                              data=json.dumps(data))
    else:
        if not isinstance(count, int):
            raise ValueError("count should be an int")

        # may raise Forbidden
        response = self._project.jobq.apipost("cancel?count=%s" % count)

    # apipost yields the decoded response records; unwrap the first one in
    # both branches so the caller always gets the documented dict instead
    # of an unconsumed iterator in the ``count`` case.
    return list(response)[0]

def iter(self, count=None, start=None, spider=None, state=None,
has_tag=None, lacks_tag=None, startts=None, endts=None,
meta=None, **params):
Expand Down
1 change: 1 addition & 0 deletions tests/client/cassetes/test_job/test_cancel_jobs.gz
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
eJy11gd428YVAGDJe4/EiR07TmDGVllX4BRljbKJoihqYtlSKtqCnV7TE3jiQSIBvgOgYZdt2iTdSfde6d57792me++999677w6QSpGwrVhfbX/+Doe7A/De/+5484oatMSNtS0tLdNMuJZjQ+vhVmMTXlu2xwQ1PexzYQWBlfEarAqGCgY+cz1YXYM1cWMN9lSYx50irDVW4cXI8GgB1hkrsekLC9Yb+7HFPa/ak0xms4ngXy7d05XqSiVp1UoK305MuvjsDWr+uFOcg43Gldh0/XGrmPdoydXTbVXhTDLTy2eCP21u1Soykeeu7uHb6MFlGy0Wb8IJclIaNqn35YziHRc212BL3NiAPcdcJvS+ErM92Epgm7EL+6pz+A227poC38kucX88mUmkE2nYTo2teL/PNFnV0wds0ynifbiAwIXGZrxROmVV27UimyhTj8EOqiISjIaLCFysInEweRB2UvXwfse2mQos7CJwieqbYqyq07I1zWA3NbYEozx8P32I2SWPwx4ClxorsL8zC3upytD8iMJclcFlBC439mEvrVbLlknl8slZfWZmRp9wREX3RZnJN2dF0Kh67T4fv1dYp9RQ2GdksO9q6lqmdrKy1L8nOiHm+8Y6hcKtIhUGV9Rgf6DC9ajnu3CgBm1xlVn5fLjH4btUVirMdWmJQVx91vBhuKfPMUMHg6FFGct7EWg39uJVwWftWiarXe+XtUwq3a2lsz25TE8upw0eKYAehBxzioghQSCpHuBPW6YjbEgF4TLDcHkyXGkCGWNbQ7iUwSw1Vsv+ctmZgQ4CORUsabpdGx4pXDd8dBQ6g0S6zNNNx5myGBwi0GXcKZ/iignPmWK2XmTT485svniq8+T0WH8BU1swzPGOwRPHqk5nZfC4xwbM7BQ7OdExkjOGJgfGRq8tHervyw15neZYbmyyMJVNHSlYvRqbrVoY3XwQhHQYhExqPgidMgi92hE6K0nns+mOju7OVKpXG6Eezyd7tVFaYaOWx/JDdBa6A17z0SgHvHoI9Ko8ZLvg3lRlYJqKOcgTuI8aH3hu1/qD772S+nxjDa4K8yxkQfQZbXhxOhakPdYTc6Zi7bFJB0sYL8KqTaaTuVSsBle7vl+D/jjHTeSaOK/bOQb43dgxruVNG0Xm7m8UGRiU9O4b57gbXMe3U47FfT3fQTnW72G+k3Is1CG+O3AWhuAIgaPGDf+PlMMw5VjvI/P1fgPlWN734xrl512mo77PsTILcY7leCzOsQr5QuUdV5U3Vld5BoETS6i8k4sq70YC919UeSSi8h5A4KboyntgfeVRAuMRlWdG6i0SYPN6J+r0lgjwKL1WoHeSL5lsGslOKbJlRbYiycp9fXCgADaK7f6fWNdzBG5sielKou4wSZhOJYkru8n6SkjKJzNw+MajMgXVpuMJCIizH0+u0upFHEW+8jsd+p1Bv+fvZzbwM6f8nGrwc1r5eVC88XirEXhw4/H2kIDMaEDmZgIPVUbskmXPJvF7OhMpeFjEAXcLgVuj3dxGje3Yb+hyTctk+vHwp8zDCTxCJQnrEh4ZuLhGBuZRBB59Vt2HlO7HRJ7EjyVwu9GK/d1wBzX2hLpcV5fjhFPW+yRgfVhY+EnwOAKPV6MPwhMCdU9EdeuxI1ZltvwhEYMnKVhPVrCeUg/rqecNKx3CeloI6+lxjpKewQMsz+QBjWeFNJ69LBrPCWg8V9F4XgONOxWN5zfReAGBFzbSeNEiGi8m8JJmGi+NoPEyAi+PpvGKM9F4JYFXLdB4dR2N1xB47RJovC6SxusJvIEvAcUbCbyJhxze3MThLYrDWxWHty06Gt+OHnJL9QALHkxqm6wM78CzUqb4xtOxKTYX69EazuR2LeoG7nwE3imT+C5l6N2hofeEht4bGnqfNISn1vtV5nMp+MAySH0wIPUhRerDDaQ+okh9tInUXQQ+1kjq44tIfYLAJ5tJfSqC1KcJfCaa1GfPROpzBD6/QOoLdaS+
SOBLSyD15UhSXyHwVfXh6Qx87dyyvk7gG/OyvomyNqnjzXR828O8ZmrwLYXr2wrXd+r3mu8u+xD7XrjXfF85+UHo5Iehkx+FTn68rL3mJwGMnyoYP2uA8XMF4xdNMH5J4FeNMH69CMZvCPy2GcbvImD8nsAfomH88Uww/kTgzwsw/lIH468E/rYEGH+PhPEPAv8MYKTgX+eG8W8C/wlhiBY8j9CGjEhswrItl7NiTLS2yp/Rfk2swIb0IVaqxgIRsUrOW+aJJFbLRRQUsUatD0SslX2Ki1gnm0qMWC+bCo3YIJvLcCM24gKKjtikHnqgJjarRj0gsUWOkm+2Vd5rYCS2YScR2/H/BkziAuyr9yQuVEN3yKGNqsRFanAjLHGxmrJTToniJXapaZHCxCVq7m45N3Qm9qjh89TEpWrEXjnibOC6JDhxmZrbbE5crlbR5CqBPLFPDT0HPhFT866QSQwJ7o8keCAgyPzEfwGFLKo2
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
eJyllfd/20QYxtM90slq6QDVlGBKZHk0JnEwxW1DR4ZT7IRrylEu8sWn2Jb0ajhOi6Hsvfcue2/+CP4sXp3ckMYuzYfa/uGWdO/4Po8vrGxCV5Ss6+rqqnPHNSwTVgyvIJtwbpged5ju4ZoLKymsijZhdXjU4eBz14M1TVgbJWtxpcY9YZVgHVmNk/F8oQjrySoc+o4BG8h+HAnPszOalkrFwl9fItMf749rzDY0xzdjsy7evVE+P22V5qGbHMKh608bpazHyq6a6LEda5brXjYZfnpc2yhxJytc1cNo1HDaw0qls/hA8FACNsl4BWe448LmJmyJko24MuFyR82VuenBVgrbyE5cs+cxB1N1dQdjMsvCn9aSsUQsAdsZ2Yr7OV3ntqcOmbpVwn24jsL1ZDNulM8Zdq9S4jNV5nG4gcmKhKfhRgo3yUoc0A7ADiYvP2KZJpeFhZ0UbpZrFc5tlVWNOoddjGwJT3kYnzrCzbInYDeFPWQlrqdTsJfJDl06UZy3OdxC4VayD1eZbVcNnQWv1xrq3NycOmM5NdV3qjyInJdAYTLsnI/5OsY5eRT2kSSuHWauoStTteV+T6ch4vtkvYTCtREVDrc1YX9Ihesxz3fh9ib0RGVng/vhjuG/ZVdq3HVZmUNUppUfhjt9gR06EJXJ6a3kvCC5uyj0km1LkpPEqIysCdarVWsOYhQ0mVpAYK+SHy+eyI8VIB6W3eWeqltWxeCQoJAkF4NbXGfGsyrcVEu8Pm01srxydIZMEs8aqpwaTfYJfXSsUBg9Xa/W8xN1L8kKp8rHxyAxNONPzs5N5wrVukjnpvon5qd8r1YfgUGFN2wDa5Et+rxXSSaUk35VScaTcSWRyvSlMsm0cmy0OKiMskYAYDaVOHhwIB2PDyrjzBNZbVApsBovGB7PjrAGpEKaEFfUJxyk0Cdr59cN3XJMSIesXCpWNWTlbgr9sqipfhhgsvJ15sxDhsKgPB/C2ascCctxT3imFOCbpXAv2YuzMP5UK/7EwGXxwyHmi+4m3NfqsxMIIkd6cHI+ErY9kolYlUhvZNZCCeOkpVotofUlI0047Pp+E45EBZrI0ahYJ0VybKgIQ2IDGfjXMFzPcpCSWL0WW6TMmG7VNHyzqy1+rRbczOF+0T0WkHSsTevHKZz4b62fZAJ1PdxB1yNMoJhHxQ4mULVjYhcT/1szed8XKJPxqEBtnIoKlIRYkMEDUgaFKNnd6pTrqoHUHauq5gLO1bxjlA0TihQmyIrAW2CSke04IGoBQTF0rk623PxBCkSWFvmG02GfjwbpTFE4s4w+P9TRjCiFh+XNA3A2BLQQAvoIhdCbTAywoWFF07E4THfwK51CqbOkeZtLzlAoL3VJEQJoiBzZgDsRm5uBK0dgVoJVkWBVA7AW/pJqSFbfcsmCBbJ0Zuq8CqboJnvw6TPnIxU+H8koi4hOxPETaVKwgtbZUYGggQhZckRIjtsixwvIQTf3Zb+T/VC/BpDmQpAaEqT5JSCdkyCdvzpIj1Joiisi9BiFxxcQurAIoScoPLkMhJ7qiNDTFJ6RgSaS8OxlDD1H4fl2hl7owNCLFF7qzNDLbQy9QuHVpQy9FjL0OjK0STqXbvmmh62NN+ENidGbEqO3FvvT29fsT++0/Oldycl7LU7eb3HyQYuTD6/JYT4KwfhYgvHJEjA+lWB8dnUwLlL4/MpgfEHhywUwvloExtcUvlkGGN92BOM7Ct+Ly13lBwo/thPxUwcifqbwS2cifm0j4jcKvy8l4o+QiD/bXOUvxIH7sX8AEwJsIA==
51 changes: 51 additions & 0 deletions tests/client/test_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from scrapinghub.client.logs import Logs
from scrapinghub.client.requests import Requests
from scrapinghub.client.samples import Samples
from scrapinghub.client.exceptions import BadRequest

from ..conftest import TEST_PROJECT_ID
from ..conftest import TEST_SPIDER_NAME
Expand Down Expand Up @@ -44,6 +45,56 @@ def test_job_update_tags(spider):
assert job2.metadata.get('tags') == ['tag2']


def test_cancel_jobs_validation(spider):
    """Check that ``cancel_jobs`` raises ValueError for invalid arguments.

    The messages are compared against ``err.value`` (the raised exception)
    rather than the ``ExceptionInfo`` wrapper, whose string form is not the
    exception message in recent pytest versions.
    """
    # neither keys nor count
    with pytest.raises(ValueError) as err:
        spider.jobs.cancel_jobs()

    assert 'keys or count should be defined' in str(err.value)

    # both keys and count
    with pytest.raises(ValueError) as err:
        spider.jobs.cancel_jobs(['2222222/1/1'], count=2)

    assert "keys and count can't be defined simultaneously" in str(err.value)

    # keys of the wrong type
    with pytest.raises(ValueError) as err:
        spider.jobs.cancel_jobs(keys="testing")

    assert 'keys should be a list' in str(err.value)

    # count of the wrong type
    with pytest.raises(ValueError) as err:
        spider.jobs.cancel_jobs(count=[1, 2])

    assert 'count should be an int' in str(err.value)

    # keys that name two different projects
    with pytest.raises(ValueError) as err:
        spider.jobs.cancel_jobs(['2222222/1/1', '2222226/1/1'])

    assert 'all keys should belong to project' in str(err.value)


def test_cancel_jobs(spider):
    """Cancelling two pending jobs by key finishes both and reports 2."""
    first = spider.jobs.run(job_args={'subid': 'tags-1'}, add_tag=['tag1'])
    second = spider.jobs.run(job_args={'subid': 'tags-2'}, add_tag=['tag2'])
    scheduled = (first, second)

    # both jobs are still pending right after being scheduled
    for job in scheduled:
        assert job.metadata.get('state') == 'pending'

    result = spider.jobs.cancel_jobs([job.key for job in scheduled])

    # the states are checked in the same order as before the cancellation
    for job in scheduled:
        assert job.metadata.get('state') == 'finished'
    assert result == {'count': 2}


def test_cancel_jobs_non_existent(spider):
    """Cancelling an unknown job key reports 0 and leaves other jobs alone."""
    scheduled = spider.jobs.run(
        job_args={'subid': 'tags-1'},
        add_tag=['tag1'],
    )
    assert scheduled.metadata.get('state') == 'pending'

    # key in the same project that is not expected to match any job
    missing_key = '%s/1/10000' % scheduled.project_id
    result = spider.jobs.cancel_jobs([missing_key])

    assert result == {'count': 0}
    assert scheduled.metadata.get('state') == 'pending'


def test_job_start(spider):
job = spider.jobs.run()
assert job.metadata.get('state') == 'pending'
Expand Down