diff --git a/docs/conf.py b/docs/conf.py index c3b9b3f6..b9725776 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # python-scrapinghub documentation build configuration file, created by # sphinx-quickstart on Fri Mar 24 12:28:40 2017. @@ -54,9 +53,9 @@ master_doc = 'index' # General information about the project. -project = u'scrapinghub' -copyright = u'2010-{}, Scrapinghub'.format(YEAR) -author = u'Scrapinghub' +project = 'scrapinghub' +copyright = f'2010-{YEAR}, Scrapinghub' +author = 'Scrapinghub' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -135,8 +134,8 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'python-scrapinghub.tex', u'python-scrapinghub Documentation', - u'Pablo Hoffman, Daniel Graña', 'manual'), + (master_doc, 'python-scrapinghub.tex', 'python-scrapinghub Documentation', + 'Pablo Hoffman, Daniel Graña', 'manual'), ] @@ -145,7 +144,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - (master_doc, 'python-scrapinghub', u'python-scrapinghub Documentation', + (master_doc, 'python-scrapinghub', 'python-scrapinghub Documentation', [author], 1) ] @@ -156,13 +155,9 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'python-scrapinghub', u'python-scrapinghub Documentation', + (master_doc, 'python-scrapinghub', 'python-scrapinghub Documentation', author, 'python-scrapinghub', 'One line description of project.', 'Miscellaneous'), ] -# Set Sphinx Read The Docs theme -import sphinx_rtd_theme - html_theme = 'sphinx_rtd_theme' -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] diff --git a/requirements-base.txt b/requirements-base.txt index 8f7f75ea..10831109 100644 --- a/requirements-base.txt +++ b/requirements-base.txt @@ -1,3 +1,2 @@ requests>=1.0 retrying>=1.3.3 -six>=1.10.0 diff --git a/scrapinghub/client/__init__.py b/scrapinghub/client/__init__.py index 89057af2..f7a82e7e 100644 --- a/scrapinghub/client/__init__.py +++ b/scrapinghub/client/__init__.py @@ -16,17 +16,17 @@ class Connection(_Connection): @_wrap_http_errors def _request(self, *args, **kwargs): - return super(Connection, self)._request(*args, **kwargs) + return super()._request(*args, **kwargs) class HubstorageClient(_HubstorageClient): @_wrap_http_errors def request(self, *args, **kwargs): - return super(HubstorageClient, self).request(*args, **kwargs) + return super().request(*args, **kwargs) -class ScrapinghubClient(object): +class ScrapinghubClient: """Main class to work with the Scrapy Cloud API. 
:param auth: (optional) Scrapy Cloud API key or other Scrapy Cloud auth diff --git a/scrapinghub/client/activity.py b/scrapinghub/client/activity.py index b5d1777f..96205f68 100644 --- a/scrapinghub/client/activity.py +++ b/scrapinghub/client/activity.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from .proxy import _Proxy from .utils import parse_job_key, update_kwargs diff --git a/scrapinghub/client/collections.py b/scrapinghub/client/collections.py index 9cdc061b..0ebab223 100644 --- a/scrapinghub/client/collections.py +++ b/scrapinghub/client/collections.py @@ -1,7 +1,4 @@ -from __future__ import absolute_import - -from six import string_types -from six.moves import collections_abc +from collections.abc import Iterable from ..hubstorage.collectionsrt import Collection as _Collection @@ -96,7 +93,7 @@ def list(self): return list(self.iter()) -class Collection(object): +class Collection: """Representation of a project collection object. Not a public constructor: use :class:`Collections` instance to get a @@ -184,8 +181,8 @@ def delete(self, keys): The method returns ``None`` (original method returns an empty generator). """ - if (not isinstance(keys, string_types) and - not isinstance(keys, collections_abc.Iterable)): + if (not isinstance(keys, str) and + not isinstance(keys, Iterable)): raise ValueError("You should provide string key or iterable " "object providing string keys") self._origin.delete(keys) diff --git a/scrapinghub/client/exceptions.py b/scrapinghub/client/exceptions.py index d804a796..493b573b 100644 --- a/scrapinghub/client/exceptions.py +++ b/scrapinghub/client/exceptions.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import from functools import wraps from requests import HTTPError @@ -29,7 +27,7 @@ def __init__(self, message=None, http_error=None): self.http_error = http_error if not message: message = _get_http_error_msg(http_error) - super(ScrapinghubAPIError, self).__init__(message) + super().__init__(message) class BadRequest(ScrapinghubAPIError): diff --git a/scrapinghub/client/frontiers.py b/scrapinghub/client/frontiers.py index 38298a0b..171439f3 100644 --- a/scrapinghub/client/frontiers.py +++ b/scrapinghub/client/frontiers.py @@ -1,9 +1,6 @@ -from __future__ import absolute_import from functools import partial from collections import defaultdict -from six import string_types - from ..hubstorage.frontier import Frontier as _Frontier from ..hubstorage.utils import urlpathjoin @@ -15,7 +12,7 @@ class _HSFrontier(_Frontier): """Modified hubstorage Frontier with newcount per slot.""" def __init__(self, *args, **kwargs): - super(_HSFrontier, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.newcount = defaultdict(int) def _get_writer(self, frontier, slot): @@ -84,7 +81,7 @@ class Frontiers(_Proxy): >>> project.frontiers.close() """ def __init__(self, *args, **kwargs): - super(Frontiers, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) def get(self, name): """Get a frontier by name. @@ -125,7 +122,7 @@ def close(self): self._origin.close() -class Frontier(object): +class Frontier: """Representation of a frontier object. Not a public constructor: use :class:`Frontiers` instance to get a @@ -201,7 +198,7 @@ def newcount(self): if frontier == self.key) -class FrontierSlot(object): +class FrontierSlot: """Representation of a frontier slot object. 
Not a public constructor: use :class:`Frontier` instance to get a @@ -295,7 +292,7 @@ def newcount(self): return newcount_values.get((self._frontier.key, self.key), 0) -class FrontierSlotFingerprints(object): +class FrontierSlotFingerprints: """Representation of request fingerprints collection stored in slot.""" def __init__(self, slot): @@ -311,7 +308,7 @@ def add(self, fps): origin = self._frontier._frontiers._origin writer = origin._get_writer(self._frontier.key, self.key) fps = list(fps) if not isinstance(fps, list) else fps - if not all(isinstance(fp, string_types) for fp in fps): + if not all(isinstance(fp, str) for fp in fps): raise ValueError('Fingerprint should be of a string type') for fp in fps: writer.write({'fp': fp}) @@ -338,7 +335,7 @@ def list(self, **params): return list(self.iter(**params)) -class FrontierSlotQueue(object): +class FrontierSlotQueue: """Representation of request batches queue stored in slot.""" def __init__(self, slot): diff --git a/scrapinghub/client/items.py b/scrapinghub/client/items.py index e9be38c0..eba8e89d 100644 --- a/scrapinghub/client/items.py +++ b/scrapinghub/client/items.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import sys from .proxy import _ItemsResourceProxy, _DownloadableProxyMixin @@ -84,10 +82,10 @@ def _modify_iter_params(self, params): :return: a dict with updated set of params. :rtype: :class:`dict` """ - params = super(Items, self)._modify_iter_params(params) + params = super()._modify_iter_params(params) offset = params.pop('offset', None) if offset: - params['start'] = '{}/{}'.format(self.key, offset) + params['start'] = f'{self.key}/{offset}' return params def list_iter(self, chunksize=1000, *args, **kwargs): diff --git a/scrapinghub/client/jobs.py b/scrapinghub/client/jobs.py index 75d0acbc..b38a56c3 100644 --- a/scrapinghub/client/jobs.py +++ b/scrapinghub/client/jobs.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import json from ..hubstorage.job import JobMeta as _JobMeta @@ -17,7 +15,7 @@ from .utils import get_tags_for_update, parse_job_key, update_kwargs -class Jobs(object): +class Jobs: """Class representing a collection of jobs for a project/spider. Not a public constructor: use :class:`~scrapinghub.client.projects.Project` @@ -445,7 +443,7 @@ def update_tags(self, add=None, remove=None, spider=None): return result['count'] -class Job(object): +class Job: """Class representing a job object. Not a public constructor: use :class:`~scrapinghub.client.ScrapinghubClient` @@ -568,7 +566,7 @@ def update(self, state, **params): job = next(self._project.jobq.update(self, state=state, **params)) return job['prevstate'] except StopIteration: - raise NotFound("Job {} doesn't exist".format(self.key)) + raise NotFound(f"Job {self.key} doesn't exist") def cancel(self): """Schedule a running job for cancellation. diff --git a/scrapinghub/client/logs.py b/scrapinghub/client/logs.py index 3ab37e96..f739bd68 100644 --- a/scrapinghub/client/logs.py +++ b/scrapinghub/client/logs.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import json import logging @@ -90,15 +88,15 @@ def _modify_iter_params(self, params): :return: a modified dictionary with params. 
:rtype: :class:`dict` """ - params = super(Logs, self)._modify_iter_params(params) + params = super()._modify_iter_params(params) offset = params.pop('offset', None) if offset: - params['start'] = '{}/{}'.format(self.key, offset) + params['start'] = f'{self.key}/{offset}' level = params.pop('level', None) if level: minlevel = getattr(LogLevel, level, None) if minlevel is None: - raise ValueError("Unknown log level: {}".format(level)) + raise ValueError(f"Unknown log level: {level}") level_filter = json.dumps(['level', '>=', [minlevel]]) # there can already be some filters handled by super class method params['filter'] = params.get('filter', []) + [level_filter] diff --git a/scrapinghub/client/projects.py b/scrapinghub/client/projects.py index a67f3266..e821f3c3 100644 --- a/scrapinghub/client/projects.py +++ b/scrapinghub/client/projects.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from ..hubstorage.activity import Activity as _Activity from ..hubstorage.collectionsrt import Collections as _Collections from ..hubstorage.project import Settings as _Settings @@ -13,7 +11,7 @@ from .utils import parse_project_id -class Projects(object): +class Projects: """Collection of projects available to current user. Not a public constructor: use :class:`~scrapinghub.client.ScrapinghubClient` @@ -95,7 +93,7 @@ def summary(self, state=None, **params): return self._client._hsclient.projects.jobsummaries(**params) -class Project(object): +class Project: """Class representing a project object and its resources. Not a public constructor: use :class:`~scrapinghub.client.ScrapinghubClient` diff --git a/scrapinghub/client/proxy.py b/scrapinghub/client/proxy.py index f7d10b4d..8bfe71ae 100644 --- a/scrapinghub/client/proxy.py +++ b/scrapinghub/client/proxy.py @@ -1,6 +1,3 @@ -from __future__ import absolute_import - -import six import json from ..hubstorage import ValueTooLarge as _ValueTooLarge @@ -8,7 +5,7 @@ from .exceptions import ValueTooLarge -class _Proxy(object): +class _Proxy: """A helper to create a class instance and proxy its methods to origin. The internal proxy class is useful to link class attributes from its @@ -99,7 +96,7 @@ def close(self, block=True): self._origin.close(block) -class _DownloadableProxyMixin(object): +class _DownloadableProxyMixin: def iter(self, _path=None, count=None, requests_params=None, **apiparams): """A general method to iterate through elements. @@ -150,7 +147,7 @@ def update(self, values): raise TypeError("values should be a dict") data = next(self._origin.apiget()) data.update(values) - self._origin.apipost(jl={k: v for k, v in six.iteritems(data) + self._origin.apipost(jl={k: v for k, v in data.items() if k not in self._origin.ignore_fields}, is_idempotent=True) @@ -167,7 +164,7 @@ def iter(self): :return: an iterator over key/value pairs. 
:rtype: :class:`collections.abc.Iterable` """ - return six.iteritems(next(self._origin.apiget())) + return iter(next(self._origin.apiget()).items()) def _format_iter_filters(params): @@ -179,7 +176,7 @@ def _format_iter_filters(params): if filters and isinstance(filters, list): filter_data = [] for elem in params.pop('filter'): - if isinstance(elem, six.string_types): + if isinstance(elem, str): filter_data.append(elem) elif isinstance(elem, (list, tuple)): filter_data.append(json.dumps(elem)) diff --git a/scrapinghub/client/requests.py b/scrapinghub/client/requests.py index 1ff227ea..2acdf5d5 100644 --- a/scrapinghub/client/requests.py +++ b/scrapinghub/client/requests.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from .proxy import _ItemsResourceProxy, _DownloadableProxyMixin diff --git a/scrapinghub/client/samples.py b/scrapinghub/client/samples.py index 87a8e9bc..21e84616 100644 --- a/scrapinghub/client/samples.py +++ b/scrapinghub/client/samples.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from .proxy import _ItemsResourceProxy diff --git a/scrapinghub/client/spiders.py b/scrapinghub/client/spiders.py index 1d665801..ffd45c72 100644 --- a/scrapinghub/client/spiders.py +++ b/scrapinghub/client/spiders.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from requests.compat import urljoin from .exceptions import NotFound, _wrap_http_errors @@ -7,7 +5,7 @@ from .utils import get_tags_for_update -class Spiders(object): +class Spiders: """Class to work with a collection of project spiders. Not a public constructor: use :class:`~scrapinghub.client.projects.Project` @@ -45,7 +43,7 @@ def get(self, spider, **params): project = self._client._hsclient.get_project(self.project_id) spider_id = project.ids.spider(spider, **params) if spider_id is None: - raise NotFound("Spider {} doesn't exist.".format(spider)) + raise NotFound(f"Spider {spider} doesn't exist.") return Spider(self._client, self.project_id, spider_id, spider) def list(self): @@ -75,7 +73,7 @@ def iter(self): return iter(self.list()) -class Spider(object): +class Spider: """Class representing a Spider object. Not a public constructor: use :class:`Spiders` instance to get @@ -97,7 +95,7 @@ class Spider(object): def __init__(self, client, project_id, spider_id, spider): self.project_id = project_id - self.key = '{}/{}'.format(str(project_id), str(spider_id)) + self.key = f'{str(project_id)}/{str(spider_id)}' self._id = str(spider_id) self.name = spider self.jobs = Jobs(client, project_id, self) @@ -124,7 +122,7 @@ def list_tags(self): :return: a list of spider tags. 
:rtype: :class:`list[str]` """ - path = 'v2/projects/{}/spiders/{}'.format(self.project_id, self._id) + path = f'v2/projects/{self.project_id}/spiders/{self._id}' url = urljoin(self._client._connection.url, path) response = self._client._connection._session.get(url) response.raise_for_status() diff --git a/scrapinghub/client/utils.py b/scrapinghub/client/utils.py index e78d848d..5d103a87 100644 --- a/scrapinghub/client/utils.py +++ b/scrapinghub/client/utils.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import os import json import logging @@ -7,10 +5,8 @@ import warnings from codecs import decode -import six - -class LogLevel(object): +class LogLevel: DEBUG = logging.DEBUG INFO = logging.INFO WARNING = logging.WARNING @@ -19,7 +15,7 @@ class LogLevel(object): SILENT = CRITICAL + 1 -class JobKey(object): +class JobKey: def __init__(self, project_id, spider_id, job_id): self.project_id = project_id @@ -27,7 +23,7 @@ def __init__(self, project_id, spider_id, job_id): self.job_id = job_id def __str__(self): - return '{}/{}/{}'.format(self.project_id, self.spider_id, self.job_id) + return f'{self.project_id}/{self.spider_id}/{self.job_id}' def parse_project_id(project_id): @@ -53,7 +49,7 @@ def parse_job_key(job_key): """ if isinstance(job_key, tuple): parts = job_key - elif isinstance(job_key, six.string_types): + elif isinstance(job_key, str): parts = job_key.split('/') else: raise ValueError("Job key should be a string or a tuple, got {}: {}" @@ -118,12 +114,12 @@ def parse_auth(auth): "nor SHUB_JOBAUTH environment variables is set") if isinstance(auth, tuple): - all_strings = all(isinstance(k, six.string_types) for k in auth) + all_strings = all(isinstance(k, str) for k in auth) if len(auth) != 2 or not all_strings: raise ValueError("Wrong authentication credentials") return auth - if not isinstance(auth, six.string_types): + if not isinstance(auth, str): raise ValueError("Wrong authentication credentials") jwt_auth = _search_for_jwt_credentials(auth) @@ -140,7 +136,7 @@ def _search_for_jwt_credentials(auth): except (binascii.Error, TypeError): return try: - if not isinstance(decoded_auth, six.string_types): + if not isinstance(decoded_auth, str): decoded_auth = decoded_auth.decode('ascii') login, _, password = decoded_auth.partition(':') if password and parse_job_key(login): diff --git a/scrapinghub/hubstorage/batchuploader.py b/scrapinghub/hubstorage/batchuploader.py index c0601cd6..72844a58 100644 --- a/scrapinghub/hubstorage/batchuploader.py +++ b/scrapinghub/hubstorage/batchuploader.py @@ -3,9 +3,7 @@ import random import logging import warnings -import six -from six.moves import range -from six.moves.queue import Queue +from queue import Queue from io import BytesIO from gzip import GzipFile from itertools import count @@ -18,7 +16,7 @@ logger = logging.getLogger('hubstorage.batchuploader') -class BatchUploader(object): +class BatchUploader: # Wait time between all batches status checks worker_loop_delay = 1.0 @@ -167,9 +165,9 @@ def _tryupload(self, batch): '[HTTP error %s] %s\n%s', url, offset, r.status_code, r.reason, r.text.rstrip()) return r - except (socket.error, requests.RequestException) as e: + except (OSError, requests.RequestException) as e: if isinstance(e, requests.HTTPError): - emsg = "[HTTP error {0}] {1}".format(e.response.status_code, + emsg = "[HTTP error {}] {}".format(e.response.status_code, e.response.text.rstrip()) else: emsg = str(e) @@ -201,7 +199,7 @@ class ValueTooLarge(ValueError): """Raised when a serialized item is greater than 1MB""" 
-class _BatchWriter(object): +class _BatchWriter: #: Truncate overly big items to that many bytes for the error message. ERRMSG_DATA_TRUNCATION_LEN = 1024 @@ -257,7 +255,7 @@ def __str__(self): def _encode_identity(iterable): data = BytesIO() for item in iterable: - if isinstance(item, six.text_type): + if isinstance(item, str): item = item.encode('utf8') data.write(item) data.write(b'\n') @@ -268,7 +266,7 @@ def _encode_gzip(iterable): data = BytesIO() with GzipFile(fileobj=data, mode='w') as gzo: for item in iterable: - if isinstance(item, six.text_type): + if isinstance(item, str): item = item.encode('utf8') gzo.write(item) gzo.write(b'\n') diff --git a/scrapinghub/hubstorage/client.py b/scrapinghub/hubstorage/client.py index 508c9445..2af04a45 100644 --- a/scrapinghub/hubstorage/client.py +++ b/scrapinghub/hubstorage/client.py @@ -45,7 +45,7 @@ def _get_package_version(): return __version__ -class HubstorageClient(object): +class HubstorageClient: DEFAULT_ENDPOINT = 'https://storage.scrapinghub.com/' DEFAULT_USER_AGENT = 'python-scrapinghub/{version}'.format( diff --git a/scrapinghub/hubstorage/collectionsrt.py b/scrapinghub/hubstorage/collectionsrt.py index 4818b19a..fa2d0cae 100644 --- a/scrapinghub/hubstorage/collectionsrt.py +++ b/scrapinghub/hubstorage/collectionsrt.py @@ -104,7 +104,7 @@ def count(self, _type, _name, **params): def _validate_collection(self, coltype, colname): if coltype not in {'s', 'cs', 'vs', 'vcs'}: - raise ValueError('Invalid collection type: {}'.format(coltype)) + raise ValueError(f'Invalid collection type: {coltype}') if not re.match(r'^\w+$', colname): raise ValueError('Invalid collection name {!r}, only alphanumeric ' @@ -134,7 +134,7 @@ def _batch(self, method, path, total_param, progress=None, **params): raise -class Collection(object): +class Collection: def __init__(self, coltype, colname, collections): self.coltype = coltype diff --git a/scrapinghub/hubstorage/frontier.py b/scrapinghub/hubstorage/frontier.py index e82f2d90..1f9517e4 100644 --- a/scrapinghub/hubstorage/frontier.py +++ b/scrapinghub/hubstorage/frontier.py @@ -1,4 +1,3 @@ - from .resourcetype import ResourceType from .utils import urlpathjoin @@ -17,7 +16,7 @@ class Frontier(ResourceType): def __init__(self, *a, **kw): self._writers = {} # dict of writers indexed by (frontier, slot) self.newcount = 0 - super(Frontier, self).__init__(*a, **kw) + super().__init__(*a, **kw) def _get_writer(self, frontier, slot): key = (frontier, slot) diff --git a/scrapinghub/hubstorage/job.py b/scrapinghub/hubstorage/job.py index 0939b249..eb406d68 100644 --- a/scrapinghub/hubstorage/job.py +++ b/scrapinghub/hubstorage/job.py @@ -5,7 +5,7 @@ from .jobq import JobQ -class Job(object): +class Job: def __init__(self, client, key, auth=None, jobauth=None, metadata=None): self.key = urlpathjoin(key) @@ -45,7 +45,7 @@ def purged(self): class JobMeta(MappingResourceType): resource_type = 'jobs' - ignore_fields = set(('auth', '_key', 'state')) + ignore_fields = {'auth', '_key', 'state'} def authtoken(self): return self.liveget('auth') diff --git a/scrapinghub/hubstorage/jobq.py b/scrapinghub/hubstorage/jobq.py index bae12a21..14aa73ad 100644 --- a/scrapinghub/hubstorage/jobq.py +++ b/scrapinghub/hubstorage/jobq.py @@ -125,8 +125,7 @@ def delete(self, job, **params): def _jobkeys(self, job): if isinstance(job, list): for x in job: - for k in self._jobkeys(x): - yield k + yield from self._jobkeys(x) elif isinstance(job, dict): yield job['key'] elif hasattr(job, 'key'): diff --git 
a/scrapinghub/hubstorage/project.py b/scrapinghub/hubstorage/project.py index 9d8fa3ac..fe6eef52 100644 --- a/scrapinghub/hubstorage/project.py +++ b/scrapinghub/hubstorage/project.py @@ -9,7 +9,7 @@ from .utils import urlpathjoin, xauth -class Project(object): +class Project: def __init__(self, client, projectid, auth=None): self.client = client diff --git a/scrapinghub/hubstorage/resourcetype.py b/scrapinghub/hubstorage/resourcetype.py index 3ad8f545..e59b19c0 100644 --- a/scrapinghub/hubstorage/resourcetype.py +++ b/scrapinghub/hubstorage/resourcetype.py @@ -3,9 +3,8 @@ import socket import time -import six import requests.exceptions as rexc -from six.moves import range, collections_abc +from collections.abc import MutableMapping from .utils import urlpathjoin, xauth from .serialization import jlencode, jldecode, mpdecode @@ -16,7 +15,7 @@ STATS_CHUNK_SIZE = 512 * 1024 -class ResourceType(object): +class ResourceType: resource_type = None key_suffix = None @@ -68,8 +67,7 @@ def _iter_lines(self, _path, **kwargs): r = self.client.request(**kwargs) lines = r.iter_lines(chunk_size=chunk_size) - if six.PY3: - return (l.decode(r.encoding or 'utf8') for l in lines) + return (l.decode(r.encoding or 'utf8') for l in lines) return lines def apirequest(self, _path=None, **kwargs): @@ -136,7 +134,7 @@ def _retry(self, iter_callback, resume=False, _path=None, requests_params=None, yield chunk offset += 1 break - except (ValueError, socket.error, rexc.RequestException) as exc: + except (ValueError, OSError, rexc.RequestException) as exc: # catch requests exceptions other than HTTPError if isinstance(exc, rexc.HTTPError): raise @@ -159,9 +157,8 @@ def iter_msgpack(self, _path=None, requests_params=None, **apiparams): requests_params.setdefault('stream', True) requests_params.setdefault('is_idempotent', True) requests_params = self._enforce_msgpack(**requests_params) - for chunk in self._retry(self._iter_content, False, _path, - requests_params, **apiparams): - yield chunk + yield from self._retry(self._iter_content, False, _path, + requests_params, **apiparams) def iter_json(self, _path=None, requests_params=None, **apiparams): """Reliably iterate through all data as json strings""" @@ -169,9 +166,8 @@ def iter_json(self, _path=None, requests_params=None, **apiparams): requests_params.setdefault('method', 'GET') requests_params.setdefault('stream', True) requests_params.setdefault('is_idempotent', True) - for line in self._retry(self._iter_lines, True, _path, requests_params, - **apiparams): - yield line + yield from self._retry(self._iter_lines, True, _path, requests_params, + **apiparams) class ItemsResourceType(ResourceType): @@ -229,7 +225,7 @@ def stats(self): return next(self.apiget('stats', chunk_size=STATS_CHUNK_SIZE)) -class MappingResourceType(ResourceType, collections_abc.MutableMapping): +class MappingResourceType(ResourceType, MutableMapping): _cached = None ignore_fields = () @@ -237,13 +233,13 @@ class MappingResourceType(ResourceType, collections_abc.MutableMapping): def __init__(self, *a, **kw): self._cached = kw.pop('cached', None) self._deleted = set() - super(MappingResourceType, self).__init__(*a, **kw) + super().__init__(*a, **kw) def __str__(self): return str(self._data) def __repr__(self): - return '{}({})'.format(self.__class__.__name__, repr(self._data)) + return f'{self.__class__.__name__}({repr(self._data)})' @property def _data(self): @@ -267,7 +263,7 @@ def save(self): if not self.ignore_fields: self.apipost(jl=self._data, is_idempotent=True) else: - self.apipost(jl={k: 
v for k, v in six.iteritems(self._data) + self.apipost(jl={k: v for k, v in self._data.items() if k not in self.ignore_fields}, is_idempotent=True) diff --git a/scrapinghub/hubstorage/serialization.py b/scrapinghub/hubstorage/serialization.py index c64b5ef5..68cb1e33 100644 --- a/scrapinghub/hubstorage/serialization.py +++ b/scrapinghub/hubstorage/serialization.py @@ -1,4 +1,3 @@ -import six from json import dumps, loads from datetime import datetime @@ -14,9 +13,9 @@ def jlencode(iterable): - if isinstance(iterable, (dict, six.string_types)): + if isinstance(iterable, (dict, str)): iterable = [iterable] - return u'\n'.join(jsonencode(o) for o in iterable) + return '\n'.join(jsonencode(o) for o in iterable) def jldecode(lineiterable): @@ -30,8 +29,7 @@ def mpdecode(iterable): unpacker.feed(chunk) # Each chunk can have none or many objects, # so here we dispatch any object ready - for obj in unpacker: - yield obj + yield from unpacker def jsonencode(o): @@ -49,4 +47,4 @@ def jsondefault(o): d = delta.days return (u + (s + d * ADAYINSECONDS) * 1e6) // 1000 else: - return six.text_type(o) + return str(o) diff --git a/scrapinghub/hubstorage/utils.py b/scrapinghub/hubstorage/utils.py index 9ac2e3c9..d808ba89 100644 --- a/scrapinghub/hubstorage/utils.py +++ b/scrapinghub/hubstorage/utils.py @@ -1,6 +1,5 @@ -import six import time -from six.moves.queue import Empty +from queue import Empty def urlpathjoin(*parts): @@ -36,10 +35,10 @@ def urlpathjoin(*parts): continue elif isinstance(p, tuple): p = urlpathjoin(*p) - elif not isinstance(p, six.text_type): - p = six.text_type(p) + elif not isinstance(p, str): + p = str(p) - url = p if url is None else u'{0}/{1}'.format(url.rstrip(u'/'), p) + url = p if url is None else '{}/{}'.format(url.rstrip('/'), p) return url @@ -75,12 +74,12 @@ def millitime(*a, **kw): return int(ts * 1000) -class iterqueue(object): +class iterqueue: """Iterate a queue til a maximum number of messages are read or the queue is empty it exposes an attribute "count" with the number of messages read - >>> from six.moves.queue import Queue + >>> from queue import Queue >>> q = Queue() >>> for x in range(10): ... q.put(x) @@ -146,6 +145,6 @@ def sizeof_fmt(num): """ for unit in ['B', 'KiB', 'MiB']: if abs(num) < 1024.0: - return "%.0f %s" % (num, unit) + return f"{num:.0f} {unit}" num /= 1024.0 - return "%.0f %s" % (num, 'GiB') + return "{:.0f} {}".format(num, 'GiB') diff --git a/scrapinghub/legacy.py b/scrapinghub/legacy.py index 30789185..d1abc1dc 100644 --- a/scrapinghub/legacy.py +++ b/scrapinghub/legacy.py @@ -2,7 +2,6 @@ Scrapinghub API Client Library """ -from __future__ import division, print_function, absolute_import import os import sys import json @@ -26,7 +25,7 @@ logger = logging.getLogger('scrapinghub') -class Connection(object): +class Connection: """Main class to access Scrapinghub API. 
""" @@ -85,7 +84,7 @@ def _create_session(self): s = session() s.auth = (self.apikey, self.password) s.headers.update({ - 'User-Agent': 'python-scrapinghub/{0}'.format(__version__), + 'User-Agent': f'python-scrapinghub/{__version__}', }) # For python-requests >= 1.x s.stream = True @@ -100,10 +99,10 @@ def _build_url(self, method, format): try: base_path = self.API_METHODS[method] except KeyError: - raise APIError("Unknown method: {0}".format(method), + raise APIError(f"Unknown method: {method}", _type=APIError.ERR_VALUE_ERROR) else: - path = "{0}.{1}".format(base_path, format) + path = f"{base_path}.{format}" return urljoin(self.url, path) def _get(self, method, format, params=None, headers=None, raw=False): @@ -111,7 +110,7 @@ def _get(self, method, format, params=None, headers=None, raw=False): from requests.compat import urlencode url = self._build_url(method, format) if params: - url = "{0}?{1}".format(url, urlencode(params, True)) + url = f"{url}?{urlencode(params, True)}" return self._request(url, None, headers, format, raw) def _post(self, method, format, params=None, headers=None, raw=False, files=None): @@ -194,7 +193,7 @@ def project_names(self): return self.project_ids() -class RequestProxyMixin(object): +class RequestProxyMixin: def _add_params(self, params): return params @@ -271,8 +270,8 @@ def __init__(self, project, **params): self._jobs = None def __repr__(self): - params = ', '.join("{0}={1}".format(*i) for i in self.params.items()) - return "JobSet({0.project!r}, {1})".format(self, params) + params = ', '.join("{}={}".format(*i) for i in self.params.items()) + return f"JobSet({self.project!r}, {params})" def __iter__(self): self._load_jobs() @@ -311,7 +310,7 @@ def _load_jobs(self): # jl status expected is only "ok" status = status_line.get('status', '') if status != 'ok': - raise APIError("Unknown response status: {0}".format(status)) + raise APIError(f"Unknown response status: {status}") self._jobs = result @@ -360,7 +359,7 @@ def items(self, offset=0, count=None, meta=None): yield item retrieved += 1 break - except (ValueError, socket.error, requests.RequestException, httplib.HTTPException) as exc: + except (ValueError, OSError, requests.RequestException, httplib.HTTPException) as exc: lastexc = exc params['offset'] += retrieved if 'count' in params: @@ -420,5 +419,5 @@ class APIError(Exception): ERR_SERVER_ERROR = "err_server_error" def __init__(self, message, _type=None): - super(APIError, self).__init__(message) + super().__init__(message) self._type = _type or self.ERR_DEFAULT diff --git a/setup.py b/setup.py index ae5ce0e8..6da86e68 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ platforms=['Any'], packages=['scrapinghub', 'scrapinghub.client', 'scrapinghub.hubstorage'], package_data={'scrapinghub': ['VERSION']}, - install_requires=['requests>=1.0', 'retrying>=1.3.3', 'six>=1.10.0'], + install_requires=['requests>=1.0', 'retrying>=1.3.3'], extras_require={'msgpack': mpack_required}, python_requires='>=3.8', classifiers=[ diff --git a/tests/client/test_collections.py b/tests/client/test_collections.py index 775d8fb8..36aba643 100644 --- a/tests/client/test_collections.py +++ b/tests/client/test_collections.py @@ -1,7 +1,6 @@ from contextlib import closing import pytest -from six.moves import range from scrapinghub.client.exceptions import BadRequest from scrapinghub.client.exceptions import NotFound diff --git a/tests/client/test_frontiers.py b/tests/client/test_frontiers.py index b20cffb7..6134b8dc 100644 --- a/tests/client/test_frontiers.py +++ 
b/tests/client/test_frontiers.py @@ -1,8 +1,7 @@ import time from types import GeneratorType -from six import string_types -from six.moves import collections_abc +from collections.abc import Iterable from scrapinghub.client.frontiers import Frontiers, Frontier, FrontierSlot from ..conftest import TEST_FRONTIER_SLOT @@ -36,7 +35,7 @@ def test_frontiers(project, frontier, frontier_name): # test for iter() method frontiers_names = frontiers.iter() - assert isinstance(frontiers_names, collections_abc.Iterable) + assert isinstance(frontiers_names, Iterable) assert frontier_name in list(frontiers_names) # test for list() method @@ -58,7 +57,7 @@ def test_frontier(project, frontier): _add_test_requests_to_frontier(frontier) slots = frontier.iter() - assert isinstance(slots, collections_abc.Iterable) + assert isinstance(slots, Iterable) assert TEST_FRONTIER_SLOT in list(slots) slots = frontier.list() @@ -84,7 +83,7 @@ def test_frontier_slot(project, frontier): assert len(batches) == 1 assert isinstance(batches[0], dict) assert sorted(batches[0].keys()) == ['id', 'requests'] - assert isinstance(batches[0]['id'], string_types) + assert isinstance(batches[0]['id'], str) requests = batches[0]['requests'] assert len(requests) == 2 assert requests == [['/some/path.html', None], diff --git a/tests/client/test_items.py b/tests/client/test_items.py index 79f68c33..c3547fa4 100644 --- a/tests/client/test_items.py +++ b/tests/client/test_items.py @@ -1,5 +1,4 @@ import pytest -from six.moves import range from .utils import normalize_job_for_tests diff --git a/tests/client/test_job.py b/tests/client/test_job.py index 7c36d12a..b1681604 100644 --- a/tests/client/test_job.py +++ b/tests/client/test_job.py @@ -1,5 +1,5 @@ import pytest -from six.moves import collections_abc +from collections.abc import Iterator from scrapinghub.client.items import Items from scrapinghub.client.jobs import Job @@ -223,7 +223,7 @@ def test_metadata_delete(spider): def test_metadata_iter_list(spider): job = spider.jobs.run(meta={'meta1': 'data1', 'meta2': 'data2'}) meta_iter = job.metadata.iter() - assert isinstance(meta_iter, collections_abc.Iterator) + assert isinstance(meta_iter, Iterator) meta_list = job.metadata.list() assert ('meta1', 'data1') in meta_list assert ('meta2', 'data2') in meta_list diff --git a/tests/client/test_projects.py b/tests/client/test_projects.py index 65eb831e..4127a7ef 100644 --- a/tests/client/test_projects.py +++ b/tests/client/test_projects.py @@ -1,10 +1,10 @@ import types from collections import defaultdict +from collections.abc import Iterator import pytest import responses from requests.compat import urljoin -from six.moves import range, collections_abc from scrapinghub import ScrapinghubClient from scrapinghub.client.activity import Activity @@ -288,7 +288,7 @@ def test_settings_delete(project): def test_settings_iter_list(project): project.settings.set('job_runtime_limit', 24) settings_iter = project.settings.iter() - assert isinstance(settings_iter, collections_abc.Iterator) + assert isinstance(settings_iter, Iterator) settings_list = project.settings.list() assert ('job_runtime_limit', 24) in settings_list assert settings_list == list(settings_iter) diff --git a/tests/client/test_proxy.py b/tests/client/test_proxy.py index dfde07f3..1b9925cc 100644 --- a/tests/client/test_proxy.py +++ b/tests/client/test_proxy.py @@ -1,4 +1,4 @@ -import mock +from unittest import mock import pytest from scrapinghub.client.proxy import _format_iter_filters diff --git a/tests/client/test_spiders.py 
b/tests/client/test_spiders.py index 48ca2832..15a5a8c4 100644 --- a/tests/client/test_spiders.py +++ b/tests/client/test_spiders.py @@ -2,8 +2,6 @@ from collections import defaultdict import pytest -from six import string_types -from six.moves import range from scrapinghub.client.exceptions import DuplicateJobError from scrapinghub.client.exceptions import BadRequest @@ -32,8 +30,8 @@ def test_spiders_list(project): def test_spider_base(project, spider): - assert isinstance(spider._id, string_types) - assert isinstance(spider.key, string_types) + assert isinstance(spider._id, str) + assert isinstance(spider.key, str) assert spider.key == spider.project_id + '/' + spider._id assert spider.name == TEST_SPIDER_NAME assert spider.project_id == TEST_PROJECT_ID diff --git a/tests/client/test_utils.py b/tests/client/test_utils.py index 253e1b4a..2383d5b3 100644 --- a/tests/client/test_utils.py +++ b/tests/client/test_utils.py @@ -2,7 +2,7 @@ import pytest from codecs import encode -import mock +from unittest import mock from scrapinghub.client.utils import parse_auth, parse_job_key diff --git a/tests/conftest.py b/tests/conftest.py index 59ad94f2..d0cc9702 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -43,7 +43,7 @@ def upgrade_cassette(cassette): -class VCRGzipSerializer(object): +class VCRGzipSerializer: """Custom ZIP serializer for VCR.py.""" def serialize(self, cassette_dict): @@ -93,7 +93,7 @@ def normalize_cassette(cassette_dict): interaction['request']['headers']['Authorization'] = ( 'Basic {}'.format( base64.b64encode( - '{}:'.format(DEFAULT_ADMIN_AUTH).encode('utf-8') + f'{DEFAULT_ADMIN_AUTH}:'.encode() ).decode('utf-8') ) ) diff --git a/tests/hubstorage/test_activity.py b/tests/hubstorage/test_activity.py index d874f922..0ddd5834 100644 --- a/tests/hubstorage/test_activity.py +++ b/tests/hubstorage/test_activity.py @@ -1,12 +1,11 @@ """ Test Activity """ -from six.moves import range def test_post_and_reverse_get(hsproject): # make some sample data - orig_data = [{u'foo': 42, u'counter': i} for i in range(20)] + orig_data = [{'foo': 42, 'counter': i} for i in range(20)] data1 = orig_data[:10] data2 = orig_data[10:] diff --git a/tests/hubstorage/test_batchuploader.py b/tests/hubstorage/test_batchuploader.py index 435bee24..2d850c54 100644 --- a/tests/hubstorage/test_batchuploader.py +++ b/tests/hubstorage/test_batchuploader.py @@ -3,7 +3,6 @@ """ import time import pytest -from six.moves import range from collections import defaultdict from scrapinghub.hubstorage import ValueTooLarge diff --git a/tests/hubstorage/test_client.py b/tests/hubstorage/test_client.py index d6533c31..4489f762 100644 --- a/tests/hubstorage/test_client.py +++ b/tests/hubstorage/test_client.py @@ -28,15 +28,15 @@ def test_push_job(hsclient, hsproject): ) job = start_job(hsproject) meta = job.metadata - assert meta.get('state') == u'running', hsclient.auth - assert meta.get('foo') == u'baz' + assert meta.get('state') == 'running', hsclient.auth + assert meta.get('foo') == 'baz' hsproject.jobq.finish(job) hsproject.jobq.delete(job) # job auth token is valid only while job is running meta = hsclient.get_job(job.key).metadata - assert meta.get('state') == u'deleted' - assert meta.get('foo') == u'baz' + assert meta.get('state') == 'deleted' + assert meta.get('foo') == 'baz' def test_jobsummaries(hsclient): @@ -45,7 +45,7 @@ def test_jobsummaries(hsclient): def _get_summary(): jss = hsclient.projects.jobsummaries() - mjss = dict((str(js['project']), js) for js in jss) + mjss = {str(js['project']): js for js 
in jss} return mjss.get(TEST_PROJECT_ID) summary = apipoll(_get_summary) diff --git a/tests/hubstorage/test_collections.py b/tests/hubstorage/test_collections.py index bb26c8c7..5895eb74 100644 --- a/tests/hubstorage/test_collections.py +++ b/tests/hubstorage/test_collections.py @@ -6,7 +6,6 @@ import pytest from scrapinghub import HubstorageClient -from six.moves import range from ..conftest import TEST_COLLECTION_NAME from .testutil import failing_downloader diff --git a/tests/hubstorage/test_frontier.py b/tests/hubstorage/test_frontier.py index fedc154f..9ad76b93 100644 --- a/tests/hubstorage/test_frontier.py +++ b/tests/hubstorage/test_frontier.py @@ -27,7 +27,7 @@ def test_add_read(hsproject, frontier_name): urls = [_get_urls(batch) for batch in frontier.read(frontier_name, TEST_FRONTIER_SLOT)] - expected_urls = [[u'/', u'/index.html', u'/index2.html']] + expected_urls = [['/', '/index.html', '/index2.html']] assert urls == expected_urls @@ -121,7 +121,7 @@ def test_add_extra_params(hsproject, frontier_name): frontier.add(frontier_name, TEST_FRONTIER_SLOT, fps) frontier.flush() - expected_request = [[u'/', {u'a': 1, u'c': 3, u'b': 2}]] + expected_request = [['/', {'a': 1, 'c': 3, 'b': 2}]] batches = list(frontier.read(frontier_name, TEST_FRONTIER_SLOT)) request = batches[0]['requests'] assert request == expected_request diff --git a/tests/hubstorage/test_jobq.py b/tests/hubstorage/test_jobq.py index 8bc41278..b80fe969 100644 --- a/tests/hubstorage/test_jobq.py +++ b/tests/hubstorage/test_jobq.py @@ -2,9 +2,7 @@ Test JobQ """ import os -import six import pytest -from six.moves import range from scrapinghub.hubstorage.jobq import DuplicateJobError from scrapinghub.hubstorage.utils import apipoll @@ -24,28 +22,28 @@ def test_push(hsclient, hsproject): assert 'auth' in qjob, qjob job = hsclient.get_job(qjob['key']) - assert job.metadata.get('state') == u'pending' + assert job.metadata.get('state') == 'pending' assert job.metadata.get('spider') == TEST_SPIDER_NAME assert job.metadata.get('auth') == qjob['auth'] jobq.start(job) job.metadata.expire() - assert job.metadata.get('state') == u'running' + assert job.metadata.get('state') == 'running' jobq.finish(job) job.metadata.expire() - assert job.metadata.get('state') == u'finished' + assert job.metadata.get('state') == 'finished' jobq.delete(job) job.metadata.expire() - assert job.metadata.get('state') == u'deleted' + assert job.metadata.get('state') == 'deleted' def test_push_with_extras(hsclient, hsproject): qjob = hsproject.jobq.push(TEST_SPIDER_NAME, foo='bar', baz='fuu') job = hsclient.get_job(qjob['key']) - assert job.metadata.get('foo') == u'bar' - assert job.metadata.get('baz') == u'fuu' + assert job.metadata.get('foo') == 'bar' + assert job.metadata.get('baz') == 'fuu' def test_push_with_priority(hsclient, hsproject): @@ -60,7 +58,7 @@ def test_push_with_state(hsclient, hsproject): assert 'key' in qjob, qjob assert 'auth' in qjob, qjob job = hsclient.get_job(qjob['key']) - assert job.metadata.get('state') == u'running' + assert job.metadata.get('state') == 'running' def test_push_with_unique(hsproject): @@ -89,10 +87,10 @@ def test_startjob(hsproject): assert nj.pop('pending_time', None), nj assert nj.pop('running_time', None), nj assert nj.pop('auth', None), nj - assert nj[u'key'] == qj['key'] - assert nj[u'spider'] == TEST_SPIDER_NAME - assert nj[u'state'] == u'running' - assert nj[u'priority'] == jobq.PRIO_NORMAL + assert nj['key'] == qj['key'] + assert nj['spider'] == TEST_SPIDER_NAME + assert nj['state'] == 'running' + assert 
nj['priority'] == jobq.PRIO_NORMAL def test_startjob_with_extras(hsproject): @@ -110,10 +108,10 @@ def test_startjob_with_extras(hsproject): 'nil': None, } qj = jobq.push(TEST_SPIDER_NAME, **pushextras) - startextras = dict(('s_' + k, v) for k, v in six.iteritems(pushextras)) + startextras = {'s_' + k: v for k, v in pushextras.items()} nj = jobq.start(**startextras) assert qj['key'] == nj['key'] - for k, v in six.iteritems(dict(pushextras, **startextras)): + for k, v in dict(pushextras, **startextras).items(): if type(v) is float: assert abs(nj.get(k) - v) < 0.0001 else: @@ -136,8 +134,8 @@ def test_summary(hsproject): jobq.push(TEST_SPIDER_NAME) jobq.push(TEST_SPIDER_NAME, state='running') jobq.push(TEST_SPIDER_NAME, state='finished') - summaries = dict((s['name'], s) for s in jobq.summary()) - assert set(summaries), set(['pending', 'running', 'finished']) + summaries = {s['name']: s for s in jobq.summary()} + assert set(summaries), {'pending', 'running', 'finished'} assert jobq.summary('pending') assert jobq.summary('running') assert jobq.summary('finished') @@ -271,11 +269,10 @@ def test_list_with_startts_endts(hsproject): def test_spider_updates(hsproject, hsspiderid): jobq = hsproject.jobq - spiderkey = '%s/%s' % (TEST_PROJECT_ID, hsspiderid) + spiderkey = f'{TEST_PROJECT_ID}/{hsspiderid}' def finish_and_delete_jobs(): - for job in jobq.finish(spiderkey): - yield job + yield from jobq.finish(spiderkey) jobq.delete(spiderkey) q1 = jobq.push(TEST_SPIDER_NAME) @@ -283,7 +280,7 @@ def finish_and_delete_jobs(): q3 = jobq.push(TEST_SPIDER_NAME, state='finished') q4 = jobq.push(TEST_SPIDER_NAME, state='deleted') - r = dict((x['key'], x['prevstate']) for x in finish_and_delete_jobs()) + r = {x['key']: x['prevstate'] for x in finish_and_delete_jobs()} assert r.get(q1['key']) == 'pending', r assert r.get(q2['key']) == 'running', r assert r.get(q3['key']) == 'finished', r diff --git a/tests/hubstorage/test_jobsmeta.py b/tests/hubstorage/test_jobsmeta.py index 6f5ff7db..6514f6c6 100644 --- a/tests/hubstorage/test_jobsmeta.py +++ b/tests/hubstorage/test_jobsmeta.py @@ -9,7 +9,7 @@ def _assertMetadata(meta1, meta2): def _clean(m): - return dict((k, v) for k, v in m.items() if k != 'updated_time') + return {k: v for k, v in m.items() if k != 'updated_time'} meta1 = _clean(meta1) meta2 = _clean(meta2) diff --git a/tests/hubstorage/test_project.py b/tests/hubstorage/test_project.py index 4d849e4b..9415af73 100644 --- a/tests/hubstorage/test_project.py +++ b/tests/hubstorage/test_project.py @@ -1,10 +1,8 @@ """ Test Project """ -import six import json import pytest -from six.moves import range from requests.exceptions import HTTPError from scrapinghub import HubstorageClient @@ -20,8 +18,8 @@ def test_projectid(hsclient): p1 = hsclient.get_project(int(TEST_PROJECT_ID)) p2 = hsclient.get_project(str(TEST_PROJECT_ID)) assert p1.projectid == p2.projectid - assert isinstance(p1.projectid, six.text_type) - assert isinstance(p2.projectid, six.text_type) + assert isinstance(p1.projectid, str) + assert isinstance(p2.projectid, str) with pytest.raises(AssertionError): hsclient.get_project('111/3') @@ -76,14 +74,14 @@ def test_get_jobs_with_legacy_filter(hsproject): def test_push_job(hsproject): job = hsproject.push_job(TEST_SPIDER_NAME, state='running', priority=hsproject.jobq.PRIO_HIGH, - foo=u'bar') - assert job.metadata.get('state') == u'running' - assert job.metadata.get('foo') == u'bar' + foo='bar') + assert job.metadata.get('state') == 'running' + assert job.metadata.get('foo') == 'bar' 
hsproject.jobq.finish(job) hsproject.jobq.delete(job) job.metadata.expire() - assert job.metadata.get('state') == u'deleted' - assert job.metadata.get('foo') == u'bar' + assert job.metadata.get('state') == 'deleted' + assert job.metadata.get('foo') == 'bar' def test_auth(hsclient, json_and_msgpack): @@ -197,20 +195,20 @@ def test_requests(hsproject): job.requests.close() rr = job.requests.list() assert next(rr) == { - u'status': 200, u'rs': 1337, - u'url': u'http://test.com/', u'time': ts, - u'duration': 5, u'method': u'GET', + 'status': 200, 'rs': 1337, + 'url': 'http://test.com/', 'time': ts, + 'duration': 5, 'method': 'GET', } assert next(rr) == { - u'status': 400, u'parent': 0, u'rs': 0, - u'url': u'http://test.com/2', u'time': ts + 1, - u'duration': 1, u'method': u'POST', + 'status': 400, 'parent': 0, 'rs': 0, + 'url': 'http://test.com/2', 'time': ts + 1, + 'duration': 1, 'method': 'POST', } assert next(rr) == { - u'status': 400, u'fp': u'1234', u'parent': 0, - u'rs': 0, u'url': u'http://test.com/3', - u'time': ts + 2, u'duration': 1, - u'method': u'PUT', + 'status': 400, 'fp': '1234', 'parent': 0, + 'rs': 0, 'url': 'http://test.com/3', + 'time': ts + 2, 'duration': 1, + 'method': 'PUT', } with pytest.raises(StopIteration): next(rr) diff --git a/tests/hubstorage/test_retry.py b/tests/hubstorage/test_retry.py index 1db07d3d..e2cae934 100644 --- a/tests/hubstorage/test_retry.py +++ b/tests/hubstorage/test_retry.py @@ -6,10 +6,10 @@ import pytest import responses -from mock import patch +from unittest.mock import patch from requests import HTTPError, ConnectionError from scrapinghub import HubstorageClient -from six.moves.http_client import BadStatusLine +from http.client import BadStatusLine from ..conftest import TEST_AUTH, TEST_ENDPOINT from ..conftest import TEST_PROJECT_ID, TEST_SPIDER_NAME @@ -122,7 +122,7 @@ def test_retrier_does_not_catch_unwanted_exception(hsspiderid): # Act job, metadata, err = None, None, None try: - job = client.get_job('%s/%s/%s' % (TEST_PROJECT_ID, hsspiderid, 42)) + job = client.get_job(f'{TEST_PROJECT_ID}/{hsspiderid}/{42}') metadata = dict(job.metadata) except HTTPError as e: err = e @@ -154,7 +154,7 @@ def request_callback(request): if attempts_count[0] <= 2: raise ConnectionError("Connection aborted.", BadStatusLine("''")) if attempts_count[0] == 3: - return err_code, {}, u'' + return err_code, {}, '' else: resp_body = dict(job_metadata) return 200, {}, json.dumps(resp_body) @@ -162,7 +162,7 @@ def request_callback(request): mock_api(callback=request_callback) # Act - job = client.get_job('%s/%s/%s' % (TEST_PROJECT_ID, hsspiderid, 42)) + job = client.get_job(f'{TEST_PROJECT_ID}/{hsspiderid}/{42}') # Assert assert dict(job_metadata) == dict(job.metadata) @@ -184,7 +184,7 @@ def test_api_delete_can_be_set_to_non_idempotent(hsspiderid): mock_api(method=DELETE, callback=callback_delete) # Act - job = client.get_job('%s/%s/%s' % (TEST_PROJECT_ID, hsspiderid, 42)) + job = client.get_job(f'{TEST_PROJECT_ID}/{hsspiderid}/{42}') err = None try: @@ -237,7 +237,7 @@ def test_delete_requests_are_retried(hsspiderid): mock_api(method=DELETE, callback=callback_delete) # Act - job = client.get_job('%s/%s/%s' % (TEST_PROJECT_ID, hsspiderid, 42)) + job = client.get_job(f'{TEST_PROJECT_ID}/{hsspiderid}/{42}') job.metadata['foo'] = 'bar' del job.metadata['foo'] job.metadata.save() @@ -262,7 +262,7 @@ def test_metadata_save_does_retry(hsspiderid): mock_api(method=POST, callback=callback_post) # Act - job = client.get_job('%s/%s/%s' % (TEST_PROJECT_ID, hsspiderid, 42)) + 
job = client.get_job(f'{TEST_PROJECT_ID}/{hsspiderid}/{42}') job.metadata['foo'] = 'bar' job.metadata.save() @@ -306,7 +306,7 @@ def test_get_job_does_retry(hsspiderid): mock_api(callback=callback) # Act - job = client.get_job('%s/%s/%s' % (TEST_PROJECT_ID, hsspiderid, 42)) + job = client.get_job(f'{TEST_PROJECT_ID}/{hsspiderid}/{42}') # Assert assert dict(job_metadata) == dict(job.metadata) @@ -329,7 +329,7 @@ def test_get_job_does_fails_if_no_retries(hsspiderid): # Act job, metadata, err = None, None, None try: - job = client.get_job('%s/%s/%s' % (TEST_PROJECT_ID, hsspiderid, 42)) + job = client.get_job(f'{TEST_PROJECT_ID}/{hsspiderid}/{42}') metadata = dict(job.metadata) except HTTPError as e: err = e @@ -357,7 +357,7 @@ def test_get_job_does_fails_on_too_many_retries(hsspiderid): # Act job, metadata, err = None, None, None try: - job = client.get_job('%s/%s/%s' % (TEST_PROJECT_ID, hsspiderid, 42)) + job = client.get_job(f'{TEST_PROJECT_ID}/{hsspiderid}/{42}') metadata = dict(job.metadata) except HTTPError as e: err = e diff --git a/tests/hubstorage/test_system.py b/tests/hubstorage/test_system.py index bdbc2152..18165303 100644 --- a/tests/hubstorage/test_system.py +++ b/tests/hubstorage/test_system.py @@ -2,7 +2,6 @@ from contextlib import closing import pytest -from six.moves import range from scrapinghub import HubstorageClient from scrapinghub.hubstorage.utils import millitime diff --git a/tests/legacy/test_connection.py b/tests/legacy/test_connection.py index b5ec68fe..3afd8178 100644 --- a/tests/legacy/test_connection.py +++ b/tests/legacy/test_connection.py @@ -1,6 +1,6 @@ import os import json -import mock +from unittest import mock import pytest import requests @@ -66,7 +66,7 @@ def test_connection_create_session(connection): assert isinstance(session, requests.Session) assert session.auth == ('testkey', '') assert (session.headers.get('User-Agent') == - 'python-scrapinghub/{}'.format(__version__)) + f'python-scrapinghub/{__version__}') assert session.stream assert not session.prefetch diff --git a/tests/legacy/test_job.py b/tests/legacy/test_job.py index 9472683f..8c9004f5 100644 --- a/tests/legacy/test_job.py +++ b/tests/legacy/test_job.py @@ -1,4 +1,4 @@ -import mock +from unittest import mock import pytest import requests diff --git a/tests/legacy/test_jobset.py b/tests/legacy/test_jobset.py index c96da1e7..f603daaf 100644 --- a/tests/legacy/test_jobset.py +++ b/tests/legacy/test_jobset.py @@ -1,4 +1,4 @@ -import mock +from unittest import mock import pytest from scrapinghub import APIError diff --git a/tests/legacy/test_project.py b/tests/legacy/test_project.py index 985b5048..996d5838 100644 --- a/tests/legacy/test_project.py +++ b/tests/legacy/test_project.py @@ -1,4 +1,4 @@ -import mock +from unittest import mock from scrapinghub import Connection from scrapinghub import Job, JobSet diff --git a/tests/legacy/test_proxymixin.py b/tests/legacy/test_proxymixin.py index 9f126027..0ee71b01 100644 --- a/tests/legacy/test_proxymixin.py +++ b/tests/legacy/test_proxymixin.py @@ -1,4 +1,4 @@ -import mock +from unittest import mock def test_requestproxymixin_add_params(proxy_mixin):
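A note on the docs/conf.py hunk above: removing `import sphinx_rtd_theme` and `html_theme_path` assumes a sphinx_rtd_theme release that registers itself with Sphinx through the `sphinx.html_themes` entry point (any reasonably recent version does). Under that assumption the theme configuration reduces to naming the theme:

    # docs/conf.py (excerpt) -- no import or html_theme_path required
    html_theme = 'sphinx_rtd_theme'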
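The `six.string_types` / `six.text_type` removals in collections.py, frontiers.py and batchuploader.py all follow the same pattern: on Python 3 there is only `str`, and byte handling is explicit. A minimal sketch of that pattern with illustrative helper names (`check_keys`, `to_utf8`) that are not part of the library:

    from collections.abc import Iterable

    def check_keys(keys):
        # Same guard as Collection.delete(): accept a single string key or
        # any iterable of string keys, reject everything else.
        if not isinstance(keys, str) and not isinstance(keys, Iterable):
            raise ValueError("You should provide string key or iterable "
                             "object providing string keys")

    def to_utf8(item):
        # Same idea as the batch-uploader encoders: text becomes UTF-8
        # bytes, bytes pass through unchanged.
        return item.encode('utf8') if isinstance(item, str) else item

    check_keys('key1')            # a single string key is accepted
    check_keys(['key1', 'key2'])  # so is an iterable of keys
    assert to_utf8('spider') == b'spider'
    assert to_utf8(b'spider') == b'spider'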
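The `super(Class, self)` → `super()` rewrites (exceptions.py, frontiers.py, resourcetype.py, legacy.py and others) are behaviour-preserving on Python 3, as is dropping the explicit `object` base class. A standalone sketch with made-up class names, not the library's own hierarchy:

    class StorageError(Exception):
        def __init__(self, message=None, http_error=None):
            self.http_error = http_error
            super().__init__(message)   # no (Class, self) arguments needed

    class NotFoundError(StorageError):  # new-style by default, no `object` base
        pass

    err = NotFoundError("Job 1/2/3 doesn't exist")
    assert str(err) == "Job 1/2/3 doesn't exist"
    assert err.http_error is None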
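The `%` and `.format()` replacements (utils.JobKey.__str__, spiders.Spider.__init__, the legacy URL builders) rely on f-strings converting interpolated values with `str()` automatically; the explicit `str()` calls kept in `Spider.__init__` are therefore harmless but redundant. A quick check of the equivalence, using example ids:

    project_id, spider_id, job_id = 123, 45, 6

    # old form, as removed above
    assert '{}/{}/{}'.format(project_id, spider_id, job_id) == '123/45/6'

    # f-string form: values are str()-converted during interpolation
    assert f'{project_id}/{spider_id}/{job_id}' == '123/45/6'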
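Every `six.moves` import and the external `mock` package used in the tests map directly onto the standard library on Python 3. A self-contained sketch of the replacements used in this diff:

    import time
    from queue import Queue, Empty           # was six.moves.queue
    from http.client import BadStatusLine    # was six.moves.http_client
    from collections.abc import Iterable, Iterator, MutableMapping
    from unittest import mock                 # was the external `mock` package

    assert isinstance([], Iterable)
    assert isinstance(iter([]), Iterator)
    assert issubclass(dict, MutableMapping)
    assert issubclass(BadStatusLine, Exception)

    q = Queue()
    q.put('job')
    assert q.get_nowait() == 'job'
    try:
        q.get_nowait()
    except Empty:
        pass                                  # queue drained, as expected

    with mock.patch('time.sleep') as fake_sleep:
        time.sleep(5)                         # patched call returns immediately
        fake_sleep.assert_called_once_with(5)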
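The `except (socket.error, ...)` → `except (OSError, ...)` changes in batchuploader.py, resourcetype.py and legacy.py lean on `socket.error` having been an alias of `OSError` since Python 3.3, so the set of caught exceptions is unchanged. A short check:

    import socket

    assert socket.error is OSError

    try:
        raise socket.timeout('simulated network failure')
    except OSError:
        pass   # socket-level errors still derive from OSError and are caught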
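The `yield from` rewrites (JobQ._jobkeys, ResourceType.iter_msgpack/iter_json, mpdecode, the test helper in test_jobq.py) delegate to a sub-generator instead of looping and re-yielding; the produced values are identical. A simplified sketch following the shape of `_jobkeys`, with the `hasattr(job, 'key')` branch left out:

    def jobkeys(job):
        if isinstance(job, list):
            for x in job:
                yield from jobkeys(x)   # delegate instead of `for k in ...: yield k`
        elif isinstance(job, dict):
            yield job['key']
        else:
            yield job

    jobs = ['1/2/3', [{'key': '1/2/4'}, '1/2/5']]
    assert list(jobkeys(jobs)) == ['1/2/3', '1/2/4', '1/2/5']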
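Finally, the `six.iteritems(d)` → `d.items()`, `dict((k, v) for ...)` → `{k: v for ...}` and `set((...))` → `{...}` rewrites (proxy.py, resourcetype.py, job.py and several tests) are direct syntactic upgrades. A compact illustration with sample metadata, following the `ignore_fields` filtering done in the metadata classes above:

    ignore_fields = {'auth', '_key', 'state'}   # set literal, was set((...))

    data = {'auth': 'token', 'state': 'running', 'close_reason': 'finished'}

    # six.iteritems(data) -> data.items();  dict((k, v) ...) -> {k: v ...}
    filtered = {k: v for k, v in data.items() if k not in ignore_fields}
    assert filtered == {'close_reason': 'finished'}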