|
1 | 1 | """
|
2 | 2 | High level Hubstorage client
|
3 | 3 | """
|
| 4 | +from httplib import BadStatusLine |
| 5 | +import logging |
4 | 6 | import pkgutil
|
5 |
| -from requests import session, adapters |
| 7 | +from requests import session, adapters, HTTPError, ConnectionError, Timeout |
| 8 | +from retrying import Retrying |
6 | 9 | from .utils import xauth, urlpathjoin
|
7 | 10 | from .project import Project
|
8 | 11 | from .job import Job
|
|
14 | 17 | __version__ = pkgutil.get_data('hubstorage', 'VERSION').strip()
|
15 | 18 |
|
16 | 19 |
|
| 20 | +logger = logging.getLogger('HubstorageClient') |
| 21 | + |
| 22 | +_HTTP_ERROR_CODES_TO_RETRY = (408, 429, 503, 504) |
| 23 | + |
| 24 | + |
| 25 | +def _hc_retry_on_exception(err): |
| 26 | + """Callback used by the client to restrict the retry to acceptable errors""" |
| 27 | + if (isinstance(err, HTTPError) and err.response.status_code in _HTTP_ERROR_CODES_TO_RETRY): |
| 28 | + logger.warning("Server failed with %d status code, retrying (maybe)" % (err.response.status_code,)) |
| 29 | + return True |
| 30 | + |
| 31 | + # TODO: python3 compatibility: BadStatusLine error are wrapped differently |
| 32 | + if (isinstance(err, ConnectionError) and err.args[0] == 'Connection aborted.' and |
| 33 | + isinstance(err.args[1], BadStatusLine) and err.args[1][0] == repr('')): |
| 34 | + logger.warning("Protocol failed with BadStatusLine, retrying (maybe)") |
| 35 | + return True |
| 36 | + |
| 37 | + if isinstance(err, Timeout): |
| 38 | + logger.warning("Server connection timeout, retrying (maybe)") |
| 39 | + return True |
| 40 | + |
| 41 | + return False |
| 42 | + |
17 | 43 | class HubstorageClient(object):
|
18 | 44 |
|
19 | 45 | DEFAULT_ENDPOINT = 'http://storage.scrapinghub.com/'
|
20 | 46 | USERAGENT = 'python-hubstorage/{0}'.format(__version__)
|
21 |
| - DEFAULT_TIMEOUT = 60.0 |
22 | 47 |
|
23 |
| - def __init__(self, auth=None, endpoint=None, connection_timeout=None, |
24 |
| - max_retries=0): |
| 48 | + DEFAULT_CONNECTION_TIMEOUT_S = 60.0 |
| 49 | + RETRY_DEFAUT_MAX_RETRY_TIME_S = 60.0 |
| 50 | + |
| 51 | + RETRY_DEFAULT_MAX_RETRIES = 3 |
| 52 | + RETRY_DEFAULT_JITTER_MS = 500 |
| 53 | + RETRY_DEFAULT_EXPONENTIAL_BACKOFF_MS = 500 |
| 54 | + |
| 55 | + def __init__(self, auth=None, endpoint=None, connection_timeout=None, max_retries=None, max_retry_time=None): |
| 56 | + """ |
| 57 | + Note: |
| 58 | + max_retries and max_retry_time change how the client attempt to retry failing requests that are |
| 59 | + idempotent (safe to execute multiple time). |
| 60 | +
|
| 61 | + HubstorageClient(max_retries=3) will retry requests 3 times, no matter the time it takes. |
| 62 | + Use max_retry_time if you want to bound the time spent in retrying. |
| 63 | +
|
| 64 | + By default, requests are retried at most 3 times, during 60 seconds. |
| 65 | +
|
| 66 | + Args: |
| 67 | + auth (str): The client authentication token |
| 68 | + endpoint (str): The API root address |
| 69 | + connection_timeout (int): The connection timeout for a _single request_ |
| 70 | + max_retries (int): The number of time idempotent requests may be retried |
| 71 | + max_retry_time (int): The time, in seconds, during which the client can retry a request |
| 72 | + """ |
25 | 73 | self.auth = xauth(auth)
|
26 | 74 | self.endpoint = endpoint or self.DEFAULT_ENDPOINT
|
27 |
| - self.connection_timeout = connection_timeout or self.DEFAULT_TIMEOUT |
28 |
| - self.session = self._create_session(max_retries) |
| 75 | + self.connection_timeout = connection_timeout or self.DEFAULT_CONNECTION_TIMEOUT_S |
| 76 | + self.session = self._create_session() |
| 77 | + self.retrier = self._create_retrier(max_retries, max_retry_time) |
29 | 78 | self.jobq = JobQ(self, None)
|
30 | 79 | self.projects = Projects(self, None)
|
31 | 80 | self.root = ResourceType(self, None)
|
32 | 81 | self._batchuploader = None
|
33 | 82 |
|
34 |
| - def _create_session(self, max_retries): |
| 83 | + def request(self, is_idempotent=False, **kwargs): |
| 84 | + """ |
| 85 | + Execute an HTTP request with the current client session. |
| 86 | +
|
| 87 | + Use the retry policy configured in the client when is_idempotent is True |
| 88 | + """ |
| 89 | + kwargs.setdefault('timeout', self.connection_timeout) |
| 90 | + |
| 91 | + def invoke_request(): |
| 92 | + r = self.session.request(**kwargs) |
| 93 | + |
| 94 | + if not r.ok: |
| 95 | + logger.debug('%s: %s', r, r.content) |
| 96 | + r.raise_for_status() |
| 97 | + return r |
| 98 | + |
| 99 | + if is_idempotent: |
| 100 | + return self.retrier.call(invoke_request) |
| 101 | + else: |
| 102 | + return invoke_request() |
| 103 | + |
| 104 | + def _create_retrier(self, max_retries, max_retry_time): |
| 105 | + """ |
| 106 | + Create the Retrier object used to process idempotent client requests. |
| 107 | +
|
| 108 | + If only max_retries is set, the default max_retry_time is ignored. |
| 109 | +
|
| 110 | + Args: |
| 111 | + max_retries (int): the number of retries to be attempted |
| 112 | + max_retry_time (int): the number of time, in seconds, to retry for. |
| 113 | + Returns: |
| 114 | + A Retrying instance, that implements a call(func) method. |
| 115 | + """ |
| 116 | + |
| 117 | + # Client sets max_retries only |
| 118 | + if max_retries is not None and max_retry_time is None: |
| 119 | + stop_max_delay = None |
| 120 | + stop_max_attempt_number = max_retries + 1 |
| 121 | + wait_exponential_multiplier = self.RETRY_DEFAULT_EXPONENTIAL_BACKOFF_MS |
| 122 | + else: |
| 123 | + stop_max_delay = (max_retry_time or self.RETRY_DEFAUT_MAX_RETRY_TIME_S) * 1000.0 |
| 124 | + stop_max_attempt_number = (max_retries or self.RETRY_DEFAULT_MAX_RETRIES) + 1 |
| 125 | + |
| 126 | + # Compute the backoff to allow for max_retries queries during the allowed delay |
| 127 | + # Solves the following formula (assumes requests are immediate): |
| 128 | + # max_retry_time = sum(exp_multiplier * 2 ** i) for i from 1 to max_retries + 1 |
| 129 | + wait_exponential_multiplier = stop_max_delay / ((2 ** (stop_max_attempt_number + 1)) - 2) |
| 130 | + |
| 131 | + return Retrying(stop_max_attempt_number=stop_max_attempt_number, |
| 132 | + stop_max_delay=stop_max_delay, |
| 133 | + retry_on_exception=_hc_retry_on_exception, |
| 134 | + wait_exponential_multiplier=wait_exponential_multiplier, |
| 135 | + wait_jitter_max=self.RETRY_DEFAULT_JITTER_MS) |
| 136 | + |
| 137 | + def _create_session(self): |
35 | 138 | s = session()
|
36 |
| - if max_retries > 0: |
37 |
| - a = adapters.HTTPAdapter(max_retries=max_retries) |
38 |
| - s.mount('http://', a) |
39 |
| - s.mount('https://', a) |
40 | 139 | s.headers.update({'User-Agent': self.USERAGENT})
|
41 | 140 | return s
|
42 | 141 |
|
|
0 commit comments