From d10edfbe4b8a15ea40e36bcda193c4c10dbce68e Mon Sep 17 00:00:00 2001 From: Daniel Schneller Date: Fri, 16 Nov 2018 10:26:33 +0100 Subject: [PATCH 1/3] Option to fix "S3ResponseError: 403 Forbidden" The S3 uploader fails if bucket permissions are restricted to only allow accessing certain prefixes in a bucket. The default behavior for boto's "get_bucket()" is to "validate" it by accessing the bucket's root, needlessly breaking the uploader even though all necessary permissions might be present. This patch adds a new command line switch --upload.s3.skip_bucket_validation to disable this behavior. --- mongodb_consistent_backup/Upload/S3/S3.py | 4 +++- .../Upload/S3/S3Session.py | 23 +++++++++++-------- .../Upload/S3/S3UploadPool.py | 6 +++-- .../Upload/S3/__init__.py | 4 ++++ 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/mongodb_consistent_backup/Upload/S3/S3.py b/mongodb_consistent_backup/Upload/S3/S3.py index 4dff9a45..931e3dc6 100644 --- a/mongodb_consistent_backup/Upload/S3/S3.py +++ b/mongodb_consistent_backup/Upload/S3/S3.py @@ -23,6 +23,7 @@ def __init__(self, manager, config, timer, base_dir, backup_dir, **kwargs): self.chunk_size = self.chunk_size_mb * 1024 * 1024 self.s3_acl = self.config.upload.s3.acl self.key_prefix = base_dir + self.validate_bucket = not self.config.upload.s3.skip_bucket_validation self.threads(self.config.upload.threads) self._pool = None @@ -38,7 +39,8 @@ def __init__(self, manager, config, timer, base_dir, backup_dir, **kwargs): self.threads(), self.remove_uploaded, self.chunk_size, - self.s3_acl + self.s3_acl, + validate_bucket=self.validate_bucket ) def get_key_name(self, file_path): diff --git a/mongodb_consistent_backup/Upload/S3/S3Session.py b/mongodb_consistent_backup/Upload/S3/S3Session.py index 177ff81b..5594412c 100644 --- a/mongodb_consistent_backup/Upload/S3/S3Session.py +++ b/mongodb_consistent_backup/Upload/S3/S3Session.py @@ -8,14 +8,15 @@ class S3Session: - def __init__(self, region, access_key, secret_key, bucket_name, secure=True, num_retries=5, socket_timeout=15): - self.region = region - self.access_key = access_key - self.secret_key = secret_key - self.secure = secure - self.num_retries = num_retries - self.socket_timeout = socket_timeout - + def __init__(self, region, access_key, secret_key, bucket_name, secure=True, num_retries=5, socket_timeout=15, + **kwargs): + self.region = region + self.access_key = access_key + self.secret_key = secret_key + self.secure = secure + self.num_retries = num_retries + self.socket_timeout = socket_timeout + self.validate_bucket = kwargs.get("validate_bucket") # monkey patch for bucket_name with dots # https://github.com/boto/boto/issues/2836 if self.secure and '.' in bucket_name: @@ -77,8 +78,10 @@ def connect(self): def get_bucket(self, bucket_name): try: - logging.debug("Connecting to AWS S3 Bucket: %s" % bucket_name) - return self._conn.get_bucket(bucket_name) + logging.debug("Connecting to AWS S3 Bucket: %s (%s validation)" % (bucket_name, + "with" if self.validate_bucket + else "without")) + return self._conn.get_bucket(bucket_name, validate=self.validate_bucket) except boto.exception.S3ResponseError, e: if self.is_forbidden_error(e): logging.error("Got forbidden error from AWS S3 for bucket %s! Please check your access/secret key" % bucket_name) diff --git a/mongodb_consistent_backup/Upload/S3/S3UploadPool.py b/mongodb_consistent_backup/Upload/S3/S3UploadPool.py index 96beaffd..d60e0b6e 100644 --- a/mongodb_consistent_backup/Upload/S3/S3UploadPool.py +++ b/mongodb_consistent_backup/Upload/S3/S3UploadPool.py @@ -28,7 +28,7 @@ def _reduce_method(m): class S3UploadPool(): - def __init__(self, bucket_name, region, access_key, secret_key, threads=4, remove_uploaded=False, chunk_bytes=50 * 1024 * 1024, key_acl=None): + def __init__(self, bucket_name, region, access_key, secret_key, threads=4, remove_uploaded=False, chunk_bytes=50 * 1024 * 1024, key_acl=None, **kwargs): self.bucket_name = bucket_name self.region = region self.access_key = access_key @@ -37,6 +37,7 @@ def __init__(self, bucket_name, region, access_key, secret_key, threads=4, remov self.remove_uploaded = remove_uploaded self.chunk_bytes = chunk_bytes self.key_acl = key_acl + self.validate_bucket = kwargs.get("validate_bucket") self.multipart_min_bytes = 5242880 @@ -46,7 +47,8 @@ def __init__(self, bucket_name, region, access_key, secret_key, threads=4, remov self._pool = Pool(processes=self.threads) try: - self.s3_conn = S3Session(self.region, self.access_key, self.secret_key, self.bucket_name) + self.s3_conn = S3Session(self.region, self.access_key, self.secret_key, self.bucket_name, + validate_bucket=self.validate_bucket) self.bucket = self.s3_conn.get_bucket(self.bucket_name) except Exception, e: raise OperationError(e) diff --git a/mongodb_consistent_backup/Upload/S3/__init__.py b/mongodb_consistent_backup/Upload/S3/__init__.py index 9765fa47..17d63705 100644 --- a/mongodb_consistent_backup/Upload/S3/__init__.py +++ b/mongodb_consistent_backup/Upload/S3/__init__.py @@ -10,6 +10,10 @@ def config(parser): help="S3 Uploader AWS Secret Key (required for S3 upload)") parser.add_argument("--upload.s3.bucket_name", dest="upload.s3.bucket_name", type=str, help="S3 Uploader destination bucket name") + parser.add_argument("--upload.s3.skip_bucket_validation", dest="upload.s3.skip_bucket_validation", default=False, + action="store_true", + help="S3 Upload will check upfront if the bucket exists. Skip this check if bucket " + "permissions don't allow access to the bucket's root. (default: false)") parser.add_argument("--upload.s3.bucket_prefix", dest="upload.s3.bucket_prefix", type=str, help="S3 Uploader destination bucket path prefix") parser.add_argument("--upload.s3.bucket_explicit_key", dest="upload.s3.bucket_explicit_key", type=str, From 846d4a5caa5d6977db77cfc70fdf1213906ec160 Mon Sep 17 00:00:00 2001 From: Daniel Schneller Date: Thu, 22 Nov 2018 18:48:16 +0100 Subject: [PATCH 2/3] Related: Fix flake8: Make regex a raw string --- mongodb_consistent_backup/Upload/Rsync/Rsync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mongodb_consistent_backup/Upload/Rsync/Rsync.py b/mongodb_consistent_backup/Upload/Rsync/Rsync.py index 6c7d1e0d..d0580caa 100644 --- a/mongodb_consistent_backup/Upload/Rsync/Rsync.py +++ b/mongodb_consistent_backup/Upload/Rsync/Rsync.py @@ -56,7 +56,7 @@ def init(self): def rsync_info(self): if not self._rsync_info: output = check_output([self.rsync_binary, "--version"]) - search = re.search("^rsync\s+version\s([0-9.-]+)\s+protocol\sversion\s(\d+)", output) + search = re.search(r"^rsync\s+version\s([0-9.-]+)\s+protocol\sversion\s(\d+)", output) self.rsync_version = search.group(1) self._rsync_info = {"version": self.rsync_version, "protocol_version": int(search.group(2))} return self._rsync_info From 64be1380345cf631d29cc2a555984ff3a32f34dd Mon Sep 17 00:00:00 2001 From: Daniel Schneller Date: Thu, 22 Nov 2018 19:32:22 +0100 Subject: [PATCH 3/3] Fix indentation --- mongodb_consistent_backup/Upload/S3/S3Session.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mongodb_consistent_backup/Upload/S3/S3Session.py b/mongodb_consistent_backup/Upload/S3/S3Session.py index 5594412c..f6815d02 100644 --- a/mongodb_consistent_backup/Upload/S3/S3Session.py +++ b/mongodb_consistent_backup/Upload/S3/S3Session.py @@ -79,8 +79,8 @@ def connect(self): def get_bucket(self, bucket_name): try: logging.debug("Connecting to AWS S3 Bucket: %s (%s validation)" % (bucket_name, - "with" if self.validate_bucket - else "without")) + "with" if self.validate_bucket + else "without")) return self._conn.get_bucket(bucket_name, validate=self.validate_bucket) except boto.exception.S3ResponseError, e: if self.is_forbidden_error(e):