mirror of
https://github.com/reddit-archive/reddit.git
synced 2026-04-27 03:00:12 -04:00
upload_static_files_to_s3: Don't list the whole bucket to find changes.
The static files S3 bucket has been getting a lot larger recently, between subreddit stylesheets being in there and the static file cleaner being disabled due to a bug. This is causing the deploy process to take upwards of 3 minutes just to determine that no files need to be uploaded to the bucket. As a short-term workaround, this changes the uploader to check each key individually with an S3 HEAD request rather than listing the whole bucket. This is slower than the best case of listing the bucket, but is significantly faster than the current condition (~25 second runtime now).
This commit is contained in:
@@ -35,9 +35,6 @@ mimetypes.encodings_map['.gzip'] = 'gzip'
 def upload(config_file):
     bucket, config = read_static_file_config(config_file)
 
-    # build a list of files already in the bucket
-    remote_files = {key.name : key.etag.strip('"') for key in bucket.list()}
-
     # upload local files not already in the bucket
     for root, dirs, files in os.walk(config["static_root"]):
         for file in files:
@@ -55,12 +52,12 @@ def upload(config_file):
             if encoding:
                 headers['Content-Encoding'] = encoding
 
+            existing_key = bucket.get_key(key_name)
             key = bucket.new_key(key_name)
             with open(absolute_path, 'rb') as f:
                 etag, base64_tag = key.compute_md5(f)
 
             # don't upload the file if it already exists unmodified in the bucket
-            if remote_files.get(key_name, None) == etag:
+            if existing_key and existing_key.etag.strip('"') == etag:
                 continue
 
             print "uploading", key_name, "to S3..."
Reference in New Issue
Block a user