Skip to content

Commit a472af4

Browse files
iberryful authored and ib-steffen committed
Optimize storage performance (#225)
* rework download all archives
* fix typo
* add exists method for storage
* avoid unnecessary upload
1 parent 25a2420 commit a472af4

5 files changed

Lines changed: 54 additions & 58 deletions

File tree

src/api/handlers/job_api.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -558,7 +558,8 @@ def post(self):
558558

559559
g.release_db()
560560

561-
storage.upload_output(stream, key)
561+
if not storage.exists(key):
562+
storage.upload_output(stream, key)
562563

563564
for c in clusters:
564565
stream.seek(0)

src/api/handlers/project.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -298,9 +298,9 @@ def post(self, project_id, build_id):
298298
abort(400, 'Project is not of type "upload"')
299299

300300
key = '%s.zip' % build_id
301-
302-
stream = request.files['project.zip'].stream
303-
storage.upload_project(stream, key)
301+
if not storage.exists(key):
302+
stream = request.files['project.zip'].stream
303+
storage.upload_project(stream, key)
304304

305305
return OK('successfully uploaded data')
306306

@@ -403,4 +403,4 @@ def post(self, project_id):
403403

404404
g.db.commit()
405405

406-
return OK('successfully started build', data=data)
406+
return OK('successfully started build', data=data)

src/api/handlers/projects/jobs.py

Lines changed: 4 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99

1010
import requests
1111

12-
from flask import g, abort, Response, send_file, request, after_this_request
12+
from flask import g, abort, Response, send_file, request, redirect
1313
from flask_restplus import Resource, fields
1414

1515
from pyinfraboxutils import get_logger, get_env
@@ -394,9 +394,8 @@ def get(self, project_id, job_id):
394394
job_cluster = result['cluster_name']
395395
key = '%s/%s' % (job_id, filename)
396396

397-
if os.environ['INFRABOX_CLUSTER_NAME'] == job_cluster:
398-
f = storage.download_archive(key)
399-
else:
397+
f = storage.download_archive(key)
398+
if not f and os.environ['INFRABOX_CLUSTER_NAME'] != job_cluster:
400399
c = g.db.execute_one_dict('''
401400
SELECT *
402401
FROM cluster
@@ -450,53 +449,7 @@ def get(self, project_id, job_id):
450449
'''
451450
Returns all archives
452451
'''
453-
result = g.db.execute_one_dict('''
454-
SELECT archive
455-
FROM job
456-
WHERE id = %s
457-
AND project_id = %s
458-
''', [job_id, project_id])
459-
460-
if not result or not result['archive']:
461-
abort(404)
462-
463-
base_path = os.path.join('/tmp', str(uuid.uuid4()))
464-
archive_dir = os.path.join(base_path, 'archive')
465-
os.mkdir(base_path)
466-
os.mkdir(archive_dir)
467-
468-
@after_this_request
469-
def _remove_file(response):
470-
if os.path.exists(base_path):
471-
shutil.rmtree(base_path)
472-
return response
473-
474-
for item in result['archive']:
475-
filename = item['filename']
476-
url = "%s/api/v1/projects/%s/jobs/%s/archive/download?filename=%s" % \
477-
(get_env('INFRABOX_ROOT_URL'), project_id, job_id, filename)
478-
try:
479-
token = encode_user_token(g.token['user']['id'])
480-
except Exception:
481-
#public project has no token here.
482-
token = ""
483-
headers = {'Authorization': 'bearer ' + token}
484-
485-
r = requests.get(url, headers=headers, timeout=120, verify=False)
486-
if r.status_code != 200:
487-
continue
488-
489-
with open(os.path.join(archive_dir, os.path.basename(filename)), 'w') as f:
490-
f.write(r.content)
491-
492-
if not os.listdir(archive_dir):
493-
abort(404)
494-
495-
tar_file = os.path.join(base_path, 'archive_%s' % job_id +'.tar.gz')
496-
with tarfile.open(tar_file, mode='w:gz') as archive:
497-
archive.add(archive_dir, arcname='archive')
498-
499-
return send_file(tar_file, as_attachment=True, attachment_filename=os.path.basename(tar_file))
452+
return redirect("/api/v1/projects/%s/jobs/%s/archive/download?filename=all_archives.tar.gz" %(project_id, job_id))
500453

501454
@ns.route('/<job_id>/console')
502455
@api.response(403, 'Not Authorized')

src/job/job.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
import traceback
1313
import urllib3
1414
import yaml
15+
import tarfile
1516

1617
from pyinfrabox.infrabox import validate_json
1718
from pyinfrabox.docker_compose import create_from
@@ -318,13 +319,15 @@ def convert_coverage_result(self, f):
318319

319320
def upload_archive(self):
320321
c = self.console
322+
archive_exists = False
323+
testresult_exists = False
321324

322325
if os.path.exists(self.infrabox_archive_dir):
323326
files = self.get_files_in_dir(self.infrabox_archive_dir)
324327

325328
if files:
326329
c.collect("Uploading /infrabox/upload/archive", show=True)
327-
330+
archive_exists = True
328331
for f in files:
329332
c.collect("%s" % f, show=True)
330333
self.post_file_to_api_server("/archive", f, filename=f.replace(self.infrabox_upload_dir, ''))
@@ -333,10 +336,19 @@ def upload_archive(self):
333336
files = self.get_files_in_dir(self.infrabox_testresult_dir)
334337

335338
if files:
336-
339+
testresult_exists = True
337340
for f in files:
338341
c.collect("%s" % f, show=True)
339342

343+
tar_file = os.path.join(self.infrabox_upload_dir, 'all_archives' + '.tar.gz')
344+
with tarfile.open(tar_file, mode='w:gz') as archive:
345+
if archive_exists:
346+
archive.add(self.infrabox_archive_dir, arcname='archive')
347+
if testresult_exists:
348+
archive.add(self.infrabox_testresult_dir, arcname='testresult')
349+
350+
self.post_file_to_api_server("/archive", tar_file)
351+
340352

341353
def upload_coverage_results(self):
342354
if not os.path.exists(self.infrabox_coverage_dir):

src/pyinfraboxutils/storage.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import uuid
44

55
import boto3
6+
from botocore.errorfactory import ClientError
67
from google.cloud import storage as gcs
78
from flask import after_this_request
89
from flask import _app_ctx_stack as stack
@@ -69,6 +70,9 @@ def download_cache(self, key):
6970
def delete_cache(self, key):
7071
return self._delete('cache/%s' % key)
7172

73+
def exists(self, key):
74+
return
75+
7276
class S3(Storage):
7377
def __init__(self):
7478
super(Storage, self).__init__()
@@ -85,6 +89,14 @@ def __init__(self):
8589
self.bucket = get_env('INFRABOX_STORAGE_S3_BUCKET')
8690
self.create_buckets()
8791

92+
def exists(self, key):
93+
client = self._get_client()
94+
try:
95+
client.head_object(Bucket=self.bucket, Key=key)
96+
except ClientError:
97+
return False
98+
return True
99+
88100
def _upload(self, stream, key):
89101
client = self._get_client()
90102
client.put_object(Body=stream,
@@ -146,6 +158,12 @@ def _delete(self, key):
146158
except:
147159
pass
148160

161+
def exists(self, key):
162+
client = gcs.Client()
163+
bucket = client.get_bucket(self.bucket)
164+
blob = bucket.blob(key)
165+
return blob.exists()
166+
149167
def _upload(self, stream, key):
150168
client = gcs.Client()
151169
bucket = client.get_bucket(self.bucket)
@@ -173,6 +191,10 @@ def __init__(self):
173191
super(Storage, self).__init__()
174192
self.container = 'infrabox'
175193

194+
def exists(self, key):
195+
client = self._get_client()
196+
return client.exists(container_name=self.container, blob_name=key)
197+
176198
def _upload(self, stream, key):
177199
client = self._get_client()
178200
if not client.exists(container_name=self.container):
@@ -217,6 +239,14 @@ def __init__(self):
217239
self.project_name = get_env('INFRABOX_STORAGE_SWIFT_PROJECT_NAME')
218240
self.project_domain_name = get_env('INFRABOX_STORAGE_SWIFT_PROJECT_DOMAIN_NAME')
219241

242+
def exists(self, key):
243+
client = self._get_client()
244+
try:
245+
client.head_object(self.container, key)
246+
except ClientException:
247+
return False
248+
return True
249+
220250
def _upload(self, stream, key):
221251
client = self._get_client()
222252
try:

0 commit comments

Comments (0)