Skip to content

Commit ef0b7d6

Browse files
committed
minor fixes
1 parent abe128b commit ef0b7d6

6 files changed

Lines changed: 62 additions & 32 deletions

File tree

deploy/infrabox/templates/api/deployment.yaml

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -51,20 +51,6 @@ spec:
5151
-
5252
name: INFRABOX_GERRIT_ENABLED
5353
value: {{ .Values.gerrit.enabled | quote }}
54-
livenessProbe:
55-
httpGet:
56-
path: /ping
57-
port: 8080
58-
initialDelaySeconds: 5
59-
periodSeconds: 5
60-
timeoutSeconds: 5
61-
readinessProbe:
62-
httpGet:
63-
path: /ping
64-
port: 8080
65-
initialDelaySeconds: 5
66-
periodSeconds: 5
67-
timeoutSeconds: 5
6854
volumes:
6955
{{ include "volumes_database" . | indent 16 }}
7056
{{ include "volumes_rsa" . | indent 16 }}

src/api/handlers/job_api.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -596,8 +596,6 @@ def get(self, parent_job_id):
596596

597597
key = "%s/%s" % (parent_job_id, filename)
598598

599-
app.logger.error(key)
600-
601599
g.release_db()
602600
f = storage.download_output(key)
603601

src/controller/pkg/stub/pipeline.go

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,6 @@ func (c *Controller) deletePipelineInvocation(cr *v1alpha1.IBPipelineInvocation,
4747
cr.SetFinalizers([]string{})
4848
err = updateStatus(cr, log)
4949
if err != nil && !errors.IsNotFound(err) {
50-
logrus.Errorf("Failed to remove finalizers: %v", err)
5150
return err
5251
}
5352

src/job/infrabox_job/job.py

Lines changed: 23 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -93,18 +93,25 @@ def create_jobs(self, jobs):
9393
"jobs": jobs,
9494
}
9595

96-
r = requests.post("%s/create_jobs" % self.api_server,
97-
headers=self.get_headers(),
98-
json=payload, timeout=60, verify=self.verify)
96+
while True:
97+
r = requests.post("%s/create_jobs" % self.api_server,
98+
headers=self.get_headers(),
99+
json=payload, timeout=60, verify=self.verify)
99100

100-
if r.status_code != 200:
101-
msg = r.text
102-
try:
103-
msg = r.json()['message']
104-
except:
105-
pass
101+
if r.status_code == 200:
102+
return
106103

107-
raise Failure(msg)
104+
if r.status_code == 400:
105+
msg = r.text
106+
try:
107+
msg = r.json()['message']
108+
except:
109+
pass
110+
111+
raise Failure(msg)
112+
113+
self.console.collect('Failed to connect to API, retrying.', show=True)
114+
time.sleep(3)
108115

109116
def post_api_server(self, endpoint, data=None):
110117
while True:
@@ -119,6 +126,7 @@ def post_api_server(self, endpoint, data=None):
119126
except Exception as e:
120127
print e
121128

129+
self.console.collect('Failed to connect to API, retrying.', show=True)
122130
time.sleep(1)
123131

124132
def post_stats(self, stat):
@@ -165,7 +173,7 @@ def _get_file_from_api_server(self, url, path):
165173
message = None
166174

167175
r = None
168-
for _ in xrange(0, 5):
176+
for _ in xrange(0, 20):
169177
try:
170178
message = None
171179
r = requests.get("%s%s" % (self.api_server, url),
@@ -176,6 +184,8 @@ def _get_file_from_api_server(self, url, path):
176184
return
177185

178186
if r.status_code != 200:
187+
self.console.collect('Failed to download file (%s), retrying' % r.status_code, show=True)
188+
time.sleep(10)
179189
continue
180190

181191
with open(path, 'wb') as f:
@@ -185,6 +195,7 @@ def _get_file_from_api_server(self, url, path):
185195

186196
except Exception as e:
187197
message = str(e)
198+
self.console.collect('Failed to download file (%s), retrying' % message, show=True)
188199
time.sleep(10)
189200
continue
190201

@@ -199,7 +210,7 @@ def _get_file_from_api_server(self, url, path):
199210
except:
200211
pass
201212

202-
raise Failure('Failed to download file: %s' % msg)
213+
raise Failure('Failed to download file(%s): %s' % (r.status_code, msg))
203214

204215
def post_file_to_api_server(self, url, path, filename=None, split=False):
205216
if not filename:

src/job/job.py

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -736,7 +736,10 @@ def deploy_images(self, image_name):
736736
c.header("Deploying", show=True)
737737

738738
for dep in self.deployments:
739-
self.deploy_image(image_name, dep)
739+
target = dep.get('target', None)
740+
741+
if not target:
742+
self.deploy_image(image_name, dep)
740743

741744
def login_docker_registry(self):
742745
c = self.console
@@ -839,6 +842,13 @@ def run_docker_container(self, image_name):
839842
logger.exception(ex)
840843
raise Failure("Could not get exit code of container")
841844

845+
try:
846+
c.execute(("docker", "commit", container_name, image_name))
847+
c.header("Finalize", show=True)
848+
except Exception as ex:
849+
logger.exception(ex)
850+
raise Failure("Could not commit and push container")
851+
842852
logger.exception(e)
843853
raise Failure("Container run exited with error (exit code=%s)" % exit_code)
844854

@@ -954,13 +964,19 @@ def run_job_docker(self, c):
954964

955965
if self.deployments:
956966
for d in self.deployments:
957-
self.build_docker_image(image_name_build, image_name_latest, d.get('target', None))
967+
target = d.get('target', None)
968+
969+
if not target and not self.job.get('build_only', True):
970+
continue
971+
972+
self.build_docker_image(image_name_build, image_name_latest, target=target)
958973
c.header("Deploying", show=True)
959974
self.deploy_image(image_name_build, d)
960975

961976
if not self.job.get('build_only', True):
962977
self.build_docker_image(image_name_build, image_name_latest)
963978
self.run_docker_container(image_name_build)
979+
self.deploy_images(image_name_build)
964980

965981
c.header("Finalize", show=True)
966982

src/scheduler/kubernetes/scheduler.py

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -366,8 +366,16 @@ def handle_orphaned_jobs(self):
366366
if not message:
367367
message = stepStatus['State']['terminated'].get('reason', 'Unknown Error')
368368

369+
if not message and current_state != 'finished':
370+
import json
371+
self.logger.error(json.dumps(status, indent=4))
372+
369373
delete_job = True
370374

375+
if message == 'Error':
376+
import json
377+
self.logger.error(json.dumps(status, indent=4))
378+
371379
if s == "error":
372380
current_state = 'error'
373381
delete_job = True
@@ -376,7 +384,11 @@ def handle_orphaned_jobs(self):
376384

377385
if 'stepStatuses' in status and status['stepStatuses']:
378386
stepStatus = status['stepStatuses'][-1]
379-
node_name = stepStatus.get('nodeName', None)
387+
nn = stepStatus.get('nodeName', None)
388+
389+
if nn:
390+
# don't overwrite an existing node name with None
391+
node_name = nn
380392

381393
start_date = status.get('startTime', None)
382394
end_date = status.get('completionTime', None)
@@ -448,6 +460,7 @@ def assign_cluster(self):
448460
""", [cluster_name, j[0]])
449461
cursor.close()
450462

463+
451464
def update_cluster_state(self):
452465
cluster_name = os.environ['INFRABOX_CLUSTER_NAME']
453466
labels = []
@@ -470,6 +483,13 @@ def update_cluster_state(self):
470483
items = data.get('items', [])
471484

472485
for i in items:
486+
metadata = i.get('metadata', {})
487+
l = metadata.get('labels', {})
488+
master = l.get('node-role.kubernetes.io/master', "false")
489+
490+
if master == "true":
491+
continue
492+
473493
nodes += 1
474494
cpu += int(i['status']['capacity']['cpu'])
475495
mem = i['status']['capacity']['memory']

0 commit comments

Comments
 (0)