OILS / soil / web.py View on Github | oils.pub

782 lines, 243 significant
#!/usr/bin/env python2
"""
soil/web.py - Dashboard that uses the "Event Sourcing" Paradigm

Given state like this:

https://test.oils-for-unix.org/
  github-jobs/
    1234/  # $GITHUB_RUN_NUMBER
      cpp-small.tsv   # benchmarks/time.py output. Success/failure for each task.
      cpp-small.json  # metadata when job is DONE

      (cpp-small.wwz is linked to, but not part of the state.)

      (cpp-small.state  # maybe for more transient events)

This script generates:

https://test.oils-for-unix.org/
  github-jobs/
    tmp-$$.index.html  # jobs for all runs
    1234/
      tmp-$$.index.html  # jobs and tasks for a given run
      tmp-$$.remove.txt  # TODO: consolidate 'cleanup', to make it faster

    # For sourcehut
    git-0101abab/
      tmp-$$.index.html

How to test changes to this file:

  $ soil/web-init.sh deploy-code
  $ soil/web-worker.sh remote-rewrite-jobs-index github- ${GITHUB_RUN_NUMBER}
  $ soil/web-worker.sh remote-rewrite-jobs-index sourcehut- git-${commit_hash}

"""
37from __future__ import print_function
38
39import collections
40import csv
41import datetime
42import json
43import itertools
44import os
45import re
46import sys
47from doctools import html_head
48from vendor import jsontemplate
49
50
def log(msg, *args):
  """Print msg to stderr, %-formatting it with args only when args are given."""
  text = (msg % args) if args else msg
  print(text, file=sys.stderr)
55
56
def PrettyTime(now, start_time):
  """
  Placeholder for a relative-time formatter ('an hour ago', 'Yesterday',
  '3 months ago', 'just now', etc).

  *** UNUSED: relative times only make sense on a dynamic web page, and this
  script generates static HTML. ***

  The intended implementation was loosely based on
  https://stackoverflow.com/questions/1551382/user-friendly-time-format-in-python

  Both arguments are ignored; the constant string 'unused' is returned.
  """
  return 'unused'
67
68
69def _MinutesSeconds(num_seconds):
70 num_seconds = round(num_seconds) # round to integer
71 minutes = num_seconds / 60
72 seconds = num_seconds % 60
73 return '%d:%02d' % (minutes, seconds)
74
75
76LINE_RE = re.compile(r'(\w+)[ ]+([\d.]+)')
77
78def _ParsePullTime(time_p_str):
79 """
80 Given time -p output like
81
82 real 0.01
83 user 0.02
84 sys 0.02
85
86 Return the real time as a string, or - if we don't know it.
87 """
88 for line in time_p_str.splitlines():
89 m = LINE_RE.match(line)
90 if m:
91 name, value = m.groups()
92 if name == 'real':
93 return _MinutesSeconds(float(value))
94
95 return '-' # Not found
96
97
# Header table for a single run's page: the commit link (Github or sourcehut),
# then either the PR number and head ref, or the commit description, followed
# by the git branch.  Expanded with the first job of the run.
#
# head-ref is a branch name chosen by the PR author, so it is escaped with the
# 'html' formatter wherever it is substituted.
# NOTE(review): 'html' presumably does not escape double quotes; if the
# vendored jsontemplate has an attribute-value formatter, prefer it inside
# href="..." -- confirm against vendor/jsontemplate.py.
DETAILS_RUN_T = jsontemplate.Template('''\

<table>
<tr class="spacer">
  <td></td>
</tr>

<tr class="commit-row">
  <td>
    <code>
      {.section github-commit-link}
        <a href="https://github.com/oilshell/oil/commit/{commit-hash}">{commit-hash-short}</a>
      {.end}

      {.section sourcehut-commit-link}
        <a href="https://git.sr.ht/~andyc/oil/commit/{commit-hash}">{commit-hash-short}</a>
      {.end}
    </code>
  </td>

  <td class="commit-line">
    {.section github-pr}
      <i>
      PR <a href="https://github.com/oilshell/oil/pull/{pr-number}">#{pr-number}</a>
      from <a href="https://github.com/oilshell/oil/tree/{head-ref|html}">{head-ref|html}</a>
      </i>
    {.end}
    {.section commit-desc}
      {@|html}
    {.end}

    {.section git-branch}
      <br/>
      <div style="text-align: right; font-family: monospace">{@|html}</div>
    {.end}
  </td>

</tr>
<tr class="spacer">
  <td></td>
</tr>

</table>
''')
142
143
# Summary table for a single run's page: one row per job with its ID, name
# (linking to the job's anchor in the task table), start time, image pull
# time, total run time, and pass/fail status.  Expanded with {'jobs': [...]}.
DETAILS_TABLE_T = jsontemplate.Template('''\
<table class="col1-right col3-right col4-right col5-right col6-right">

  <thead>
    <tr>
      <td>ID</td>
      <td>Job Name</td>
      <td>Start Time</td>
      <td>Pull Time</td>
      <td>Run Time</td>
      <td>Status</td>
    </tr>
  </thead>

  {.repeated section jobs}
  <tr>

    <td>{job_num}</td>

    <!-- internal link -->
    <td> <code><a href="#job-{job-name}">{job-name}</a></code> </td>

    <td><a href="{job_url}">{start_time_str}</a></td>
    <td>
      {.section pull_time_str}
        <a href="{run_wwz_path}/_tmp/soil/image.html">{@}</a>
      {.or}
        -
      {.end}
    </td>

    <td>{run_time_str}</td>

    <td> <!-- status -->
      {.section passed}
        <span class="pass">pass</span>
      {.end}

      {.section failed}
        <span class="fail">FAIL</span><br/>
        <span class="fail-detail">
        {.section one-failure}
          task <code>{@}</code>
        {.end}

        {.section multiple-failures}
          {num-failures} of {num-tasks} tasks
        {.end}
        </span>
      {.end}
    </td>

  </tr>
  {.end}

</table>
''')
201
202
def _ReadTasks(tsv_path):
  """
  Read the task rows of one job from its TSV file.

  Each row must have six columns, matching _tmp/soil/INDEX.tsv: status,
  elapsed seconds, task name, script name, function, results URL.

  Returns:
    (all_tasks, failed_tasks, total_elapsed).  The first two are lists of JSON
    template rows (failed_tasks is a subset of all_tasks); the last is the sum
    of the elapsed column, as a float.

  Raises:
    RuntimeError: if a row has the wrong number of columns, or a non-numeric
    status or elapsed time.
  """
  all_tasks = []
  failed_tasks = []
  total_elapsed = 0.0

  with open(tsv_path) as f:
    row = None
    try:
      for row in csv.reader(f, delimiter='\t'):
        # Unpack, matching _tmp/soil/INDEX.tsv
        status, elapsed, name, script_name, func, results_url = row
        status = int(status)
        elapsed = float(elapsed)

        task = {
            'name': name,
            'script_name': script_name,
            'func': func,
            # '-' is the placeholder for "no results URL"
            'results_url': None if results_url == '-' else results_url,
            'elapsed_str': _MinutesSeconds(elapsed),
            'status': status,
        }
        if status == 0:
          task['passed'] = True
        else:
          task['failed'] = True
          failed_tasks.append(task)

        all_tasks.append(task)
        total_elapsed += elapsed

    except (IndexError, ValueError) as e:
      raise RuntimeError('Error in %r: %s (%r)' % (tsv_path, e, row))

  return all_tasks, failed_tasks, total_elapsed


def _FormatStartTime(start_time):
  """
  Format a Unix timestamp (int or numeric string) like '3/07 at 4:05pm' in
  local time, or return '?' if start_time is None.
  """
  if start_time is None:
    return '?'

  # Note: this is different clock! Could be desynchronized.
  # A relative time ("5 minutes ago") doesn't make sense because the page is
  # static, so an absolute time is shown instead.
  when = datetime.datetime.fromtimestamp(int(start_time))

  # %-m and %-I avoid the leading 0; %I is the 12 hour clock.
  # lower() for 'pm' instead of 'PM'.
  return when.strftime('%-m/%d at %-I:%M%p').lower()


def ParseJobs(stdin):
  """
  Given the output of list-json (one JSON path per line), open each JSON file
  and its corresponding TSV, and yield one JSON template row per job.

  Each yielded dict is the job's JSON metadata, augmented with the task list,
  pass/fail summary, formatted times, commit / PR links, and relative paths
  used by the HTML templates in this file.  Blank input lines are skipped.

  Raises:
    RuntimeError: if a TSV row is malformed.
    KeyError: if a job without GITHUB_RUN_NUMBER lacks JOB_ID or commit-hash.
  """
  for line in stdin:
    json_path = line.strip()
    if not json_path:
      continue  # e.g. a trailing blank line; open('') would just crash

    with open(json_path) as f:
      meta = json.load(f)

    # x/y/123/myjob.json -> x/y/123/myjob.  The TSV and all the relative paths
    # below are derived from this one prefix.
    prefix, _ = os.path.splitext(json_path)

    all_tasks, failed_tasks, total_elapsed = _ReadTasks(prefix + '.tsv')

    # So we can print task tables
    meta['tasks'] = all_tasks

    num_failures = len(failed_tasks)
    if num_failures == 0:
      meta['passed'] = True
    elif num_failures == 1:
      meta['failed'] = {'one-failure': failed_tasks[0]['name']}
    else:
      meta['failed'] = {
          'multiple-failures': {
              'num-failures': num_failures,
              'num-tasks': len(all_tasks),
          },
      }

    meta['run_time_str'] = _MinutesSeconds(total_elapsed)

    pull_time = meta.get('image-pull-time')
    if pull_time is not None:
      meta['pull_time_str'] = _ParsePullTime(pull_time)

    meta['start_time_str'] = _FormatStartTime(meta.get('task-run-start-time'))

    # Metadata for a "run". A run is for a single commit, and consists of many
    # jobs.

    meta['git-branch'] = meta.get('GITHUB_REF')

    # Show the branch ref/heads/soil-staging or ref/pull/1577/merge (linkified)
    pr_head_ref = meta.get('GITHUB_PR_HEAD_REF')
    pr_number = meta.get('GITHUB_PR_NUMBER')

    if pr_head_ref and pr_number:
      meta['github-pr'] = {
          'head-ref': pr_head_ref,
          'pr-number': pr_number,
      }

      # Show the user's commit, not the merge commit
      commit_hash = meta.get('GITHUB_PR_HEAD_SHA') or '?'

    else:
      # From soil/worker.sh save-metadata. This is intended to be
      # CI-independent, while the environment variables above are from Github.
      meta['commit-desc'] = meta.get('commit-line', '?')
      commit_hash = meta.get('commit-hash') or '?'

    commit_link = {
        'commit-hash': commit_hash,
        'commit-hash-short': commit_hash[:8],
    }

    meta['job-name'] = meta.get('job-name') or '?'

    # Metadata for "Job"

    # GITHUB_RUN_NUMBER (project-scoped) is shorter than GITHUB_RUN_ID (global
    # scope)
    github_run = meta.get('GITHUB_RUN_NUMBER')

    if github_run:
      meta['job_num'] = github_run
      meta['index_run_url'] = '%s/' % github_run

      meta['github-commit-link'] = commit_link

      run_url_prefix = ''
    else:
      sourcehut_job_id = meta['JOB_ID']
      meta['job_num'] = sourcehut_job_id
      meta['index_run_url'] = 'git-%s/' % meta['commit-hash']

      meta['sourcehut-commit-link'] = commit_link

      # sourcehut doesn't have RUN ID, so we're in
      # sourcehut-jobs/git-ab01cd/index.html, and need to find
      # sourcehut-jobs/123/foo.wwz
      run_url_prefix = '../%s/' % sourcehut_job_id

    # For Github, we construct $JOB_URL in soil/github-actions.sh
    meta['job_url'] = meta.get('JOB_URL') or '?'

    parts = prefix.split('/')  # ['x', 'y', '123', 'myjob']
    job_base = run_url_prefix + parts[-1]

    # Paths relative to github-jobs/1234/
    meta['run_wwz_path'] = job_base + '.wwz'  # myjob.wwz
    meta['run_tsv_path'] = job_base + '.tsv'  # myjob.tsv
    meta['run_json_path'] = job_base + '.json'  # myjob.json

    # Relative to github-jobs/
    meta['index_wwz_path'] = '/'.join(parts[-2:]) + '.wwz'  # 123/myjob.wwz

    yield meta
369
370
# Opening of <body> shared by the index page and the run page: navigation
# links and the (HTML-escaped) page title.
HTML_BODY_TOP_T = jsontemplate.Template('''
  <body class="width50">
    <p id="home-link">
        <a href="..">Up</a>
      | <a href="/">Home</a>
      | <a href="//oils.pub/">oils.pub</a>
    </p>

    <h1>{title|html}</h1>
''')

# Closes the <body> and <html> elements.
HTML_BODY_BOTTOM = '''\
  </body>
</html>
'''

# Opens the two-column table on the index page.  PrintIndexHtml() prints the
# matching </table>.
INDEX_HEADER = '''\
<table>
  <thead>
    <tr>
      <td colspan=1> Commit </td>
      <td colspan=1> Description </td>
    </tr>
  </thead>
'''
396
# Index page row describing one run: the commit link (Github or sourcehut),
# then either the PR number and head ref, or the commit description, followed
# by the git branch.  Expanded with the first job of each run.
#
# head-ref is a branch name chosen by the PR author, so it is escaped with the
# 'html' formatter wherever it is substituted.
# NOTE(review): 'html' presumably does not escape double quotes; if the
# vendored jsontemplate has an attribute-value formatter, prefer it inside
# href="..." -- confirm against vendor/jsontemplate.py.
INDEX_RUN_ROW_T = jsontemplate.Template('''\
<tr class="spacer">
  <td colspan=2></td>
</tr>

<tr class="commit-row">
  <td>
    <code>
      {.section github-commit-link}
        <a href="https://github.com/oilshell/oil/commit/{commit-hash}">{commit-hash-short}</a>
      {.end}

      {.section sourcehut-commit-link}
        <a href="https://git.sr.ht/~andyc/oil/commit/{commit-hash}">{commit-hash-short}</a>
      {.end}
    </code>

  </td>

  <td class="commit-line">
    {.section github-pr}
      <i>
      PR <a href="https://github.com/oilshell/oil/pull/{pr-number}">#{pr-number}</a>
      from <a href="https://github.com/oilshell/oil/tree/{head-ref|html}">{head-ref|html}</a>
      </i>
    {.end}
    {.section commit-desc}
      {@|html}
    {.end}

    {.section git-branch}
      <br/>
      <div style="text-align: right; font-family: monospace">{@|html}</div>
    {.end}
  </td>

</tr>
<tr class="spacer">
  <td colspan=2></td>
</tr>
''')
439
# Index page rows summarizing one run's jobs: a link to the run page, the names
# of the jobs that passed, and the jobs that failed with a link to each job's
# anchor and a short failure detail.  Expanded with a dict holding
# 'jobs-passed', 'jobs-failed' and 'index_run_url'.
INDEX_JOBS_T = jsontemplate.Template('''\
<tr>
  <td>
  </td>
  <td>
    <a href="{index_run_url}">All Jobs and Tasks</a>
  </td>
</tr>

{.section jobs-passed}
  <tr>
    <td class="pass">
      Passed
    </td>
    <td>
      {.repeated section @}
        <code class="pass">{job-name}</code>
        <!--
        <span class="pass"> &#x2713; </span>
        -->
      {.alternates with}
        &nbsp; &nbsp;
      {.end}
    </td>
  </tr>
{.end}

{.section jobs-failed}
  <tr>
    <td class="fail">
      Failed
    </td>
    <td>
      {.repeated section @}
        <span class="fail"> &#x2717; </span>
        <code><a href="{index_run_url}#job-{job-name}">{job-name}</a></code>

        <span class="fail-detail">
        {.section failed}
          {.section one-failure}
            - task <code>{@}</code>
          {.end}

          {.section multiple-failures}
            - {num-failures} of {num-tasks} tasks
          {.end}
        {.end}
        </span>

      {.alternates with}
        <br />
      {.end}
    </td>
  </tr>
{.end}

<tr class="spacer">
  <td colspan=2> &nbsp; </td>
</tr>

''')
501
def PrintIndexHtml(title, groups, f=sys.stdout):
  """
  Print the index page for all runs.

  Args:
    title: page title, also used for the <h1>.
    groups: ordered mapping of run key -> non-empty list of job rows, as
      returned by GroupJobs().  Only the values are used.
    f: file object to write the HTML to.
  """
  # Bust cache (e.g. Safari iPad seems to cache aggressively and doesn't
  # have Ctrl-F5)
  html_head.Write(f, title,
      css_urls=['../web/base.css?cache=0', '../web/soil.css?cache=0'])

  print(HTML_BODY_TOP_T.expand({'title': title}), file=f)

  print(INDEX_HEADER, file=f)

  # values() rather than iteritems(): the key isn't needed, and values() works
  # on both Python 2 and 3.
  for jobs in groups.values():
    # All jobs have run-level metadata, so just use the first
    first = jobs[0]
    print(INDEX_RUN_ROW_T.expand(first), file=f)

    summary = {
        'jobs-passed': [job for job in jobs if job.get('passed')],
        'jobs-failed': [job for job in jobs if not job.get('passed')],
        'index_run_url': first['index_run_url'],
    }
    print(INDEX_JOBS_T.expand(summary), file=f)

  print(' </table>', file=f)
  print(HTML_BODY_BOTTOM, file=f)
534
535
# Task table for a single run's page.  For each job: an anchor named
# job-{job-name}, a header row linking to the job's wwz / TSV / JSON files, and
# one row per task with its log link, optional results link, elapsed time and
# exit status.  Expanded with {'jobs': [...]}.
TASK_TABLE_T = jsontemplate.Template('''\

<h2>All Tasks</h2>

<!-- right justify elapsed and status -->
<table class="col2-right col3-right col4-right">

{.repeated section jobs}

<tr> <!-- link here -->
  <td colspan=4>
    <a name="job-{job-name}"></a>
  </td>
</tr>

<tr style="background-color: #EEE">
  <td colspan=3>
    <b>{job-name}</b>
    &nbsp;
    &nbsp;
    &nbsp;
    <a href="{run_wwz_path}/">wwz</a>
    &nbsp;
    <a href="{run_tsv_path}">TSV</a>
    &nbsp;
    <a href="{run_json_path}">JSON</a>
  </td>
  <td>
    <a href="">Up</a>
  </td>
</tr>

<tr class="spacer">
  <td colspan=4> &nbsp; </td>
</tr>

<tr style="font-weight: bold">
  <td>Task</td>
  <td>Results</td>
  <td>Elapsed</td>
  <td>Status</td>
</tr>

  {.repeated section tasks}
  <tr>
    <td>
      <a href="{run_wwz_path}/_tmp/soil/logs/{name}.txt">{name}</a> <br/>
      <code>{script_name} {func}</code>
    </td>

    <td>
      {.section results_url}
      <a href="{run_wwz_path}/{@}">Results</a>
      {.or}
      {.end}
    </td>

    <td>{elapsed_str}</td>

    {.section passed}
      <td>{status}</td>
    {.end}
    {.section failed}
      <td class="fail">status: {status}</td>
    {.end}

  </tr>
  {.end}

<tr class="spacer">
  <td colspan=4> &nbsp; </td>
</tr>

{.end}

</table>

''')
613
614
def PrintRunHtml(title, jobs, f=sys.stdout):
  """
  Print the page for a single run: run header, job summary table, and the
  per-job task tables.

  jobs is the non-empty list of job rows for the run; the first one supplies
  the run-level metadata.  Output is written to f.
  """
  # Have to descend an extra level
  stylesheets = ['../../web/base.css?cache=0', '../../web/soil.css?cache=0']
  html_head.Write(f, title, css_urls=stylesheets)

  print(HTML_BODY_TOP_T.expand({'title': title}), file=f)

  # Run-level metadata is the same on every job, so the first one will do.
  print(DETAILS_RUN_T.expand(jobs[0]), file=f)

  table_data = {'jobs': jobs}
  for template in (DETAILS_TABLE_T, TASK_TABLE_T):
    print(template.expand(table_data), file=f)

  print(HTML_BODY_BOTTOM, file=f)
633
634
def GroupJobs(jobs, key_func):
  """
  Group job rows by key_func(job) into an ordered dict.

  Keys appear in the order they are first seen in jobs.  Within each group,
  jobs are sorted by task run start time, most recent first.

  Unlike itertools.groupby(), this doesn't require equal keys to be adjacent.
  That matters for sourcehut, where jobs are sorted by commit date but grouped
  by commit hash: two commits with the same date can interleave, and assigning
  each groupby() run to d[key] would silently drop the earlier jobs.

  Args:
    jobs: iterable of job rows (dicts).
    key_func: function from a job row to a hashable group key.

  Returns:
    collections.OrderedDict of key -> list of job rows.
  """
  groups = collections.OrderedDict()
  for job in jobs:
    groups.setdefault(key_func(job), []).append(job)

  for group in groups.values():
    group.sort(key=ByTaskRunStartTime, reverse=True)

  return groups


def ByTaskRunStartTime(row):
  """Sort key: the job's task-run-start-time as an int, or 0 if it's missing."""
  return int(row.get('task-run-start-time', 0))
655
def ByCommitDate(row):
  """Sort key: the job's commit-date string, or '?' if it's missing."""
  # Written in the shell script, in ISO 8601 format (git log %aI), so the
  # strings themselves can be sorted.
  return row['commit-date'] if 'commit-date' in row else '?'
660
def ByCommitHash(row):
  """Group key: the job's commit-hash, or '?' if it's missing."""
  return row['commit-hash'] if 'commit-hash' in row else '?'
663
def ByGithubRun(row):
  """Sort and group key: GITHUB_RUN_NUMBER as an int, or 0 if it's missing."""
  # The value is stored as whatever the job metadata contains, so convert it to
  # compare numerically.
  run_number = row.get('GITHUB_RUN_NUMBER', 0)
  return int(run_number)
668
669
def _WriteIndexes(groups, run_key, index_title, run_title, index_out,
                  run_index_out):
  """
  Write the index of all runs to index_out, then the page for the run
  identified by run_key to run_index_out.

  Raises RuntimeError if groups has no jobs for run_key.
  """
  with open(index_out, 'w') as f:
    PrintIndexHtml(index_title, groups, f=f)

  try:
    jobs = groups[run_key]
  except KeyError:
    raise RuntimeError('No jobs found for run %r' % run_key)

  with open(run_index_out, 'w') as f:
    PrintRunHtml(run_title, jobs, f=f)


def _PrefixesToRemove(prefixes, num_to_keep):
  """
  Return the path prefixes to delete, oldest first, keeping the num_to_keep
  with the highest run number.

  The run number is the directory name, e.g. 999 in
  op.oilshell.org/github-jobs/999/foo
  """
  ordered = sorted(prefixes, key=lambda path: int(path.split('/')[-2]))

  # Not ordered[:-num_to_keep], which is empty when num_to_keep is 0.
  num_to_remove = max(0, len(ordered) - num_to_keep)
  return ordered[:num_to_remove]


def main(argv):
  """
  Dispatch on the action in argv[1].

    sourcehut-index INDEX_OUT RUN_INDEX_OUT git-$hash
    github-index INDEX_OUT RUN_INDEX_OUT $GITHUB_RUN_NUMBER
      Read JSON paths from stdin, and write the index of all runs and the page
      for the given run.

    cleanup [NUM_TO_KEEP]
      Read JSON paths from stdin, and print the .json, .tsv and .wwz files to
      delete, keeping the NUM_TO_KEEP (default 200) most recent jobs.

  Raises RuntimeError on an invalid or missing action, a malformed run ID, a
  run with no jobs, or a negative NUM_TO_KEEP.
  """
  try:
    action = argv[1]
  except IndexError:
    raise RuntimeError('Action required')

  if action == 'sourcehut-index':
    index_out = argv[2]
    run_index_out = argv[3]
    run_id = argv[4]  # looks like git-0101abab

    if not run_id.startswith('git-'):
      raise RuntimeError('Expected run ID like git-0101abab, got %r' % run_id)
    commit_hash = run_id[4:]

    jobs = list(ParseJobs(sys.stdin))

    # sourcehut doesn't have a build number.
    # - Sort by descending commit date. (Minor problem: Committing on a VM with
    #   bad clock can cause commits "in the past")
    # - Group by commit HASH, because 'git rebase' can create different commits
    #   with the same date.
    jobs.sort(key=ByCommitDate, reverse=True)
    groups = GroupJobs(jobs, ByCommitHash)

    _WriteIndexes(groups, commit_hash,
                  'Recent Jobs (sourcehut)',
                  'Jobs for commit %s' % commit_hash,
                  index_out, run_index_out)

  elif action == 'github-index':
    index_out = argv[2]
    run_index_out = argv[3]
    run_id = int(argv[4])  # compared as an integer

    jobs = list(ParseJobs(sys.stdin))

    jobs.sort(key=ByGithubRun, reverse=True)  # ordered
    groups = GroupJobs(jobs, ByGithubRun)

    _WriteIndexes(groups, run_id,
                  'Recent Jobs (Github Actions)',
                  'Jobs for run %d' % run_id,
                  index_out, run_index_out)

  elif action == 'cleanup':
    try:
      num_to_keep = int(argv[2])
    except IndexError:
      num_to_keep = 200

    if num_to_keep < 0:
      raise RuntimeError('Number to keep must be >= 0, got %d' % num_to_keep)

    # Strip the .json extension from each path
    prefixes = [line.strip()[:-5] for line in sys.stdin]

    log('%s cleanup: keep %d', sys.argv[0], num_to_keep)
    log('%s cleanup: got %d JSON paths', sys.argv[0], len(prefixes))

    # TODO: clean up git-$hash dirs
    #
    # github-jobs/
    #   $GITHUB_RUN_NUMBER/
    #     cpp-tarball.{json,wwz,tsv}
    #     dummy.{json,wwz,tsv}
    #   git-$hash/
    #     oils-for-unix.tar
    #
    # sourcehut-jobs/
    #   1234/
    #     cpp-tarball.{json,wwz,tsv}
    #   1235/
    #     dummy.{json,wwz,tsv}
    #   git-$hash/
    #     index.html  # HTML for this job
    #     oils-for-unix.tar
    #
    # We might have to read the most recent JSON, find the corresponding $hash,
    # and print that dir.
    #
    # Another option is to use a real database, rather than the file system!

    # Show what to delete. Then the user can pipe to xargs rm to remove it.
    for prefix in _PrefixesToRemove(prefixes, num_to_keep):
      print(prefix + '.json')
      print(prefix + '.tsv')
      print(prefix + '.wwz')

  else:
    raise RuntimeError('Invalid action %r' % action)
775
776
# Script entry point: a RuntimeError from main() is reported on stderr as a
# one-line FATAL message, and the process exits with status 1.
if __name__ == '__main__':
  try:
    main(sys.argv)
  except RuntimeError as e:
    log('FATAL: %s', e)
    sys.exit(1)