OILS / soil / web.py View on Github | oils.pub

798 lines, 256 significant
1#!/usr/bin/env python2
2"""
3soil/web.py - Dashboard that uses the "Event Sourcing" Paradigm
4
5Given state like this:
6
7https://test.oils-for-unix.org/
8 github-jobs/
9 1234/ # $GITHUB_RUN_NUMBER
10 cpp-small.tsv # benchmarks/time.py output. Success/failure for each task.
11 cpp-small.json # metadata when job is DONE
12
13 (cpp-small.wwz is linked to, but not part of the state.)
14
15 (cpp-small.state # maybe for more transient events)
16
17This script generates:
18
19https://test.oils-for-unix.org/
20 github-jobs/
21 tmp-$$.index.html # jobs for all runs
22 1234/
23 tmp-$$.index.html # jobs and tasks for a given run
24 tmp-$$.remove.txt # TODO: consolidate 'cleanup', to make it faster
25
26 # For sourcehut
27 git-0101abab/
28 tmp-$$.index.html
29
30How to test changes to this file:
31
32 $ soil/web-init.sh deploy-code
33 $ soil/web-worker.sh remote-rewrite-jobs-index github- ${GITHUB_RUN_NUMBER}
34 $ soil/web-worker.sh remote-rewrite-jobs-index sourcehut- git-${commit_hash}
35
36"""
37from __future__ import print_function
38
39import collections
40import csv
41import datetime
42import json
43import itertools
44import os
45import re
46import sys
47from doctools import html_head
48from vendor import jsontemplate
49
50
def log(msg, *args):
  """Write a printf-style message to stderr."""
  formatted = msg % args if args else msg
  print(formatted, file=sys.stderr)
55
56
def PrettyTime(now, start_time):
  """Return a pretty string like 'an hour ago', 'Yesterday', '3 months ago',
  'just now', etc.

  *** UNUSED because it only makes sense on a dynamic web page! ***
  Loosely based on
  https://stackoverflow.com/questions/1551382/user-friendly-time-format-in-python
  """
  return 'unused'
67
68
69def _MinutesSeconds(num_seconds):
70 num_seconds = round(num_seconds) # round to integer
71 minutes = num_seconds / 60
72 seconds = num_seconds % 60
73 return '%d:%02d' % (minutes, seconds)
74
75
# Matches one line of 'time -p' output, e.g. 'real 0.01'
LINE_RE = re.compile(r'(\w+)[ ]+([\d.]+)')


def _ParsePullTime(time_p_str):
  """
  Given time -p output like

  real 0.01
  user 0.02
  sys 0.02

  Return the real time as a string, or - if we don't know it.
  """
  for line in time_p_str.splitlines():
    match = LINE_RE.match(line)
    if match is None:
      continue
    name, value = match.groups()
    if name == 'real':
      return _MinutesSeconds(float(value))

  return '-'  # Not found
96
97
# Commit header for a single run's page: commit link (Github or sourcehut),
# PR info or commit description, and optionally the git branch.
# Expanded with one job dict from ParseJobs() (run-level metadata).
DETAILS_RUN_T = jsontemplate.Template('''\

<table>
<tr class="spacer">
  <td></td>
</tr>

<tr class="commit-row">
  <td>
    <code>
      {.section github-commit-link}
        <a href="https://github.com/oilshell/oil/commit/{commit-hash}">{commit-hash-short}</a>
      {.end}

      {.section sourcehut-commit-link}
        <a href="https://git.sr.ht/~andyc/oil/commit/{commit-hash}">{commit-hash-short}</a>
      {.end}
    </code>
  </td>

  <td class="commit-line">
    {.section github-pr}
      <i>
      PR <a href="https://github.com/oilshell/oil/pull/{pr-number}">#{pr-number}</a>
      from <a href="https://github.com/oilshell/oil/tree/{head-ref}">{head-ref}</a>
      </i>
    {.end}
    {.section commit-desc}
      {@|html}
    {.end}

    {.section git-branch}
      <br/>
      <div style="text-align: right; font-family: monospace">{@}</div>
    {.end}
  </td>

</tr>
<tr class="spacer">
  <td></td>
</tr>

</table>
''')
142
143
# Per-run job table: one row per job with timing and pass/FAIL status.
# Expanded with {'jobs': [...]} where each dict comes from ParseJobs().
DETAILS_TABLE_T = jsontemplate.Template('''\
<table class="col1-right col3-right col4-right col5-right col6-right">

  <thead>
    <tr>
      <td>ID</td>
      <td>Job Name</td>
      <td>Start Time</td>
      <td>Pull Time</td>
      <td>Run Time</td>
      <td>Status</td>
    </tr>
  </thead>

  {.repeated section jobs}
  <tr>

    <td>{job_num}</td>

    <!-- internal link -->
    <td> <code><a href="#job-{job-name}">{job-name}</a></code> </td>

    <td><a href="{job_url}">{start_time_str}</a></td>
    <td>
      {.section pull_time_str}
        <a href="{run_wwz_href}/_tmp/soil/image.html">{@}</a>
      {.or}
        -
      {.end}
    </td>

    <td>{run_time_str}</td>

    <td>  <!-- status -->
    {.section passed}
      <span class="pass">pass</span>
    {.end}

    {.section failed}
      <span class="fail">FAIL</span><br/>
      <span class="fail-detail">
      {.section one-failure}
        task <code>{@}</code>
      {.end}

      {.section multiple-failures}
        {num-failures} of {num-tasks} tasks
      {.end}
      </span>
    {.end}
    </td>

  </tr>
  {.end}

</table>
''')
201
202
def ParseJobs(stdin):
  """
  Given the output of list-json, open JSON and corresponding TSV, and yield a
  list of JSON template rows.

  Each input line is a path like github-jobs/1234/cpp-small.json.  For each
  one we load the job metadata JSON, parse the sibling .tsv of task results,
  and yield a single dict ('meta') with everything the HTML templates need:
  cleanup paths, task rows, pass/fail summary, timing strings, and commit/run
  links.
  """
  for i, line in enumerate(stdin):
    json_path = line.strip()

    #if i % 20 == 0:
    #  log('job %d = %s', i, json_path)

    try:
      with open(json_path) as f:
        meta = json.load(f)
    except IOError as e:
      # We do concurrent deletions, and we can't rely on sorting beforehand
      log('Ignoring file that was probably deleted %s: %s', json_path, e)
      continue
    #print(meta)

    # Derive sibling paths by stripping the 5-char '.json' suffix.
    tsv_path = json_path[:-5] + '.tsv'
    wwz_path = json_path[:-5] + '.wwz'

    # For 'cleanup' tasks - delete these three
    meta['json_path'] = json_path
    meta['tsv_path'] = tsv_path
    meta['wwz_path'] = wwz_path

    #log('%s', tsv_path)

    all_tasks = []
    failed_tasks = []
    total_elapsed = 0.0

    with open(tsv_path) as f:
      reader = csv.reader(f, delimiter='\t')

      try:
        for row in reader:
          t = {}
          # Unpack, matching _tmp/soil/INDEX.tsv
          ( status, elapsed,
            t['name'], t['script_name'], t['func'], results_url) = row

          # '-' in the TSV means the task produced no results page.
          t['results_url'] = None if results_url == '-' else results_url

          status = int(status)
          elapsed = float(elapsed)

          t['elapsed_str'] = _MinutesSeconds(elapsed)

          all_tasks.append(t)

          t['status'] = status
          if status == 0:
            t['passed'] = True
          else:
            t['failed'] = True
            failed_tasks.append(t)

          total_elapsed += elapsed

      # Malformed row: wrong column count (IndexError on unpack) or a
      # non-numeric status/elapsed (ValueError).
      except (IndexError, ValueError) as e:
        raise RuntimeError('Error in %r: %s (%r)' % (tsv_path, e, row))

    # So we can print task tables
    meta['tasks'] = all_tasks

    num_failures = len(failed_tasks)

    if num_failures == 0:
      meta['passed'] = True
    else:
      # The templates show either the single failing task name, or a count.
      failed = {}
      if num_failures == 1:
        failed['one-failure'] = failed_tasks[0]['name']
      else:
        failed['multiple-failures'] = {
            'num-failures': num_failures,
            'num-tasks': len(all_tasks),
        }
      meta['failed'] = failed

    meta['run_time_str'] = _MinutesSeconds(total_elapsed)

    # 'time -p' output recorded while pulling the container image, if any.
    pull_time = meta.get('image-pull-time')
    if pull_time is not None:
      meta['pull_time_str'] = _ParsePullTime(pull_time)

    start_time = meta.get('task-run-start-time')
    if start_time is None:
      start_time_str = '?'
    else:
      # Note: this is different clock!  Could be desynchronized.
      # Doesn't make sense this is static!
      #now = time.time()
      start_time = int(start_time)

      t = datetime.datetime.fromtimestamp(start_time)
      # %-I avoids leading 0, and is 12 hour date.
      # lower() for 'pm' instead of 'PM'.
      start_time_str = t.strftime('%-m/%d at %-I:%M%p').lower()

      #start_time_str = PrettyTime(now, start_time)

    meta['start_time_str'] = start_time_str

    # Metadata for a "run".  A run is for a single commit, and consists of
    # many jobs.

    meta['git-branch'] = meta.get('GITHUB_REF')

    # Show the branch ref/heads/soil-staging or ref/pull/1577/merge (linkified)
    pr_head_ref = meta.get('GITHUB_PR_HEAD_REF')
    pr_number = meta.get('GITHUB_PR_NUMBER')

    if pr_head_ref and pr_number:
      meta['github-pr'] = {
          'head-ref': pr_head_ref,
          'pr-number': pr_number,
      }

      # Show the user's commit, not the merge commit
      commit_hash = meta.get('GITHUB_PR_HEAD_SHA') or '?'

    else:
      # From soil/worker.sh save-metadata.  This is intended to be
      # CI-independent, while the environment variables above are from Github.
      meta['commit-desc'] = meta.get('commit-line', '?')
      commit_hash = meta.get('commit-hash') or '?'

    commit_link = {
        'commit-hash': commit_hash,
        'commit-hash-short': commit_hash[:8],
    }

    meta['job-name'] = meta.get('job-name') or '?'

    # Metadata for "Job"

    # GITHUB_RUN_NUMBER (project-scoped) is shorter than GITHUB_RUN_ID (global
    # scope)
    github_run = meta.get('GITHUB_RUN_NUMBER')

    if github_run:
      meta['job_num'] = github_run
      meta['index_run_url'] = '%s/' % github_run

      meta['github-commit-link'] = commit_link

      run_url_prefix = ''
    else:
      # No GITHUB_RUN_NUMBER: assume this job came from sourcehut.
      sourcehut_job_id = meta['JOB_ID']
      meta['job_num'] = sourcehut_job_id
      meta['index_run_url'] = 'git-%s/' % meta['commit-hash']

      meta['sourcehut-commit-link'] = commit_link

      # sourcehut doesn't have RUN ID, so we're in
      # sourcehut-jobs/git-ab01cd/index.html, and need to find
      # sourcehut-jobs/123/foo.wwz
      run_url_prefix = '../%s/' % sourcehut_job_id

    # For Github, we construct $JOB_URL in soil/github-actions.sh
    meta['job_url'] = meta.get('JOB_URL') or '?'

    prefix, _ = os.path.splitext(json_path)  # x/y/123/myjob
    parts = prefix.split('/')

    # Paths relative to github-jobs/1234/
    meta['run_wwz_href'] = run_url_prefix + parts[-1] + '.wwz'  # myjob.wwz
    meta['run_tsv_href'] = run_url_prefix + parts[-1] + '.tsv'  # myjob.tsv
    meta['run_json_href'] = run_url_prefix + parts[-1] + '.json'  # myjob.json

    # Relative to github-jobs/
    last_two_parts = parts[-2:]  # ['123', 'myjob']
    meta['index_wwz_path'] = '/'.join(last_two_parts) + '.wwz'  # 123/myjob.wwz

    yield meta
381
382
# Shared <body> opening for every generated page: nav links plus the <h1>.
# Expanded with {'title': ...}.
HTML_BODY_TOP_T = jsontemplate.Template('''
  <body class="width50">
    <p id="home-link">
      <a href="..">Up</a>
      | <a href="/">Home</a>
      | <a href="//oils.pub/">oils.pub</a>
    </p>

    <h1>{title|html}</h1>
''')

# Shared closing tags for every generated page.
HTML_BODY_BOTTOM = '''\
  </body>
</html>
'''

# Opens the top-level index table (two columns: commit, description).
# The matching </table> is printed by PrintIndexHtml().
INDEX_HEADER = '''\
<table>
  <thead>
    <tr>
      <td colspan=1> Commit </td>
      <td colspan=1> Description </td>
    </tr>
  </thead>
'''
408
# Commit row on the top-level index page; same layout as DETAILS_RUN_T but
# for the 2-column index table.  Expanded with one job dict from ParseJobs().
# Fixed: the first cell was closed with two </td> tags (invalid HTML in a
# 2-column table); there is now exactly one.
INDEX_RUN_ROW_T = jsontemplate.Template('''\
<tr class="spacer">
  <td colspan=2></td>
</tr>

<tr class="commit-row">
  <td>
    <code>
      {.section github-commit-link}
        <a href="https://github.com/oilshell/oil/commit/{commit-hash}">{commit-hash-short}</a>
      {.end}

      {.section sourcehut-commit-link}
        <a href="https://git.sr.ht/~andyc/oil/commit/{commit-hash}">{commit-hash-short}</a>
      {.end}
    </code>
  </td>

  <td class="commit-line">
    {.section github-pr}
      <i>
      PR <a href="https://github.com/oilshell/oil/pull/{pr-number}">#{pr-number}</a>
      from <a href="https://github.com/oilshell/oil/tree/{head-ref}">{head-ref}</a>
      </i>
    {.end}
    {.section commit-desc}
      {@|html}
    {.end}

    {.section git-branch}
      <br/>
      <div style="text-align: right; font-family: monospace">{@}</div>
    {.end}
  </td>

</tr>
<tr class="spacer">
  <td colspan=2></td>
</tr>
''')
451
# Pass/fail summary rows for one run on the top-level index page.
# Expanded with {'jobs-passed': [...], 'jobs-failed': [...], 'index_run_url'}.
# Fixed: the trailing spacer said colspan=3, but the index table (see
# INDEX_HEADER and INDEX_RUN_ROW_T) has only 2 columns.
INDEX_JOBS_T = jsontemplate.Template('''\
<tr>
  <td>
  </td>
  <td>
    <a href="{index_run_url}">All Jobs and Tasks</a>
  </td>
</tr>

{.section jobs-passed}
  <tr>
    <td class="pass">
      Passed
    </td>
    <td>
      {.repeated section @}
        <code class="pass">{job-name}</code>
        <!--
        <span class="pass"> &#x2713; </span>
        -->
      {.alternates with}
      &nbsp; &nbsp;
      {.end}
    </td>
  </tr>
{.end}

{.section jobs-failed}
  <tr>
    <td class="fail">
      Failed
    </td>
    <td>
      {.repeated section @}
        <span class="fail"> &#x2717; </span>
        <code><a href="{index_run_url}#job-{job-name}">{job-name}</a></code>

        <span class="fail-detail">
        {.section failed}
          {.section one-failure}
            - task <code>{@}</code>
          {.end}

          {.section multiple-failures}
            - {num-failures} of {num-tasks} tasks
          {.end}
        {.end}
        </span>

      {.alternates with}
        <br />
      {.end}
    </td>
  </tr>
{.end}

<tr class="spacer">
  <td colspan=2> &nbsp; </td>
</tr>

''')
513
def PrintIndexHtml(title, groups, f=sys.stdout):
  """Write the top-level index page: a commit row plus a pass/fail summary
  for each run.

  Args:
    title: page title.
    groups: OrderedDict of run key -> list of job dicts, from GroupJobs().
    f: file object to write HTML to.
  """
  # Bust cache (e.g. Safari iPad seems to cache aggressively and doesn't
  # have Ctrl-F5)
  html_head.Write(f, title,
      css_urls=['../web/base.css?cache=0', '../web/soil.css?cache=0'])

  d = {'title': title}
  print(HTML_BODY_TOP_T.expand(d), file=f)

  print(INDEX_HEADER, file=f)

  # .items() instead of the Python-2-only .iteritems(), so the file stays
  # portable; behavior is identical here.
  for key, jobs in groups.items():
    # All jobs have run-level metadata, so just use the first

    print(INDEX_RUN_ROW_T.expand(jobs[0]), file=f)

    summary = {
        'jobs-passed': [],
        'jobs-failed': [],
        'index_run_url': jobs[0]['index_run_url'],
    }

    for job in jobs:
      if job.get('passed'):
        summary['jobs-passed'].append(job)
      else:
        summary['jobs-failed'].append(job)

    print(INDEX_JOBS_T.expand(summary), file=f)

  print(' </table>', file=f)
  print(HTML_BODY_BOTTOM, file=f)
546
547
# "All Tasks" section of a run page: a header row per job, then one row per
# task with log link, results link, elapsed time, and status.
# Expanded with {'jobs': [...]}; each job dict comes from ParseJobs().
# Fixed: the colspan=3 job-header cell was never closed before the 'Up' cell;
# added the missing </td>.
TASK_TABLE_T = jsontemplate.Template('''\

<h2>All Tasks</h2>

<!-- right justify elapsed and status -->
<table class="col2-right col3-right col4-right">

{.repeated section jobs}

<tr> <!-- link here -->
  <td colspan=4>
    <a name="job-{job-name}"></a>
  </td>
</tr>

<tr style="background-color: #EEE">
  <td colspan=3>
    <b>{job-name}</b>
    &nbsp;
    &nbsp;
    &nbsp;
    <a href="{run_wwz_href}/">wwz</a>
    &nbsp;
    <a href="{run_tsv_href}">TSV</a>
    &nbsp;
    <a href="{run_json_href}">JSON</a>
  </td>
  <td>
    <a href="">Up</a>
  </td>
</tr>

<tr class="spacer">
  <td colspan=4> &nbsp; </td>
</tr>

<tr style="font-weight: bold">
  <td>Task</td>
  <td>Results</td>
  <td>Elapsed</td>
  <td>Status</td>
</tr>

  {.repeated section tasks}
  <tr>
    <td>
      <a href="{run_wwz_href}/_tmp/soil/logs/{name}.txt">{name}</a> <br/>
      <code>{script_name} {func}</code>
    </td>

    <td>
      {.section results_url}
        <a href="{run_wwz_href}/{@}">Results</a>
      {.or}
      {.end}
    </td>

    <td>{elapsed_str}</td>

    {.section passed}
      <td>{status}</td>
    {.end}
    {.section failed}
      <td class="fail">status: {status}</td>
    {.end}

  </tr>
  {.end}

<tr class="spacer">
  <td colspan=4> &nbsp; </td>
</tr>

{.end}

</table>

''')
625
626
def PrintRunHtml(title, jobs, f=sys.stdout):
  """Print index for jobs in a single run."""

  # This page lives one directory deeper than the index, so the CSS paths
  # need an extra '../'.  '?cache=0' busts aggressive browser caches.
  css = ['../../web/base.css?cache=0', '../../web/soil.css?cache=0']
  html_head.Write(f, title, css_urls=css)

  print(HTML_BODY_TOP_T.expand({'title': title}), file=f)

  # Run-level metadata is the same for every job; take it from the first.
  print(DETAILS_RUN_T.expand(jobs[0]), file=f)

  job_dict = {'jobs': jobs}
  print(DETAILS_TABLE_T.expand(job_dict), file=f)
  print(TASK_TABLE_T.expand(job_dict), file=f)

  print(HTML_BODY_BOTTOM, file=f)
645
646
def GroupJobs(jobs, key_func):
  """Group jobs by key_func(job), sorting each group by start time descending.

  Returns an OrderedDict of key -> list of jobs, keyed in order of first
  appearance.

  Note: the old implementation used itertools.groupby(), which yields a
  separate group for each *adjacent* run of equal keys, and then assigned
  d[key] = jobs — so if the same key appeared in non-adjacent positions
  (possible for sourcehut, where we sort by commit date but group by commit
  hash), the later group silently overwrote the earlier one.  A single
  accumulation pass merges them instead.
  """
  d = collections.OrderedDict()

  for job in jobs:
    d.setdefault(key_func(job), []).append(job)

  for job_list in d.values():
    job_list.sort(key=ByTaskRunStartTime, reverse=True)

  return d


def ByTaskRunStartTime(row):
  # Sort key: job start time in epoch seconds; 0 when missing.
  return int(row.get('task-run-start-time', 0))
667
def ByCommitDate(row):
  # Sort key: commit date, '?' when missing.
  # Written in the shell script
  # This is in ISO 8601 format (git log %aI), so we can sort by it.
  return row.get('commit-date', '?')

def ByCommitHash(row):
  # Group key: full commit hash, '?' when missing.
  return row.get('commit-hash', '?')

def ByGithubRun(row):
  # Sort/group key: GITHUB_RUN_NUMBER as an int (so runs compare numerically,
  # not lexically); 0 when missing, e.g. for sourcehut jobs.
  return int(row.get('GITHUB_RUN_NUMBER', 0))
678
679
def main(argv):
  """Dispatch on argv[1]: 'sourcehut-index', 'github-index', or 'cleanup'.

  All three actions read JSON file paths from stdin (produced by
  soil/web.sh list-json) and parse them with ParseJobs().
  """
  action = argv[1]

  if action == 'sourcehut-index':
    index_out = argv[2]          # output path for the jobs index HTML
    run_index_out = argv[3]      # output path for the single-run HTML
    run_id = argv[4]             # looks like git-0101abab
    num_to_show = int(argv[5])   # truncate the index to this many jobs

    assert run_id.startswith('git-'), run_id
    commit_hash = run_id[4:]

    jobs = list(ParseJobs(sys.stdin))

    # sourcehut doesn't have a build number.
    # - Sort by descending commit date.  (Minor problem: Committing on a VM
    #   with bad clock can cause commits "in the past")
    # - Group by commit HASH, because 'git rebase' can create different
    #   commits with the same date.
    jobs.sort(key=ByCommitDate, reverse=True)
    jobs = jobs[:num_to_show]

    groups = GroupJobs(jobs, ByCommitHash)

    title = 'Recent Jobs (sourcehut)'
    with open(index_out, 'w') as f:
      PrintIndexHtml(title, groups, f=f)

    jobs = groups[commit_hash]
    title = 'Jobs for commit %s' % commit_hash
    with open(run_index_out, 'w') as f:
      PrintRunHtml(title, jobs, f=f)

  elif action == 'github-index':

    index_out = argv[2]
    run_index_out = argv[3]
    run_id = int(argv[4])  # compared as an integer
    num_to_show = int(argv[5])

    #log('web.py github-index %s %s %d', index_out, run_index_out, run_id)

    # soil/web.sh list-json gives us file system paths
    jobs = list(ParseJobs(sys.stdin))

    # sort and truncate
    jobs.sort(key=ByGithubRun, reverse=True)
    jobs = jobs[:num_to_show]

    groups = GroupJobs(jobs, ByGithubRun)

    title = 'Recent Jobs (Github Actions)'
    with open(index_out, 'w') as f:
      PrintIndexHtml(title, groups, f=f)

    try:
      jobs = groups[run_id]
    except KeyError:
      # debug info that shouldn't be needed
      log('run_id %s', run_id)
      for k in groups:
        log('key %s', k)
      raise
    title = 'Jobs for run %d' % run_id

    with open(run_index_out, 'w') as f:
      PrintRunHtml(title, jobs, f=f)

  elif action == 'cleanup':
    num_to_keep = int(argv[2])

    jobs = list(ParseJobs(sys.stdin))
    log('%s cleanup: got %d jobs', sys.argv[0], len(jobs))

    # sort and truncate
    jobs.sort(key=ByGithubRun, reverse=True)
    to_delete = jobs[num_to_keep:]  # everything but the most recent

    log('%s cleanup: keeping %d, deleting %d', sys.argv[0], num_to_keep,
        len(to_delete))

    # Print the paths for the calling shell script to delete.
    for job in to_delete:
      print(job['json_path'])
      print(job['tsv_path'])
      print(job['wwz_path'])

    # TODO: clean up git-$hash dirs
    #
    # github-jobs/
    #   $GITHUB_RUN_NUMBER/
    #     cpp-tarball.{json,wwz,tsv}
    #     dummy.{json,wwz,tsv}
    #   git-$hash/
    #     oils-for-unix.tar
    #
    # sourcehut-jobs/
    #   1234/
    #     cpp-tarball.{json,wwz,tsv}
    #   1235/
    #     dummy.{json,wwz,tsv}
    #   git-$hash/
    #     index.html  # HTML for this job
    #     oils-for-unix.tar
    #
    # We might have to read the most recent JSON, find the corresponding
    # $hash, and print that dir.
    #
    # Another option is to use a real database, rather than the file system!

  else:
    raise RuntimeError('Invalid action %r' % action)
791
792
if __name__ == '__main__':
  try:
    main(sys.argv)
  except RuntimeError as e:
    # Expected failures (e.g. bad action, malformed TSV) get a clean one-line
    # error instead of a traceback.
    print('FATAL: %s' % e, file=sys.stderr)
    sys.exit(1)