OILS / devtools / services / zulip.py View on Github | oils.pub

128 lines, 57 significant
1#!/usr/bin/env python3
2"""
3Convert Zulip-flavored markdown to CommonMark.
4
5Usage:
6 devtools/services/zulip.py < input.md > output.md
7
8NOTE: This tool and its tests were mostly generated by Claude Code, using a
9"scaffold" I set up with devtools/services/zulip.sh.
10
11Converts Zulip-specific syntax to standard markdown:
12- #**stream>topic** -> [#stream > topic](https://oilshell.zulipchat.com/#narrow/channel/ID-stream/topic/TOPIC)
13- #**stream** -> [#stream](https://oilshell.zulipchat.com/#narrow/channel/ID-stream)
14- https://example.com -> <https://example.com> (for CommonMark auto-linking)
15
16Generates proper Zulip URLs with:
17- Correct channel IDs for known streams
18- Zulip-style URL encoding (.20 for space, .27 for apostrophe, etc.)
19- Full domain URLs that work outside of Zulip
20"""
21
22import sys
23import re
24
# Stream name to ID mapping extracted from Zulip.
#
# Keys are stream (channel) names exactly as they appear inside #**...**;
# values are the channel IDs, kept as strings because they are interpolated
# straight into the generated URL.  A name that is not listed here is looked
# up with a default of '0' by replace_stream_topic().
STREAM_MAP = {
    'oil-dev': '121539',
    'oil-discuss': '121540',
    'blog-ideas': '266575',
    'containers': '308821',
    'performance': '384167',
    'language-design': '384942',
    'blog-comments': '392989',
    'shell-runtime': '429356',
    'projects-with-oils': '452107',
    'osh': '502349',
    'distros': '522730',
}
39
40
def zulip_encode_topic(topic):
    """Encode a topic name using Zulip's '.XX' pattern (instead of '%XX').

    ASCII letters, ASCII digits and '-' are copied through unchanged.  Every
    other character is escaped as one '.XX' group per UTF-8 byte, so a space
    becomes '.20', an apostrophe '.27', and non-ASCII text yields well-formed
    two-digit escapes (an em dash becomes '.E2.80.94') rather than a single
    escape built from the code point.

    Args:
        topic: Topic name as a str.

    Returns:
        The encoded topic as an ASCII-only str.
    """
    pieces = []
    # 'surrogatepass' keeps the function total: any str is accepted.
    for byte in topic.encode('utf-8', 'surrogatepass'):
        char = chr(byte)
        # Bytes >= 0x80 are part of a multi-byte character and must always be
        # escaped; chr() of such a byte can look alphanumeric (0xE9 -> 'é').
        if byte < 0x80 and (char.isalnum() or char == '-'):
            pieces.append(char)
        else:
            pieces.append(f'.{byte:02X}')
    return ''.join(pieces)
52
53
# Convert stream/topic links: #**stream>topic** -> proper Zulip URL
def replace_stream_topic(match):
    """Build the CommonMark link for one #**stream** or #**stream>topic** match.

    Args:
        match: Regex match whose group(1) is the text between '#**' and '**'.

    Returns:
        '[#stream > topic](URL)' when the text contains '>', otherwise
        '[#stream](URL)'.
    """
    content = match.group(1)  # Everything between #** and **

    # Only the first '>' separates stream from topic; any later '>' stays in
    # the topic.
    stream, has_topic, topic = content.partition('>')
    stream = stream.strip()

    # Get stream ID, fallback to 0 if not found
    stream_id = STREAM_MAP.get(stream, '0')

    # Spaces become dashes and everything else is escaped like a topic, so a
    # stream name containing spaces or punctuation cannot break the (...)
    # link destination.  Names made of ASCII letters, digits and '-' come out
    # unchanged.
    stream_slug = zulip_encode_topic(stream.replace(' ', '-'))
    url = f'https://oilshell.zulipchat.com/#narrow/channel/{stream_id}-{stream_slug}'

    if not has_topic:
        # Stream-only reference
        return f'[#{stream}]({url})'

    topic = topic.strip()

    # The trailing /with/message_id part is intentionally omitted.
    url = f'{url}/topic/{zulip_encode_topic(topic)}'

    # Render with proper spacing around >
    return f'[#{stream} > {topic}]({url})'
82
83
# Convert Zulip stream/topic links.  A reference has to sit on a single line,
# so a stray '#**' cannot pair up with a '**' further down the text.
stream_topic_pattern = re.compile(
    r'''
    \#\*\*          # Match #**
    ([^*\n]+)       # Capture stream>topic or just stream; never spans lines
    \*\*            # Match closing **
''', re.VERBOSE)
91
92
# Substitution callback for bare URLs: put the URL inside angle brackets so
# CommonMark renders it as an autolink.  Which URLs count as "bare" is decided
# by the pattern this callback is used with.
def replace_bare_url(match):
    """Return the entire matched text enclosed in '<' and '>'."""
    return '<' + match.group(0) + '>'
98
99
# Convert bare URLs to auto-linkable format.
#
# A URL is left alone when it is directly preceded by '(' or '<', or when the
# character right after it is ')' or '>': in those positions it looks like
# part of an existing markdown link or autolink.
bare_url_pattern = re.compile(
    r'''
    (?<![(<])       # Negative lookbehind: not preceded by ( or <
    (https?://      # Capture group: http:// or https://
    [^\s)>]+        # Followed by non-whitespace, non-), non-> chars
    )               # End capture group
    (?!\S)          # The URL must run up to whitespace or end of input.  The
                    # run above only stops early at ) or >, so this rejects a
                    # URL followed by either one.  Testing just for ) or > here
                    # is not enough: the run would backtrack by one character
                    # and match a truncated URL.
''', re.VERBOSE)
109
110
def convert_zulip_to_commonmark(content):
    """Rewrite the Zulip-specific markdown in content as CommonMark.

    Stream/topic references are turned into links first; bare URLs are then
    wrapped in angle brackets.  The converted text is returned.
    """
    with_links = stream_topic_pattern.sub(replace_stream_topic, content)
    return bare_url_pattern.sub(replace_bare_url, with_links)
119
120
def main():
    """Act as a filter: read all of stdin, convert it, write it to stdout."""
    sys.stdout.write(convert_zulip_to_commonmark(sys.stdin.read()))
126
# Run the stdin -> stdout conversion only when executed as a script, so the
# module can be imported without side effects.
if __name__ == '__main__':
    main()