OILS / web / search.js View on Github | oils.pub

269 lines, 221 significant
1// Fetch the search index only when the user wants to search. Then cache it thereafter.
2//
3// We also have to avoid race conditions. On very slow networks, multiple calls to getIndex can be
4// made before the fetch completes. To remedy this, we store the promise returning the index in
5// _index. Then any calls to await getIndex() implicitly wait on this single promise.
6let _index;
7function getIndex() {
8 if (!_index) {
9 _index = (async () => {
10 const res = await fetch(`${window.basePath}/index.json`);
11 _index = await res.json();
12 return _index;
13 })();
14 }
15
16 return _index;
17}
18
19/**
20 * True Damerau–Levenshtein distance (allows multiple transpositions).
21 * Adapted from [0] with help from ChatGPT 5.2.
22 *
23 * Computes the DL distance between textA and textB. This is the number of edits between A and B. The possible edits are:
24 * - Insertion (adding a single char)
25 * - Deletion (removing a single char)
26 * - Substitution (changing a single char)
27 * - Transposition (swapping the locations of 2 chars)
28 *
29 * [0] https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Distance_with_adjacent_transpositions
30 */
31function damerauLevenshteinDistance(textA, textB) {
32 const lenA = textA.length;
33 const lenB = textB.length;
34
35 const INF = lenA + lenB;
36
37 // lastRowByChar == da in the pseudocode:
38 // maps a character -> last row index (1..lenA) where it appeared in textA
39 const lastRowByChar = new Map();
40
41 // dist is the shifted version of d. Size: (lenA+2) x (lenB+2)
42 const dist = Array.from({ length: lenA + 2 }, () => new Array(lenB + 2).fill(0));
43
44 // Initialize sentinel borders
45 dist[0][0] = INF;
46 for (let i = 0; i <= lenA; i++) {
47 dist[i + 1][0] = INF; // d[i, -1]
48 dist[i + 1][1] = i; // d[i, 0]
49 }
50 for (let j = 0; j <= lenB; j++) {
51 dist[0][j + 1] = INF; // d[-1, j]
52 dist[1][j + 1] = j; // d[ 0, j]
53 }
54
55 // Main dynamic programming algorithm
56 for (let i = 1; i <= lenA; i++) {
57 let lastMatchingColInB = 0; // db in the pseudocode (last column j where a[i] matched)
58 const charA = textA[i - 1];
59
60 for (let j = 1; j <= lenB; j++) {
61 const charB = textB[j - 1];
62
63 const lastRowWithCharBInA = lastRowByChar.get(charB) ?? 0; // k := da[b[j]]
64 const lastMatchingColForThisRow = lastMatchingColInB; // ℓ := db
65
66 const cost = (charA === charB) ? 0 : 1;
67 if (cost === 0) lastMatchingColInB = j;
68
69 // Shifted accesses:
70 // d[i-1, j-1] -> dist[i][j]
71 // d[i, j-1] -> dist[i+1][j]
72 // d[i-1, j ] -> dist[i][j+1]
73 const substitution = dist[i][j] + cost;
74 const insertion = dist[i + 1][j] + 1;
75 const deletion = dist[i][j + 1] + 1;
76
77 // Transposition term (shifted):
78 // d[k-1, ℓ-1] in pseudocode -> dist[k][ℓ]
79 const transposition =
80 dist[lastRowWithCharBInA][lastMatchingColForThisRow] +
81 (i - lastRowWithCharBInA - 1) +
82 cost +
83 (j - lastMatchingColForThisRow - 1);
84
85 dist[i + 1][j + 1] = Math.min(substitution, insertion, deletion, transposition);
86 }
87
88 // da[a[i]] := i
89 lastRowByChar.set(charA, i);
90 }
91
92 // return d[lenA, lenB] -> dist[lenA+1][lenB+1]
93 return dist[lenA + 1][lenB + 1];
94}
95
96/**
97 * Compute a match rank for a single label against query.
98 * Lower is better. 0 = exact substring match, 1..5 = fuzzy, Infinity = no match.
99 *
100 * Expects both symbol and query to be all lowercase.
101 */
102function rankSymbol(symbol, query) {
103 // Exact match
104 if (symbol.includes(query)) return 0;
105
106 const d = damerauLevenshteinDistance(query, symbol);
107 return d <= 5 ? d : Infinity;
108}
109
110/**
111 * Recursively filter the index to only branches containing matches.
112 *
113 * `query` must be a lowercase string.
114 */
115function filterAndRank(index, query) {
116 const kept = [];
117 for (const node of index) {
118 const symbol = node.symbol.toLowerCase();
119 const anchor = node.anchor;
120 const children = node.children ?? []; // We omit the children in the index when empty
121
122 const selfRank = rankSymbol(symbol, query);
123 const keptChildren = filterAndRank(children, query);
124
125 // Best descendant rank (if any)
126 let bestChildRank = Infinity;
127 for (const c of keptChildren) {
128 if (c._rank < bestChildRank) bestChildRank = c._rank;
129 }
130
131 // Keep node if it matches or has matching descendants
132 const bestRank = Math.min(selfRank, bestChildRank);
133 if (bestRank !== Infinity) {
134 const copy = { symbol: node.symbol, anchor, children: keptChildren };
135
136 // Store rank for sorting
137 copy._rank = bestRank;
138
139 kept.push(copy);
140 }
141 }
142
143 kept.sort((a, b) => (a._rank ?? Infinity) - (b._rank ?? Infinity));
144
145 return kept.filter((x) => {
146 delete x._rank;
147 return x;
148 });
149}
150
151/**
152 * Trim the pruned tree to at most `limit` total rendered links (nodes).
153 * Keeps structure (parents) only as needed to show kept children.
154 *
155 * This way searching 'a' doesn't fill the entire page with junk.
156 * TODO: pagination / a next button
157 */
158function trimResults(results, limit) {
159 let count = 0;
160
161 function walk(list) {
162 const out = [];
163 for (const node of list) {
164 if (count >= limit) break;
165
166 // Count this node as one rendered link
167 count += 1;
168
169 const trimmedChildren = walk(node.children);
170
171 out.push({
172 symbol: node.symbol,
173 anchor: node.anchor,
174 children: trimmedChildren,
175 });
176 }
177 return out;
178 }
179
180 return walk(results);
181}
182
183/**
184 * Search across the website using the global search index.
185 * Returns a pruned tree (array of nodes) ready to render (see renderResults).
186 *
187 * Matching algorithm:
188 * - Exact substring match: rank 0
189 * - DL distance <= 5: rank 1..5
190 * - Otherwise excluded
191 *
192 * Only the top `limit` rendered nodes are returned (default 25).
193 */
194async function search(query) {
195 if (!query) return [];
196
197 const index = await getIndex();
198 const pruned = filterAndRank(index, query.toLowerCase());
199 return trimResults(pruned, /* limit */ 25);
200}
201
202/**
203 * Render as nested <ul>/<li> where *every* item is a link.
204 */
205function renderResults(nodes) {
206 const ul = document.createElement("ul");
207
208 for (const node of nodes) {
209 const li = document.createElement("li");
210
211 const a = document.createElement("a");
212 a.textContent = String(node.symbol ?? "");
213 a.href = (typeof node.anchor === "string")
214 ? window.basePath + "/" + node.anchor
215 : "#";
216 li.appendChild(a);
217
218 const children = Array.isArray(node.children) ? node.children : [];
219 if (children.length) {
220 li.appendChild(renderResults(children));
221 }
222
223 ul.appendChild(li);
224 }
225
226 return ul;
227}
228
229function searchbar() {
230 const searchDiv = document.getElementById("search");
231
232 // Create the searchbar dynamically so that users without JS enabled don't get
233 // a broken searchbar
234 const searchbar = document.createElement("input");
235 searchbar.id = "searchbar";
236 searchbar.setAttribute("placeholder", "Search");
237 searchbar.setAttribute("title", "Search");
238 searchbar.setAttribute("autocapitalize", "none");
239 searchbar.setAttribute("enterkeyhint", "search");
240 searchDiv.appendChild(searchbar);
241
242 // We show a loading bar on the first fetch.
243 const loading = document.createElement("p");
244 loading.innerText = "Loading...";
245 const showLoading = () => (loading.style.display = "block");
246 const hideLoading = () => (loading.style.display = "none");
247 hideLoading();
248 searchDiv.appendChild(loading);
249
250 let resultsList = null;
251 searchbar.addEventListener("input", async (event) => {
252 showLoading();
253 const query = event.target.value;
254
255 const prunedTree = await search(query);
256
257 hideLoading();
258
259 // We have to clear the previous results, if present
260 if (resultsList) {
261 resultsList.remove();
262 }
263
264 resultsList = renderResults(prunedTree);
265 searchDiv.appendChild(resultsList);
266 });
267}
268
269searchbar();