OILS / web / search.js View on Github | oils.pub

265 lines, 218 significant
1// Fetch the search index only when the user wants to search. Then cache it thereafter.
2//
3// We also have to avoid race conditions. On very slow networks, multiple calls to getIndex can be
4// made before the fetch completes. To remedy this, we store the promise returning the index in
5// _index. Then any calls to await getIndex() implicitly wait on this single promise.
6let _index;
7function getIndex() {
8 if (!_index) {
9 _index = (async () => {
10 const res = await fetch(`${window.basePath}/index.json`);
11 _index = await res.json();
12 return _index;
13 })();
14 }
15
16 return _index;
17}
18
19/**
20 * True Damerau–Levenshtein distance (allows multiple transpositions).
21 * Adapted from [0] with help from ChatGPT 5.2.
22 *
23 * Computes the DL distance between textA and textB. This is the number of edits between A and B. The possible edits are:
24 * - Insertion (adding a single char)
25 * - Deletion (removing a single char)
26 * - Substitution (changing a single char)
27 * - Transposition (swapping the locations of 2 chars)
28 *
29 * [0] https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Distance_with_adjacent_transpositions
30 */
31function damerauLevenshteinDistance(textA, textB) {
32 const lenA = textA.length;
33 const lenB = textB.length;
34
35 const INF = lenA + lenB;
36
37 // lastRowByChar == da in the pseudocode:
38 // maps a character -> last row index (1..lenA) where it appeared in textA
39 const lastRowByChar = new Map();
40
41 // dist is the shifted version of d. Size: (lenA+2) x (lenB+2)
42 const dist = Array.from({ length: lenA + 2 }, () => new Array(lenB + 2).fill(0));
43
44 // Initialize sentinel borders
45 dist[0][0] = INF;
46 for (let i = 0; i <= lenA; i++) {
47 dist[i + 1][0] = INF; // d[i, -1]
48 dist[i + 1][1] = i; // d[i, 0]
49 }
50 for (let j = 0; j <= lenB; j++) {
51 dist[0][j + 1] = INF; // d[-1, j]
52 dist[1][j + 1] = j; // d[ 0, j]
53 }
54
55 // Main dynamic programming algorithm
56 for (let i = 1; i <= lenA; i++) {
57 let lastMatchingColInB = 0; // db in the pseudocode (last column j where a[i] matched)
58 const charA = textA[i - 1];
59
60 for (let j = 1; j <= lenB; j++) {
61 const charB = textB[j - 1];
62
63 const lastRowWithCharBInA = lastRowByChar.get(charB) ?? 0; // k := da[b[j]]
64 const lastMatchingColForThisRow = lastMatchingColInB; // ℓ := db
65
66 const cost = (charA === charB) ? 0 : 1;
67 if (cost === 0) lastMatchingColInB = j;
68
69 // Shifted accesses:
70 // d[i-1, j-1] -> dist[i][j]
71 // d[i, j-1] -> dist[i+1][j]
72 // d[i-1, j ] -> dist[i][j+1]
73 const substitution = dist[i][j] + cost;
74 const insertion = dist[i + 1][j] + 1;
75 const deletion = dist[i][j + 1] + 1;
76
77 // Transposition term (shifted):
78 // d[k-1, ℓ-1] in pseudocode -> dist[k][ℓ]
79 const transposition =
80 dist[lastRowWithCharBInA][lastMatchingColForThisRow] +
81 (i - lastRowWithCharBInA - 1) +
82 cost +
83 (j - lastMatchingColForThisRow - 1);
84
85 dist[i + 1][j + 1] = Math.min(substitution, insertion, deletion, transposition);
86 }
87
88 // da[a[i]] := i
89 lastRowByChar.set(charA, i);
90 }
91
92 // return d[lenA, lenB] -> dist[lenA+1][lenB+1]
93 return dist[lenA + 1][lenB + 1];
94}
95
96/**
97 * Compute a match rank for a single label against query.
98 * Lower is better. 0 = exact substring match, 1..5 = fuzzy, Infinity = no match.
99 *
100 * Expects both symbol and query to be all lowercase.
101 */
102function rankSymbol(symbol, query) {
103 // Exact match
104 if (symbol.includes(query)) return 0;
105
106 const d = damerauLevenshteinDistance(query, symbol);
107 return d <= 5 ? d : Infinity;
108}
109
110/**
111 * Recursively filter the index to only branches containing matches.
112 *
113 * `query` must be a lowercase string.
114 */
115function filterAndRank(index, query) {
116 const kept = [];
117 for (const node of index) {
118 const symbol = node.symbol.toLowerCase();
119 const anchor = node.anchor;
120 const children = node.children ?? []; // We omit the children in the index when empty
121
122 const selfRank = rankSymbol(symbol, query);
123 const keptChildren = filterAndRank(children, query);
124
125 // Best descendant rank (if any)
126 let bestChildRank = Infinity;
127 for (const c of keptChildren) {
128 if (c._rank < bestChildRank) bestChildRank = c._rank;
129 }
130
131 // Keep node if it matches or has matching descendants
132 const bestRank = Math.min(selfRank, bestChildRank);
133 if (bestRank !== Infinity) {
134 const copy = { symbol: node.symbol, anchor, children: keptChildren };
135
136 // Store rank for sorting
137 copy._rank = bestRank;
138
139 kept.push(copy);
140 }
141 }
142
143 kept.sort((a, b) => (a._rank ?? Infinity) - (b._rank ?? Infinity));
144 return kept;
145}
146
147/**
148 * Trim the pruned tree to at most `limit` total rendered links (nodes).
149 * Keeps structure (parents) only as needed to show kept children.
150 *
151 * This way searching 'a' doesn't fill the entire page with junk.
152 * TODO: pagination / a next button
153 */
154function trimResults(results, limit) {
155 let count = 0;
156
157 function walk(list) {
158 const out = [];
159 for (const node of list) {
160 if (count >= limit) break;
161
162 // Count this node as one rendered link
163 count += 1;
164
165 const trimmedChildren = walk(node.children);
166
167 out.push({
168 symbol: node.symbol,
169 anchor: node.anchor,
170 children: trimmedChildren,
171 });
172 }
173 return out;
174 }
175
176 return walk(results);
177}
178
179/**
180 * Search across the website using the global search index.
181 * Returns a pruned tree (array of nodes) ready to render (see renderResults).
182 *
183 * Matching algorithm:
184 * - Exact substring match: rank 0
185 * - DL distance <= 5: rank 1..5
186 * - Otherwise excluded
187 *
188 * Only the top `limit` rendered nodes are returned (default 25).
189 */
190async function search(query) {
191 if (!query) return [];
192
193 const index = await getIndex();
194 const pruned = filterAndRank(index, query.toLowerCase());
195 return trimResults(pruned, /* limit */ 25);
196}
197
198/**
199 * Render as nested <ul>/<li> where *every* item is a link.
200 */
201function renderResults(nodes) {
202 const ul = document.createElement("ul");
203
204 for (const node of nodes) {
205 const li = document.createElement("li");
206
207 const a = document.createElement("a");
208 a.textContent = String(node.symbol ?? "");
209 a.href = (typeof node.anchor === "string")
210 ? window.basePath + "/" + node.anchor
211 : "#";
212 li.appendChild(a);
213
214 const children = Array.isArray(node.children) ? node.children : [];
215 if (children.length) {
216 li.appendChild(renderResults(children));
217 }
218
219 ul.appendChild(li);
220 }
221
222 return ul;
223}
224
225function searchbar() {
226 const searchDiv = document.getElementById("search");
227
228 // Create the searchbar dynamically so that users without JS enabled don't get
229 // a broken searchbar
230 const searchbar = document.createElement("input");
231 searchbar.id = "searchbar";
232 searchbar.setAttribute("placeholder", "Search");
233 searchbar.setAttribute("title", "Search");
234 searchbar.setAttribute("autocapitalize", "none");
235 searchbar.setAttribute("enterkeyhint", "search");
236 searchDiv.appendChild(searchbar);
237
238 // We show a loading bar on the first fetch.
239 const loading = document.createElement("p");
240 loading.innerText = "Loading...";
241 const showLoading = () => (loading.style.display = "block");
242 const hideLoading = () => (loading.style.display = "none");
243 hideLoading();
244 searchDiv.appendChild(loading);
245
246 let resultsList = null;
247 searchbar.addEventListener("input", async (event) => {
248 showLoading();
249 const query = event.target.value;
250
251 const prunedTree = await search(query);
252
253 hideLoading();
254
255 // We have to clear the previous results, if present
256 if (resultsList) {
257 resultsList.remove();
258 }
259
260 resultsList = renderResults(prunedTree);
261 searchDiv.appendChild(resultsList);
262 });
263}
264
265searchbar();