OILS / web / search.js View on Github | oils.pub

186 lines, 156 significant
1// Fetch the search index only when the user wants to search. Then cache it thereafter.
2//
3// We also have to avoid race conditions. On very slow networks, multiple calls to getIndex can be
4// made before the fetch completes. To remedy this, we store the promise returning the index in
5// _index. Then any calls to await getIndex() implicitly wait on this single promise.
6let _index;
7function getIndex() {
8 if (!_index) {
9 _index = (async () => {
10 const res = await fetch(`${window.basePath}/index.json`);
11 _index = await res.json();
12 return _index;
13 })();
14 }
15
16 return _index;
17}
18
19/**
20 * True Damerau–Levenshtein distance (allows multiple transpositions).
21 * Adapted from [0] with help from ChatGPT 5.2.
22 *
23 * Computes the DL distance between textA and textB. This is the number of edits between A and B. The possible edits are:
24 * - Insertion (adding a single char)
25 * - Deletion (removing a single char)
26 * - Substitution (changing a single char)
27 * - Transposition (swapping the locations of 2 chars)
28 *
29 * [0] https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Distance_with_adjacent_transpositions
30 */
31function damerauLevenshteinDistance(textA, textB) {
32 const lenA = textA.length;
33 const lenB = textB.length;
34
35 const INF = lenA + lenB;
36
37 // lastRowByChar == da in the pseudocode:
38 // maps a character -> last row index (1..lenA) where it appeared in textA
39 const lastRowByChar = new Map();
40
41 // dist is the shifted version of d. Size: (lenA+2) x (lenB+2)
42 const dist = Array.from({ length: lenA + 2 }, () => new Array(lenB + 2).fill(0));
43
44 // Initialize sentinel borders
45 dist[0][0] = INF;
46 for (let i = 0; i <= lenA; i++) {
47 dist[i + 1][0] = INF; // d[i, -1]
48 dist[i + 1][1] = i; // d[i, 0]
49 }
50 for (let j = 0; j <= lenB; j++) {
51 dist[0][j + 1] = INF; // d[-1, j]
52 dist[1][j + 1] = j; // d[ 0, j]
53 }
54
55 // Main dynamic programming algorithm
56 for (let i = 1; i <= lenA; i++) {
57 let lastMatchingColInB = 0; // db in the pseudocode (last column j where a[i] matched)
58 const charA = textA[i - 1];
59
60 for (let j = 1; j <= lenB; j++) {
61 const charB = textB[j - 1];
62
63 const lastRowWithCharBInA = lastRowByChar.get(charB) ?? 0; // k := da[b[j]]
64 const lastMatchingColForThisRow = lastMatchingColInB; // ℓ := db
65
66 const cost = (charA === charB) ? 0 : 1;
67 if (cost === 0) lastMatchingColInB = j;
68
69 // Shifted accesses:
70 // d[i-1, j-1] -> dist[i][j]
71 // d[i, j-1] -> dist[i+1][j]
72 // d[i-1, j ] -> dist[i][j+1]
73 const substitution = dist[i][j] + cost;
74 const insertion = dist[i + 1][j] + 1;
75 const deletion = dist[i][j + 1] + 1;
76
77 // Transposition term (shifted):
78 // d[k-1, ℓ-1] in pseudocode -> dist[k][ℓ]
79 const transposition =
80 dist[lastRowWithCharBInA][lastMatchingColForThisRow] +
81 (i - lastRowWithCharBInA - 1) +
82 cost +
83 (j - lastMatchingColForThisRow - 1);
84
85 dist[i + 1][j + 1] = Math.min(substitution, insertion, deletion, transposition);
86 }
87
88 // da[a[i]] := i
89 lastRowByChar.set(charA, i);
90 }
91
92 // return d[lenA, lenB] -> dist[lenA+1][lenB+1]
93 return dist[lenA + 1][lenB + 1];
94}
95
96/**
97 * Search the global `index` against `query`.
98 *
99 * The matching algorithm is as follows, include all items with:
100 * - An exact match
101 * - Damerau-Levenshtein distance <= 5
102 *
103 * Note that all matching is case-insensitive.
104 *
105 * Only the top 25 results are given. We first show the exact matches, and then
106 * the inexact matches ranked by decreasing DL distance.
107 */
108async function search(query) {
109 if (!query) {
110 return [];
111 }
112
113 const index = await getIndex();
114
115 // We do case insensitive matching.
116 query = query.toLowerCase();
117
118 // Results is a list of pairs [entry, rank] where a lower rank is better.
119 const results = [];
120 for (const entry of index) {
121 const symbol = entry.symbol.toLowerCase();
122 if (symbol.includes(query)) {
123 results.push([entry, 0]);
124 continue;
125 }
126
127 const distance = damerauLevenshteinDistance(query, entry.symbol);
128 if (distance > 5) {
129 continue;
130 }
131
132 results.push([entry, distance]);
133 }
134
135 results.sort(([_a, aRank], [_b, bRank]) => aRank - bRank);
136 return results.map(([entry, _rank]) => entry).slice(0, 25);
137}
138
139function searchbar() {
140 const searchDiv = document.getElementById("search");
141
142 // Create the searchbar dynamically so that users without JS enabled don't get
143 // a broken searchbar
144 const searchbar = document.createElement('input');
145 searchbar.id = 'searchbar';
146 searchbar.setAttribute('placeholder', 'Search');
147 searchbar.setAttribute('title', 'Search');
148 searchbar.setAttribute('autocapitalize', 'none');
149 searchbar.setAttribute('enterkeyhint', 'search');
150 searchDiv.appendChild(searchbar);
151
152 // We show a loading bar on the first fetch.
153 const loading = document.createElement('p');
154 loading.innerText = 'Loading...';
155 const showLoading = () => loading.style.display = 'block';
156 const hideLoading = () => loading.style.display = 'none';
157 hideLoading();
158 searchDiv.appendChild(loading);
159
160 let resultsList = null;
161 searchbar.addEventListener('input', async (event) => {
162 showLoading();
163 const query = event.target.value;
164 const results = await search(query);
165 hideLoading();
166
167 // Clear the previous results, if present
168 if (resultsList) {
169 resultsList.remove();
170 }
171
172 resultsList = document.createElement('ul');
173 searchDiv.appendChild(resultsList);
174
175 for (const result of results) {
176 const item = document.createElement('li');
177 const link = document.createElement('a');
178 link.innerHTML = result.symbol;
179 link.href = window.basePath + '/' + result.anchor;
180 item.appendChild(link);
181 resultsList.appendChild(item);
182 }
183 });
184}
185
186searchbar();