diff --git a/.gitignore b/.gitignore index 09bfb38e88e7..b98bdb265323 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .algolia-cache +.search-cache .DS_Store .env /node_modules/ diff --git a/contributing/search.md b/contributing/search.md index 4da0507ebf9b..589c377419b4 100644 --- a/contributing/search.md +++ b/contributing/search.md @@ -90,10 +90,10 @@ Why do we need this? For our daily shipping needs, it's tolerable that search up ### Code files -- [javascripts/search.js](javascripts/search.js) - The browser-side code that enables search using Algolia's [InstantSearch.js](https://github.com/algolia/instantsearch.js/) library. -- [lib/algolia/client.js](lib/algolia/client.js) - A thin wrapper around the [algoliasearch](https://ghub.io/algoliasearch) Node.js module for interacting with the Algolia API. -- [lib/algolia/search-index.js](lib/algolia/search-index.js) - A class for generating structured search data from repository content and syncing it with the remote Algolia service. This class has built-in validation to ensure that all records are valid before they're uploaded. This class also takes care of removing deprecated records, and compares existing remote records with the latest local records to avoid uploading records that haven't changed. -- [script/sync-algolia-search-indices.js](script/sync-algolia-search-indices.js) - The script used by the Actions workflow to update search indices on our Algolia account. This can also be [run in the development environment](#development). +- [javascripts/search.js](javascripts/search.js) - The browser-side code that enables search. +- [lib/search/algolia-client.js](lib/search/algolia-client.js) - A thin wrapper around the [algoliasearch](https://ghub.io/algoliasearch) Node.js module for interacting with the Algolia API. +- [lib/search/algolia-search-index.js](lib/search/algolia-search-index.js) - A class for generating structured search data from repository content and syncing it with the remote Algolia service. This class has built-in validation to ensure that all records are valid before they're uploaded. This class also takes care of removing deprecated records, and compares existing remote records with the latest local records to avoid uploading records that haven't changed. +- [script/sync-search-indices.js](script/sync-search-indices.js) - The script used by the Actions workflow to update search indices on our Algolia account. This can also be [run in the development environment](#development). - [tests/algolia-search.js](tests/algolia-search.js) - Tests! ## Indices @@ -136,4 +136,4 @@ Each record represents a section of a page. Sections are derived by splitting up - It's not strictly necessary to set an `objectID` as Algolia will create one automatically, but by creating our own we have a guarantee that subsequent invocations of this upload script will overwrite existing records instead of creating numerous duplicate records with differing IDs. - Algolia has typo tolerance. Try spelling something wrong and see what you get! - Algolia has lots of controls for customizing each index, so we can add weights to certain attributes and create rules like "title is more important than body", etc. But it works pretty well as-is without any configuration. -- Algolia has support for "advanced query syntax" for exact matching of quoted expressions and exclusion of words preceded by a `-` sign. This is off by default but we have it enabled in our browser client. This and many other settings can be configured in Algolia.com web interface. The settings in the web interface can be overridden by the InstantSearch.js client. See [javascripts/search.js]([javascripts/search.js). +- Algolia has support for "advanced query syntax" for exact matching of quoted expressions and exclusion of words preceded by a `-` sign. This is off by default but we have it enabled in our browser client. This and many other settings can be configured in Algolia.com web interface. The settings in the web interface can be overridden by the search endpoint. See [middleware/search.js]([middleware/search.js). diff --git a/includes/search-form.html b/includes/search-form.html index 34a2e4854005..5749ba0803b0 100644 --- a/includes/search-form.html +++ b/includes/search-form.html @@ -5,8 +5,6 @@ - On all other pages, in the header --> - + diff --git a/javascripts/experiment.js b/javascripts/experiment.js index 200e718455cd..25e7660b4468 100644 --- a/javascripts/experiment.js +++ b/javascripts/experiment.js @@ -1,5 +1,6 @@ import murmur from 'imurmurhash' import { getUserEventsId, sendEvent } from './events' +// import h from './hyperscript' const TREATMENT = 'TREATMENT' const CONTROL = 'CONTROL' @@ -19,23 +20,6 @@ export async function sendSuccess (test) { }) } -const xmlns = 'http://www.w3.org/2000/svg' - -export function h (tagName, attributes = {}, children = []) { - const el = ['svg', 'path'].includes(tagName) - ? document.createElementNS(xmlns, tagName) - : document.createElement(tagName) - Object.entries(attributes).forEach( - ([key, value]) => el.setAttribute(key, value) - ) - children.forEach(child => - typeof child === 'string' - ? el.append(document.createTextNode(child)) - : el.append(child) - ) - return el -} - export default function () { // const testName = '$test-name$' // const xbucket = bucket(testName) diff --git a/javascripts/fake-hogan.js b/javascripts/fake-hogan.js deleted file mode 100644 index 1846a52f38e9..000000000000 --- a/javascripts/fake-hogan.js +++ /dev/null @@ -1,15 +0,0 @@ -// This module overrides "Hogan" that instantsearch.js uses -// Hogan uses `new Function`, -// so we can't use it with our content security policy. -// Turns out, we use all our own templates anyway, -// so we just have to shim out Hogan so it doesn't error! - -export default { - compile (template) { - return { - render (data) { - return '' - } - } - } -} diff --git a/javascripts/hyperscript.js b/javascripts/hyperscript.js new file mode 100644 index 000000000000..7c09d2e33ee6 --- /dev/null +++ b/javascripts/hyperscript.js @@ -0,0 +1,44 @@ +const xmlns = 'http://www.w3.org/2000/svg' + +const plainObjectConstructor = {}.constructor + +function exists (value) { + return value !== null && typeof value !== 'undefined' +} + +function isPlainObject (value) { + return value.constructor === plainObjectConstructor +} + +function isString (value) { + return typeof value === 'string' +} + +function renderChildren (el, children) { + for (const child of children) { + if (isPlainObject(child)) { + Object.entries(child) + .filter(([key, value]) => exists(value)) + .forEach(([key, value]) => el.setAttribute(key, value)) + } else if (Array.isArray(child)) { + renderChildren(el, child) + } else if (isString(child)) { + el.append(document.createTextNode(child)) + } else { + el.append(child) + } + } +} + +export default function h (tagName, ...children) { + const el = ['svg', 'path'].includes(tagName) + ? document.createElementNS(xmlns, tagName) + : document.createElement(tagName) + renderChildren(el, children) + return el +} + +export const tags = Object.fromEntries( + ['div', 'form', 'a', 'input', 'button', 'ol', 'li', 'em'] + .map(tagName => [tagName, (...args) => h(tagName, ...args)]) +) diff --git a/javascripts/search.js b/javascripts/search.js index 2e68ffdbd9e2..607dd1f91afb 100644 --- a/javascripts/search.js +++ b/javascripts/search.js @@ -1,9 +1,6 @@ +import { tags } from './hyperscript' import { sendEvent } from './events' -const instantsearch = require('instantsearch.js').default -const { searchBox, hits, configure, analytics } = require('instantsearch.js/es/widgets') -const algoliasearch = require('algoliasearch') const searchWithYourKeyboard = require('search-with-your-keyboard') -const querystring = require('querystring') const truncate = require('html-truncate') const languages = require('../lib/languages') const allVersions = require('../lib/all-versions') @@ -12,261 +9,96 @@ const nonEnterpriseDefaultVersion = require('../lib/non-enterprise-default-versi const languageCodes = Object.keys(languages) const maxContentLength = 300 -const hasStandaloneSearch = () => document.getElementById('landing') || document.querySelector('body.error-404') !== null - -const resultTemplate = (item) => { - // Attach an `algolia-query` param to each result link so analytics - // can track the search query that led the user to this result - const input = document.querySelector('#search-input-container input') - if (input) { - const url = new URL(item.objectID, window.location.origin) - const queryParams = new URLSearchParams(url.search.slice(1)) - queryParams.append('algolia-query', input.value) - url.search = queryParams.toString() - item.modifiedURL = url.toString() - } +let $searchInputContainer +let $searchResultsContainer +let $searchOverlay +let $searchInput - // Display page title and heading (if present exists) - const title = item._highlightResult.heading - ? [item._highlightResult.title.value, item._highlightResult.heading.value].join(': ') - : item._highlightResult.title.value +let placeholder = 'Search topics, products...' +let version +let language - // Remove redundant title from the end of breadcrumbs - if (item.breadcrumbs && item.breadcrumbs.endsWith(item.title)) { - item.modifiedBreadcrumbs = item.breadcrumbs.replace(' / ' + item.title, '') - } else { - item.modifiedBreadcrumbs = item.breadcrumbs - } +export default function search () { + $searchInputContainer = document.getElementById('search-input-container') + $searchResultsContainer = document.getElementById('search-results-container') - // Truncate and ellipsize the content string without breaking any HTML - // within it, such as the tags added by Algolia for emphasis. - item.modifiedContent = truncate(item._highlightResult.content.value, maxContentLength) - - // Construct the template to return - const html = ` -
- -
${item.modifiedBreadcrumbs}
-
${title}
-
${item.modifiedContent}
-
-
- ` - - // Sanitize the link's href attribute using the DOM API to prevent XSS - const fragment = document.createRange().createContextualFragment(html) - fragment.querySelector('a').setAttribute('href', item.modifiedURL) - const div = document.createElement('div') - div.appendChild(fragment.cloneNode(true)) - - return div.innerHTML -} + if (!$searchInputContainer || !$searchResultsContainer) return -export default function () { - if (!document.querySelector('#search-results-container')) return - - window.initialPageLoad = true - const opts = { - - // https://www.algolia.com/apps/ZI5KPY1HBE/dashboard - // This API key is public. There's also a private API key for writing to the Algolia API - searchClient: algoliasearch('ZI5KPY1HBE', '685df617246c3a10abba589b4599288f'), - - // There's an index for every version/language combination - indexName: `github-docs-${deriveVersionFromPath()}-${deriveLanguageCodeFromPath()}`, - - // allows "phrase queries" and "prohibit operator" - // https://www.algolia.com/doc/api-reference/api-parameters/advancedSyntax/ - advancedSyntax: true, - - // sync query params to search input - routing: true, - - searchFunction: helper => { - // console.log('searchFunction', helper.state) - const query = helper.state.query - const queryPresent = query && query.length > 0 - const results = document.querySelector('.ais-Hits') - // avoid conducting an empty search on page load; - if (window.initialPageLoad && !queryPresent) return - - // after page load, search should be executed (even if the query is empty) - // so as not to upset the default instantsearch.js behaviors like clearing - // the input when [x] is clicked. - helper.search() - - // If on homepage, toggle results container if query is present - if (hasStandaloneSearch()) { - const container = document.getElementById('search-results-container') - // Primer classNames for showing and hiding the results container - const activeClass = container.getAttribute('data-active-class') - const inactiveClass = container.getAttribute('data-inactive-class') - - if (!activeClass) { - console.error('container is missing required `data-active-class` attribute', container) - return - } - - if (!inactiveClass) { - console.error('container is missing required `data-inactive-class` attribute', container) - return - } - - // hide the container when no query is present - container.classList.toggle(activeClass, queryPresent) - container.classList.toggle(inactiveClass, !queryPresent) - } - - // Hack to work around a mysterious bug where the input is not cleared - // when the [x] is clicked. Note: this bug only occurs on pages - // loaded with a ?query=foo param already present - if (!queryPresent) { - setTimeout(() => { - document.querySelector('#search-input-container input').value = '' - }, 50) - results.style.display = 'none' - } - - if (queryPresent && results) results.style.display = 'block' - window.initialPageLoad = false - toggleSearchDisplay() - } - } + $searchOverlay = document.querySelector('.search-overlay-desktop') - const search = instantsearch(opts) + // There's an index for every version/language combination + version = deriveVersionFromPath() + language = deriveLanguageCodeFromPath() // Find search placeholder text in a tag, falling back to a default - const placeholderMeta = document.querySelector('meta[name="site.data.ui.search.placeholder"]') - const placeholder = placeholderMeta ? placeholderMeta.content : 'Search topics, products...' - - search.addWidgets( - [ - hits({ - container: '#search-results-container', - templates: { - empty: 'No results', - item: resultTemplate - }, - // useful for debugging template context, if needed - transformItems: items => { - // console.log(`transformItems`, items) - return items - } - }), - configure({ - analyticsTags: [ - 'site:docs.github.com', - `env:${process.env.NODE_ENV}` - ] - }), - searchBox({ - container: '#search-input-container', - placeholder, - // only autofocus on the homepage, and only if no #hash is present in the URL - autofocus: (hasStandaloneSearch()) && !window.location.hash.length, - showReset: false, - showSubmit: false - }), - analytics({ - pushFunction (params, state, results) { - sendEvent({ - type: 'search', - search_query: results.query - // search_context - }) - } - }) - ] - ) - - // enable for debugging - search.on('render', (...args) => { - // console.log(`algolia render`, args) - }) + const $placeholderMeta = document.querySelector('meta[name="site.data.ui.search.placeholder"]') + if ($placeholderMeta) { + placeholder = $placeholderMeta.content + } - search.on('error', (...args) => { - console.error('algolia error', args) - }) + $searchInputContainer.append(tmplSearchInput()) + $searchInput = $searchInputContainer.querySelector('input') - search.start() searchWithYourKeyboard('#search-input-container input', '.ais-Hits-item') toggleSearchDisplay() - // delay removal of the query param so analytics client code has a chance to track it - setTimeout(() => { removeAlgoliaQueryTrackingParam() }, 500) + $searchInput.addEventListener('keyup', debounce(onSearch)) } -// When a user performs an in-site search an `algolia-query` param is -// added to the URL so analytics can track the queries and the pages -// they lead to. This function strips the query from the URL after page load, -// so the bare article URL can be copied/bookmarked/shared, sans tracking param -function removeAlgoliaQueryTrackingParam () { - if ( - history && - history.replaceState && - location && - location.search && - location.search.includes('algolia-query=') - ) { - // parse the query string, remove the `algolia-query`, and put it all back together - let q = querystring.parse(location.search.replace(/^\?/, '')) - delete q['algolia-query'] - q = Object.keys(q).length ? '?' + querystring.stringify(q) : '' - - // update the URL in the address bar without modifying the history - history.replaceState(null, '', `${location.pathname}${q}${location.hash}`) - } +// The home page and 404 pages have a standalone search +function hasStandaloneSearch () { + return document.getElementById('landing') || + document.querySelector('body.error-404') !== null } -function toggleSearchDisplay (isReset) { - const input = document.querySelector('#search-input-container input') - const overlay = document.querySelector('.search-overlay-desktop') - - // If not on homepage... - if (!hasStandaloneSearch()) { - // Open modal if input is clicked - input.addEventListener('focus', () => { - openSearch() - }) - - // Close modal if overlay is clicked - if (overlay) { - overlay.addEventListener('click', () => { - closeSearch() - }) - } - - // Open modal if page loads with query in the params/input - if (input.value) { - openSearch() - } - } - +function toggleSearchDisplay () { // Clear/close search, if ESC is clicked document.addEventListener('keyup', (e) => { if (e.key === 'Escape') { closeSearch() } }) + + // If not on homepage... + if (hasStandaloneSearch()) return + + const $input = $searchInput + + // Open modal if input is clicked + $input.addEventListener('focus', () => { + openSearch() + }) + + // Close modal if overlay is clicked + if ($searchOverlay) { + $searchOverlay.addEventListener('click', () => { + closeSearch() + }) + } + + // Open modal if page loads with query in the params/input + if ($input.value) { + openSearch() + } } function openSearch () { - document.querySelector('#search-input-container input').classList.add('js-open') - document.querySelector('#search-results-container').classList.add('js-open') - document.querySelector('.search-overlay-desktop').classList.add('js-open') + $searchInput.classList.add('js-open') + $searchResultsContainer.classList.add('js-open') + $searchOverlay.classList.add('js-open') } function closeSearch () { // Close modal if not on homepage if (!hasStandaloneSearch()) { - document.querySelector('#search-input-container input').classList.remove('js-open') - document.querySelector('#search-results-container').classList.remove('js-open') - document.querySelector('.search-overlay-desktop').classList.remove('js-open') + $searchInput.classList.remove('js-open') + $searchResultsContainer.classList.remove('js-open') + $searchOverlay.classList.remove('js-open') } - document.querySelector('.ais-Hits').style.display = 'none' - document.querySelector('#search-input-container input').value = '' - window.history.replaceState({}, 'clear search query', window.location.pathname) + const $hits = $searchResultsContainer.querySelector('.ais-Hits') + if ($hits) $hits.style.display = 'none' + $searchInput.value = '' } function deriveLanguageCodeFromPath () { @@ -277,8 +109,8 @@ function deriveLanguageCodeFromPath () { function deriveVersionFromPath () { // fall back to the non-enterprise default version (FPT currently) on the homepage, 404 page, etc. - const version = location.pathname.split('/')[2] || nonEnterpriseDefaultVersion - const versionObject = allVersions[version] || allVersions[nonEnterpriseDefaultVersion] + const versionStr = location.pathname.split('/')[2] || nonEnterpriseDefaultVersion + const versionObject = allVersions[versionStr] || allVersions[nonEnterpriseDefaultVersion] // if GHES, returns the release number like 2.21, 2.22, etc. // if FPT, returns 'dotcom' @@ -287,3 +119,148 @@ function deriveVersionFromPath () { ? versionObject.currentRelease : versionObject.miscBaseName } + +function debounce (fn, delay = 300) { + let timer + return (...args) => { + clearTimeout(timer) + timer = setTimeout(() => fn.apply(null, args), delay) + } +} + +async function onSearch (evt) { + const query = evt.target.value + + const url = new URL(location.origin) + url.pathname = '/search' + url.search = new URLSearchParams({ query, version, language }).toString() + + const response = await fetch(url, { + method: 'GET', + headers: { + 'Content-Type': 'application/json' + } + }) + const results = response.ok ? await response.json() : [] + + $searchResultsContainer.querySelectorAll('*').forEach(el => el.remove()) + $searchResultsContainer.append( + tmplSearchResults(results) + ) + + toggleStandaloneSearch() + + // Analytics tracking + sendEvent({ + type: 'search', + search_query: query + // search_context + }) +} + +// If on homepage, toggle results container if query is present +function toggleStandaloneSearch () { + if (!hasStandaloneSearch()) return + + const query = $searchInput.value + const queryPresent = query && query.length > 0 + const $results = document.querySelector('.ais-Hits') + + // Primer classNames for showing and hiding the results container + const activeClass = $searchResultsContainer.getAttribute('data-active-class') + const inactiveClass = $searchResultsContainer.getAttribute('data-inactive-class') + + if (!activeClass) { + console.error('container is missing required `data-active-class` attribute', $searchResultsContainer) + return + } + + if (!inactiveClass) { + console.error('container is missing required `data-inactive-class` attribute', $searchResultsContainer) + return + } + + // hide the container when no query is present + $searchResultsContainer.classList.toggle(activeClass, queryPresent) + $searchResultsContainer.classList.toggle(inactiveClass, !queryPresent) + + if (queryPresent && $results) $results.style.display = 'block' +} + +/** * Template functions ***/ + +function tmplSearchInput () { + // only autofocus on the homepage, and only if no #hash is present in the URL + const autofocus = (hasStandaloneSearch() && !location.hash.length) || null + const { div, form, input, button } = tags + return div( + { class: 'ais-SearchBox' }, + form( + { role: 'search', class: 'ais-SearchBox-form', novalidate: true }, + input({ + class: 'ais-SearchBox-input', + type: 'search', + placeholder, + autofocus, + autocomplete: 'off', + autocorrect: 'off', + autocapitalize: 'off', + spellcheck: 'false', + maxlength: '512' + }), + button({ + class: 'ais-SearchBox-submit', + type: 'submit', + title: 'Submit the search query.', + hidden: true + }) + ) + ) +} + +function tmplSearchResults (items) { + const { div, ol, li } = tags + return div( + { class: 'ais-Hits', style: 'display:block' }, + ol( + { class: 'ais-Hits-list' }, + items.map(item => li( + { class: 'ais-Hits-item' }, + tmplSearchResult(item) + )) + ) + ) +} + +function tmplSearchResult ({ url, breadcrumbs, heading, title, content }) { + const { div, a } = tags + return div( + { class: 'search-result border-top border-gray-light py-3 px-2' }, + a( + { href: url, class: 'no-underline' }, + div( + { class: 'search-result-breadcrumbs d-block text-gray-dark opacity-60 text-small pb-1' }, + // Remove redundant title from the end of breadcrumbs + emify((breadcrumbs || '').replace(` / ${title}`, '')) + ), + div( + { class: 'search-result-title d-block h4-mktg text-gray-dark' }, + // Display page title and heading (if present exists) + emify(heading ? `${title}: ${heading}` : title) + ), + div( + { class: 'search-result-content d-block text-gray' }, + // Truncate without breaking inner HTML tags + emify(truncate(content, maxContentLength)) + ) + ) + ) +} + +// Allow em tags in search responses +function emify (text) { + const { em } = tags + return text + .split(/<\/?em>/g) + .map((el, i) => i % 2 ? em(el) : el) +} diff --git a/lib/algolia/client.js b/lib/search/algolia-client.js similarity index 59% rename from lib/algolia/client.js rename to lib/search/algolia-client.js index 16fdb7daea01..dc1f54636bd6 100644 --- a/lib/algolia/client.js +++ b/lib/search/algolia-client.js @@ -3,4 +3,6 @@ require('dotenv').config() const algoliasearch = require('algoliasearch') const { ALGOLIA_APPLICATION_ID, ALGOLIA_API_KEY } = process.env -module.exports = algoliasearch(ALGOLIA_APPLICATION_ID, ALGOLIA_API_KEY) +module.exports = function () { + return algoliasearch(ALGOLIA_APPLICATION_ID, ALGOLIA_API_KEY) +} diff --git a/lib/algolia/get-remote-index-names.js b/lib/search/algolia-get-remote-index-names.js similarity index 60% rename from lib/algolia/get-remote-index-names.js rename to lib/search/algolia-get-remote-index-names.js index d7cad539d1af..fb8334ce140e 100644 --- a/lib/algolia/get-remote-index-names.js +++ b/lib/search/algolia-get-remote-index-names.js @@ -1,13 +1,14 @@ -const algoliaClient = require('./client') -const AlgoliaIndex = require('./search-index') +const { namePrefix } = require('./config') +const getAlgoliaClient = require('./algolia-client') module.exports = async function getRemoteIndexNames () { + const algoliaClient = getAlgoliaClient() const indices = await algoliaClient.listIndexes() // ignore other indices that may be present in the Algolia account like `helphub-`, etc const indexNames = indices.items .map(field => field.name) - .filter(name => name.startsWith(AlgoliaIndex.namePrefix)) + .filter(name => name.startsWith(namePrefix)) return indexNames } diff --git a/lib/algolia/search-index.js b/lib/search/algolia-search-index.js similarity index 59% rename from lib/algolia/search-index.js rename to lib/search/algolia-search-index.js index dcf119764c98..1e43e71ea3f5 100644 --- a/lib/algolia/search-index.js +++ b/lib/search/algolia-search-index.js @@ -1,17 +1,11 @@ -const assert = require('assert') -const { chain, chunk, difference, isArray, isString, inRange } = require('lodash') +const { chain, chunk, difference } = require('lodash') const eventToPromise = require('event-to-promise') const objectHash = require('object-hash') -const countArrayValues = require('count-array-values') -const isURL = require('is-url') const rank = require('./rank') +const validateRecords = require('./validate-records') +const getAlgoliaClient = require('./algolia-client') class AlgoliaIndex { - // records must be truncated to avoid going over Algolia's 10K limit - static get maxRecordLength () { return 8000 } - static get maxContentLength () { return 5000 } - static get namePrefix () { return 'github-docs' } - constructor (name, records) { this.name = name this.records = records @@ -24,52 +18,14 @@ class AlgoliaIndex { } validate () { - assert(isString(this.name) && this.name.length, '`name` is required') - assert(isArray(this.records) && this.records.length, '`records` must be a non-empty array') - - // each ID is unique - const objectIDs = this.records.map(record => record.objectID) - const dupes = countArrayValues(objectIDs) - .filter(({ value, count }) => count > 1) - .map(({ value }) => value) - assert(!dupes.length, `every objectID must be unique. dupes: ${dupes.join('; ')}`) - - this.records.forEach(record => { - assert( - isString(record.objectID) && record.objectID.length, - `objectID must be a string. received: ${record.objectID}, ${JSON.stringify(record)}` - ) - - assert( - isString(record.title) && record.title.length, - `title must be a string. received: ${record.title}, ${JSON.stringify(record)}` - ) - - assert( - isURL(record.url), - `url must be a fully qualified URL. received: ${record.url}, ${JSON.stringify(record)}` - ) - - assert( - inRange(record.customRanking, 0, 4), - `customRanking must be an in-range number. received: ${record.customRanking}, (record: ${record.url})` - ) - - const recordLength = JSON.stringify(record).length - assert( - recordLength <= AlgoliaIndex.maxRecordLength, - `record ${record.url} is too long! ${recordLength} (max: ${AlgoliaIndex.maxRecordLength})` - ) - }) - - return true + return validateRecords(this.name, this.records) } // This method consumes Algolia's `browseAll` event emitter, // aggregating results into an array of all the records // https://www.algolia.com/doc/api-reference/api-methods/browse/ async fetchExistingRecords () { - const client = require('./client') + const client = getAlgoliaClient() // return an empty array if the index does not exist yet const { items: indices } = await client.listIndexes() @@ -97,7 +53,7 @@ class AlgoliaIndex { } async syncWithRemote () { - const client = require('./client') + const client = getAlgoliaClient() console.log('\n\nsyncing %s with remote', this.name) this.validate() diff --git a/lib/algolia/build-records.js b/lib/search/build-records.js similarity index 100% rename from lib/algolia/build-records.js rename to lib/search/build-records.js diff --git a/lib/algolia/cached-index-names.json b/lib/search/cached-index-names.json similarity index 100% rename from lib/algolia/cached-index-names.json rename to lib/search/cached-index-names.json diff --git a/lib/search/config.js b/lib/search/config.js new file mode 100644 index 000000000000..5c54d0e3c00d --- /dev/null +++ b/lib/search/config.js @@ -0,0 +1,6 @@ +module.exports = { + // records must be truncated to avoid going over Algolia's 10K limit + maxRecordLength: 8000, + maxContentLength: 5000, + namePrefix: 'github-docs' +} diff --git a/lib/algolia/find-indexable-pages.js b/lib/search/find-indexable-pages.js similarity index 100% rename from lib/algolia/find-indexable-pages.js rename to lib/search/find-indexable-pages.js diff --git a/lib/algolia/parse-page-sections-into-records.js b/lib/search/parse-page-sections-into-records.js similarity index 93% rename from lib/algolia/parse-page-sections-into-records.js rename to lib/search/parse-page-sections-into-records.js index dbd15ad2ba25..79ac651f83b9 100644 --- a/lib/algolia/parse-page-sections-into-records.js +++ b/lib/search/parse-page-sections-into-records.js @@ -4,11 +4,11 @@ const { chain } = require('lodash') const urlPrefix = 'https://docs.github.com' -const AlgoliaIndex = require('./search-index') const ignoredHeadingSlugs = [ 'in-this-article', 'further-reading' ] +const { maxContentLength } = require('./config') module.exports = function parsePageSectionsIntoRecords (href, $) { const title = $('h1').text().trim() @@ -46,7 +46,7 @@ module.exports = function parsePageSectionsIntoRecords (href, $) { .get() .join(' ') .trim() - .slice(0, AlgoliaIndex.maxContentLength) + .slice(0, maxContentLength) return { objectID, url, @@ -67,7 +67,7 @@ module.exports = function parsePageSectionsIntoRecords (href, $) { .get() .join(' ') .trim() - .slice(0, AlgoliaIndex.maxContentLength) + .slice(0, maxContentLength) records = [{ objectID, diff --git a/lib/algolia/rank.js b/lib/search/rank.js similarity index 100% rename from lib/algolia/rank.js rename to lib/search/rank.js diff --git a/lib/algolia/sync.js b/lib/search/sync.js similarity index 86% rename from lib/algolia/sync.js rename to lib/search/sync.js index c7d6bf43cc69..fa6e03428f91 100644 --- a/lib/algolia/sync.js +++ b/lib/search/sync.js @@ -6,14 +6,17 @@ const chalk = require('chalk') const languages = require('../languages') const buildRecords = require('./build-records') const findIndexablePages = require('./find-indexable-pages') -const getRemoteIndexNames = require('./get-remote-index-names') -const Index = require('./search-index') -const cacheDir = path.join(process.cwd(), './.algolia-cache') +const cacheDir = path.join(process.cwd(), './.search-cache') const allVersions = require('../all-versions') +const { namePrefix } = require('./config') + +// Algolia +const getRemoteIndexNames = require('./algolia-get-remote-index-names') +const AlgoliaIndex = require('./algolia-search-index') // Build a search data file for every combination of product version and language // e.g. `github-docs-dotcom-en.json` and `github-docs-2.14-ja.json` -module.exports = async function syncAlgoliaIndices (opts = {}) { +module.exports = async function syncSearchIndexes (opts = {}) { if (opts.dryRun) { console.log('This is a dry run! The script will build the indices locally but not upload anything.\n') rimraf(cacheDir) @@ -60,11 +63,11 @@ module.exports = async function syncAlgoliaIndices (opts = {}) { : allVersions[pageVersion].miscBaseName // github-docs-dotcom-en, github-docs-2.22-en - const indexName = `${Index.namePrefix}-${indexVersion}-${languageCode}` + const indexName = `${namePrefix}-${indexVersion}-${languageCode}` // The page version will be the new version, e.g., free-pro-team@latest, enterprise-server@2.22 const records = await buildRecords(indexName, indexablePages, pageVersion, languageCode) - const index = new Index(indexName, records) + const index = new AlgoliaIndex(indexName, records) if (opts.dryRun) { const cacheFile = path.join(cacheDir, `${indexName}.json`) @@ -87,7 +90,7 @@ module.exports = async function syncAlgoliaIndices (opts = {}) { ) if (!process.env.CI) { - console.log(chalk.green(`\nCached remote index names in ${path.relative(process.cwd(), cachedIndexNamesFile)}`)) + console.log(chalk.green(`\nCached index names in ${path.relative(process.cwd(), cachedIndexNamesFile)}`)) console.log(chalk.green('(If this file has any changes, please commit them)')) } diff --git a/lib/search/validate-records.js b/lib/search/validate-records.js new file mode 100644 index 000000000000..fbbd4b09cd4d --- /dev/null +++ b/lib/search/validate-records.js @@ -0,0 +1,47 @@ +const assert = require('assert') +const { isArray, isString, inRange } = require('lodash') +const isURL = require('is-url') +const countArrayValues = require('count-array-values') +const { maxRecordLength } = require('./config') + +module.exports = function validateRecords (name, records) { + assert(isString(name) && name.length, '`name` is required') + assert(isArray(records) && records.length, '`records` must be a non-empty array') + + // each ID is unique + const objectIDs = records.map(record => record.objectID) + const dupes = countArrayValues(objectIDs) + .filter(({ value, count }) => count > 1) + .map(({ value }) => value) + assert(!dupes.length, `every objectID must be unique. dupes: ${dupes.join('; ')}`) + + records.forEach(record => { + assert( + isString(record.objectID) && record.objectID.length, + `objectID must be a string. received: ${record.objectID}, ${JSON.stringify(record)}` + ) + + assert( + isString(record.title) && record.title.length, + `title must be a string. received: ${record.title}, ${JSON.stringify(record)}` + ) + + assert( + isURL(record.url), + `url must be a fully qualified URL. received: ${record.url}, ${JSON.stringify(record)}` + ) + + assert( + inRange(record.customRanking, 0, 4), + `customRanking must be an in-range number. received: ${record.customRanking}, (record: ${record.url})` + ) + + const recordLength = JSON.stringify(record).length + assert( + recordLength <= maxRecordLength, + `record ${record.url} is too long! ${recordLength} (max: ${maxRecordLength})` + ) + }) + + return true +} diff --git a/lib/search/versions.js b/lib/search/versions.js new file mode 100644 index 000000000000..479e2b7d18cb --- /dev/null +++ b/lib/search/versions.js @@ -0,0 +1,13 @@ +const allVersions = require('../all-versions') + +module.exports = new Set( + Object.values(allVersions) + .map(version => + // if GHES, resolves to the release number like 2.21, 2.22, etc. + // if FPT, resolves to 'dotcom' + // if GHAE, resolves to 'ghae' + version.plan === 'enterprise-server' + ? version.currentRelease + : version.miscBaseName + ) +) diff --git a/middleware/index.js b/middleware/index.js index ce02a3d347da..4eead804ec36 100644 --- a/middleware/index.js +++ b/middleware/index.js @@ -66,6 +66,7 @@ module.exports = function (app) { app.use('/public', express.static('data/graphql')) app.use('/events', require('./events')) app.use('/csrf', require('./csrf-route')) + app.use('/search', require('./search')) app.use(require('./archived-enterprise-versions')) app.use(require('./robots')) app.use(/(\/.*)?\/early-access$/, require('./contextualizers/early-access-links')) diff --git a/middleware/search.js b/middleware/search.js new file mode 100644 index 000000000000..262d4216df61 --- /dev/null +++ b/middleware/search.js @@ -0,0 +1,57 @@ +const express = require('express') +const algoliasearch = require('algoliasearch') +const { namePrefix } = require('../lib/search/config') +const languages = new Set(Object.keys(require('../lib/languages'))) +const versions = require('../lib/search/versions') + +const router = express.Router() + +// https://www.algolia.com/apps/ZI5KPY1HBE/dashboard +// This API key is public. There's also a private API key for writing to the Algolia API +const searchClient = algoliasearch('ZI5KPY1HBE', '685df617246c3a10abba589b4599288f') + +async function loadAlgoliaResults ({ version, language, query, limit }) { + const indexName = `${namePrefix}-${version}-${language}` + const index = searchClient.initIndex(indexName) + + // allows "phrase queries" and "prohibit operator" + // https://www.algolia.com/doc/api-reference/api-parameters/advancedSyntax/ + const { hits } = await index.search(query, { + hitsPerPage: limit, + advancedSyntax: true + }) + + return hits.map(hit => ({ + url: hit.url, + breadcrumbs: hit._highlightResult.breadcrumbs.value, + heading: hit._highlightResult.heading.value, + title: hit._highlightResult.title.value, + content: hit._highlightResult.content.value + })) +} + +router.get('/', async (req, res) => { + res.set({ + 'surrogate-control': 'private, no-store', + 'cache-control': 'private, no-store' + }) + + const { query, version, language } = req.query + const limit = Math.min(parseInt(req.query.limit, 10) || 10, 100) + if (!versions.has(version) || !languages.has(language)) { + return res.status(400).json([]) + } + if (!query || !limit) { + return res.status(200).json([]) + } + + try { + const results = await loadAlgoliaResults({ version, language, query, limit }) + return res.status(200).json(results) + } catch (err) { + console.error(err) + return res.status(400).json([]) + } +}) + +module.exports = router diff --git a/package.json b/package.json index e61d744f1594..9634afcf35d7 100644 --- a/package.json +++ b/package.json @@ -57,7 +57,6 @@ "html-truncate": "^1.2.2", "hubdown": "^2.6.0", "imurmurhash": "^0.1.4", - "instantsearch.js": "^4.8.2", "ioredis": "^4.19.4", "ioredis-mock": "^5.2.0", "is-url": "^1.2.4", @@ -166,7 +165,7 @@ "sync-search": "start-server-and-test sync-search-server 4002 sync-search-indices", "sync-search-dry-run": "DRY_RUN=1 npm run sync-search", "sync-search-server": "cross-env NODE_ENV=production PORT=4002 node server.js", - "sync-search-indices": "script/sync-algolia-search-indices.js", + "sync-search-indices": "script/sync-search-indices.js", "test-watch": "jest --watch --notify --notifyMode=change --coverage", "check-deps": "node script/check-deps.js", "prevent-pushes-to-main": "node script/prevent-pushes-to-main.js", diff --git a/script/check-deps.js b/script/check-deps.js index 946bc82a3dc7..c5da72b5dc4a 100644 --- a/script/check-deps.js +++ b/script/check-deps.js @@ -32,7 +32,6 @@ const main = async () => { '@babel/*', 'babel-preset-env', '@primer/*', - 'instantsearch.js', 'querystring', 'pa11y-ci', 'sass', diff --git a/script/sync-algolia-search-indices.js b/script/sync-search-indices.js similarity index 59% rename from script/sync-algolia-search-indices.js rename to script/sync-search-indices.js index 105e0defa3fe..047c2a25fd50 100755 --- a/script/sync-algolia-search-indices.js +++ b/script/sync-search-indices.js @@ -2,8 +2,8 @@ // [start-readme] // -// This script is run automatically via GitHub Actions on every push to `master` to generate searchable data -// and upload it to our Algolia account. It can also be run manually. For more info see [contributing/search.md](contributing/search.md) +// This script is run automatically via GitHub Actions on every push to `main` to generate searchable data. +// It can also be run manually. For more info see [contributing/search.md](contributing/search.md) // // [end-readme] @@ -12,7 +12,7 @@ require('make-promises-safe') main() async function main () { - const sync = require('../lib/algolia/sync') + const sync = require('../lib/search/sync') const opts = { dryRun: 'DRY_RUN' in process.env, language: process.env.LANGUAGE, diff --git a/stylesheets/search.scss b/stylesheets/search.scss index 7bbfcb9f357f..2364bb139cd5 100644 --- a/stylesheets/search.scss +++ b/stylesheets/search.scss @@ -3,7 +3,7 @@ /* Global styles Gets applied to both the search input on homepage and in the header nav -Form and inputs using .ais- prefix gets added by Algolia InstantSearch.js */ +Form and inputs using .ais- prefix gets added by search.js */ .ais-SearchBox { position: relative; } diff --git a/tests/browser/browser.js b/tests/browser/browser.js index 956b52f216fe..9f98e1f10aa9 100644 --- a/tests/browser/browser.js +++ b/tests/browser/browser.js @@ -42,84 +42,45 @@ describe('algolia browser search', () => { }) it('sends the correct data to algolia for Enterprise Server', async () => { - expect.assertions(12) // 3 assertions x 4 letters ('test') + expect.assertions(2) const newPage = await browser.newPage() await newPage.goto('http://localhost:4001/ja/enterprise/2.22/admin/installation') await newPage.setRequestInterception(true) newPage.on('request', interceptedRequest => { - if (interceptedRequest.method() === 'POST' && /algolia/i.test(interceptedRequest.url())) { - const data = JSON.parse(interceptedRequest.postData()) - const { indexName, params } = data.requests[0] - const parsedParams = querystring.parse(params) - const analyticsTags = JSON.parse(parsedParams.analyticsTags) - expect(indexName).toBe('github-docs-2.22-ja') - expect(analyticsTags).toHaveLength(2) - // browser tests are run against production build, so we are expecting env:production - expect(analyticsTags).toEqual(expect.arrayContaining(['site:docs.github.com', 'env:production'])) + if (interceptedRequest.method() === 'GET' && /search/i.test(interceptedRequest.url())) { + const { version, language } = querystring.parse(interceptedRequest.url()) + expect(version).toBe('2.22') + expect(language).toBe('ja') } interceptedRequest.continue() }) await newPage.click('#search-input-container input[type="search"]') await newPage.type('#search-input-container input[type="search"]', 'test') + await newPage.waitForSelector('.search-result') }) it('sends the correct data to algolia for GHAE', async () => { - expect.assertions(12) // 3 assertions x 4 letters ('test') + expect.assertions(2) const newPage = await browser.newPage() await newPage.goto('http://localhost:4001/en/github-ae@latest/admin/overview') await newPage.setRequestInterception(true) newPage.on('request', interceptedRequest => { - if (interceptedRequest.method() === 'POST' && /algolia/i.test(interceptedRequest.url())) { - const data = JSON.parse(interceptedRequest.postData()) - const { indexName, params } = data.requests[0] - const parsedParams = querystring.parse(params) - const analyticsTags = JSON.parse(parsedParams.analyticsTags) - expect(indexName).toBe('github-docs-ghae-en') - expect(analyticsTags).toHaveLength(2) - // browser tests are run against production build, so we are expecting env:production - expect(analyticsTags).toEqual(expect.arrayContaining(['site:docs.github.com', 'env:production'])) + if (interceptedRequest.method() === 'GET' && /search/i.test(interceptedRequest.url())) { + const { version, language } = querystring.parse(interceptedRequest.url()) + expect(version).toBe('ghae') + expect(language).toBe('en') } interceptedRequest.continue() }) await newPage.click('#search-input-container input[type="search"]') await newPage.type('#search-input-container input[type="search"]', 'test') - }) - - it('removes `algolia-query` query param after page load', async () => { - await page.goto('http://localhost:4001/en?algolia-query=helpme') - - // check that the query is still present at page load - let location = await getLocationObject(page) - expect(location.search).toBe('?algolia-query=helpme') - - // query removal is in a setInterval, so wait a bit - await sleep(1000) - - // check that the query has been removed after a bit - location = await getLocationObject(page) - expect(location.search).toBe('') - }) - - it('does not remove hash when removing `algolia-query` query', async () => { - await page.goto('http://localhost:4001/en?algolia-query=helpme#some-header') - - // check that the query is still present at page load - let location = await getLocationObject(page) - expect(location.search).toBe('?algolia-query=helpme') - - // query removal is in a setInterval, so wait a bit - await sleep(1000) - - // check that the query has been removed after a bit - location = await getLocationObject(page) - expect(location.search).toBe('') - expect(location.hash).toBe('#some-header') + await newPage.waitForSelector('.search-result') }) }) @@ -166,13 +127,6 @@ describe('csrf meta', () => { }) }) -async function getLocationObject (page) { - const location = await page.evaluate(() => { - return Promise.resolve(JSON.stringify(window.location, null, 2)) - }) - return JSON.parse(location) -} - describe('platform specific content', () => { // from tests/javascripts/user-agent.js const userAgents = [ diff --git a/tests/content/algolia-search.js b/tests/content/algolia-search.js index 75e3d5c8a85d..32989f503535 100644 --- a/tests/content/algolia-search.js +++ b/tests/content/algolia-search.js @@ -1,14 +1,14 @@ const { dates, supported } = require('../../lib/enterprise-server-releases') const languageCodes = Object.keys(require('../../lib/languages')) -const AlgoliaIndex = require('../../lib/algolia/search-index') -const remoteIndexNames = require('../../lib/algolia/cached-index-names.json') +const { namePrefix } = require('../../lib/search/config') +const remoteIndexNames = require('../../lib/search/cached-index-names.json') describe('algolia', () => { test('has remote indexNames in every language for every supported GHE version', () => { expect(supported.length).toBeGreaterThan(1) supported.forEach(version => { languageCodes.forEach(languageCode => { - const indexName = `${AlgoliaIndex.namePrefix}-${version}-${languageCode}` + const indexName = `${namePrefix}-${version}-${languageCode}` // workaround for GHES release branches not in production yet if (!remoteIndexNames.includes(indexName)) { @@ -28,7 +28,7 @@ describe('algolia', () => { test('has remote indexNames in every language for dotcom', async () => { expect(languageCodes.length).toBeGreaterThan(0) languageCodes.forEach(languageCode => { - const indexName = `${AlgoliaIndex.namePrefix}-dotcom-${languageCode}` + const indexName = `${namePrefix}-dotcom-${languageCode}` expect(remoteIndexNames.includes(indexName)).toBe(true) }) }) diff --git a/tests/unit/algolia/parse-page-sections-into-records.js b/tests/unit/algolia/parse-page-sections-into-records.js index 0137185b2c90..e328f846f0b2 100644 --- a/tests/unit/algolia/parse-page-sections-into-records.js +++ b/tests/unit/algolia/parse-page-sections-into-records.js @@ -1,13 +1,13 @@ const fs = require('fs') const path = require('path') const cheerio = require('cheerio') -const parsePageSectionsIntoRecords = require('../../../lib/algolia/parse-page-sections-into-records') +const parsePageSectionsIntoRecords = require('../../../lib/search/parse-page-sections-into-records') const fixtures = { pageWithSections: fs.readFileSync(path.join(__dirname, 'fixtures/page-with-sections.html'), 'utf8'), pageWithoutSections: fs.readFileSync(path.join(__dirname, 'fixtures/page-without-sections.html'), 'utf8') } -describe('algolia parsePageSectionsIntoRecords module', () => { +describe('search parsePageSectionsIntoRecords module', () => { test('works for pages with sections', () => { const html = fixtures.pageWithSections const $ = cheerio.load(html) diff --git a/tests/unit/algolia/rank.js b/tests/unit/algolia/rank.js index e9557ce589ae..c1ba1b80a3a8 100644 --- a/tests/unit/algolia/rank.js +++ b/tests/unit/algolia/rank.js @@ -1,6 +1,6 @@ -const rank = require('../../../lib/algolia/rank') +const rank = require('../../../lib/search/rank') -test('algolia custom rankings', () => { +test('search custom rankings', () => { const expectedRankings = [ ['https://docs.github.com/en/github/actions', 3], ['https://docs.github.com/en/rest/reference', 2], diff --git a/webpack.config.js b/webpack.config.js index 91ca6d7ab6af..74c804125d67 100644 --- a/webpack.config.js +++ b/webpack.config.js @@ -86,12 +86,5 @@ module.exports = { ] }), new EnvironmentPlugin(['NODE_ENV']) - ], - resolve: { - alias: { - // Hogan uses `new Function` which breaks content security policy - // Turns out, we aren't even using it anyways! - 'hogan.js': path.resolve(__dirname, 'javascripts/fake-hogan.js') - } - } + ] }