From 02ee72fa25b01b75d176c4f2eeabf05d4feaa4ba Mon Sep 17 00:00:00 2001 From: Rachael Sewell Date: Wed, 17 Mar 2021 13:11:29 -0700 Subject: [PATCH] add topics attribute to search (#18212) --- content/README.md | 6 +- contributing/search.md | 5 +- data/allowed-topics.js | 84 +++++++++++++++++++ includes/head.html | 3 + javascripts/search.js | 4 +- lib/search/algolia-search.js | 3 +- lib/search/lunr-search-index.js | 1 + lib/search/lunr-search.js | 5 +- .../parse-page-sections-into-records.js | 21 ++++- .../add-tags-to-articles.js | 1 + .../fixtures/page-with-sections.html | 9 +- .../fixtures/page-without-sections.html | 9 +- .../parse-page-sections-into-records.js | 15 ++-- tests/unit/{algolia => search}/rank.js | 0 tests/unit/search/topics.js | 32 +++++++ 15 files changed, 174 insertions(+), 24 deletions(-) create mode 100644 data/allowed-topics.js rename tests/unit/{algolia => search}/fixtures/page-with-sections.html (75%) rename tests/unit/{algolia => search}/fixtures/page-without-sections.html (56%) rename tests/unit/{algolia => search}/parse-page-sections-into-records.js (79%) rename tests/unit/{algolia => search}/rank.js (100%) create mode 100644 tests/unit/search/topics.js diff --git a/content/README.md b/content/README.md index 79aa0fca8b3f..45b571c32317 100644 --- a/content/README.md +++ b/content/README.md @@ -228,9 +228,9 @@ includeGuides: - Optional. ### `topics` -- Purpose: Indicate the topics covered by the article. -- Type: `String` -- Optional. +- Purpose: Indicate the topics covered by the article. The topics are used to filter guides on some landing pages. For example, the guides at the bottom of [this page](https://docs.github.com/en/actions/guides) can be filtered by topics and the topics are listed under the guide intro. Topics are also added to all search records that get created for each page. The search records contain a `topics` property that is used to filter search results by topics. For more information, see the [Search](/contributing/search.md) contributing guide. Refer to the content models for more details around adding topics. A full list of existing topics is located in the [allowed topics file](/data/allowed-topics.js). If topics in article frontmatter and the allow-topics list become out of sync, the [topics CI test](/tests/unit/search/topics.js) will fail. +- Type: Array of `String`s +- Optional: Topics are preferred for each article, but, there may be cases where existing articles don't yet have topics or a adding a topic to a new article may not add value. ### `contributor` - Purpose: Indicate an article is contributed and maintained by a third-party organization, typically a GitHub Technology Partner. diff --git a/contributing/search.md b/contributing/search.md index 093a419341f3..2c2eabf09a9c 100644 --- a/contributing/search.md +++ b/contributing/search.md @@ -15,7 +15,10 @@ To see all existing search-related issues and pull requests, visit [github.com/g ## How to search The site search is part of every version of docs.github.com. On any page, you can use the search box to search the documents we've indexed. -You can also query our search endpoint directly at: https://docs.github.com/search?language=en&version=dotcom&query=jekyll +You can also query our search endpoint directly at: https://docs.github.com/search?language=en&version=dotcom&query=jekyll+topics:actions + +Using the attribute `topics` in your query will only return results that have the matching topic value. You can find a full list of topics in [the allowed topics file](/data/allowed-topics.js). The `topics` attribute is configured as a [`filter only` facet in Algolia](https://www.algolia.com/doc/guides/managing-results/refine-results/filtering/). + This endpoint responds in JSON format, and fronts Algolia and Lunr. We recommend using this endpoint over directly integrating with Algolia or Lunr, as the endpoint will be more stable. ## Production deploys diff --git a/data/allowed-topics.js b/data/allowed-topics.js new file mode 100644 index 000000000000..a309eccd037c --- /dev/null +++ b/data/allowed-topics.js @@ -0,0 +1,84 @@ +// This is an AllowList of topics that are approved for use in `topics` +// frontmatter property. If a new topic is added to a Markdown file it must +// also be added to this file. + +// The purpose of this list is to ensure we prevent typos and put a process in +// place to keep a curated list of topics. This list also serves as a list of +// available topics filters when using the search endpoint +// (see /contributing/search#how-to-search) +// If you'd like to add a new topic, consult the topic guidelines in the +// content model, add the entry to this list, and ensure you loop in the +// content and/or content strategy team for review. + +module.exports = [ + '2fa', + 'Action development', + 'Amazon ECS', + 'Ant', + 'Azure App Service', + 'Azure Pipelines', + 'CD', + 'CI', + 'CircleCI', + 'Containers', + 'Docker', + 'Fundamentals', + 'GitLab', + 'Google Kubernetes Engine', + 'Gradle', + 'Java', + 'JavaScript', + 'Jenkins', + 'Maven', + 'Migration', + 'Node', + 'Packaging', + 'Powershell', + 'Project management', + 'Publishing', + 'Python', + 'Ruby', + 'Security', + 'Travis CI', + 'Workflows', + 'access management', + 'accounts', + 'api', + 'billing', + 'cli', + 'codespaces', + 'community', + 'desktop', + 'device verification', + 'early access', + 'enterprise', + 'events', + 'github', + 'github apps', + 'github search', + 'identity', + 'issues', + 'jobs', + 'legal', + 'marketplace', + 'mobile', + 'notifications', + 'oauth apps', + 'open source', + 'organizations', + 'pages', + 'permissions', + 'policy', + 'profile', + 'profiles', + 'projects', + 'pull requests', + 'repositories', + 'security', + 'sponsors', + 'ssh', + 'sso', + 'teams', + 'usernames', + 'webhooks' +] diff --git a/includes/head.html b/includes/head.html index 5d931a9048b4..0cd6b63fad57 100644 --- a/includes/head.html +++ b/includes/head.html @@ -11,6 +11,9 @@ {% if page.intro %} {% endif %} + {% if page.topics %} + + {% endif %} {% if page.hidden %} {% endif %} diff --git a/javascripts/search.js b/javascripts/search.js index 760bce874bbb..edc7def535f2 100644 --- a/javascripts/search.js +++ b/javascripts/search.js @@ -261,8 +261,8 @@ function tmplSearchResult ({ url, breadcrumbs, heading, title, content }) { { href: url, class: 'no-underline' }, div( { class: 'search-result-breadcrumbs d-block text-gray-dark opacity-60 text-small pb-1' }, - // Remove redundant title from the end of breadcrumbs - markify((breadcrumbs || '').replace(` / ${title}`, '')) + // Breadcrumbs in search records don't include the page title + markify(breadcrumbs || '') ), div( { class: 'search-result-title d-block h4-mktg text-gray-dark' }, diff --git a/lib/search/algolia-search.js b/lib/search/algolia-search.js index e14b4e452409..a5d0a598ca71 100644 --- a/lib/search/algolia-search.js +++ b/lib/search/algolia-search.js @@ -24,6 +24,7 @@ module.exports = async function loadAlgoliaResults ({ version, language, query, breadcrumbs: get(hit, '_highlightResult.breadcrumbs.value'), heading: get(hit, '_highlightResult.heading.value'), title: get(hit, '_highlightResult.title.value'), - content: get(hit, '_highlightResult.content.value') + content: get(hit, '_highlightResult.content.value'), + topics: hit.topics })) } diff --git a/lib/search/lunr-search-index.js b/lib/search/lunr-search-index.js index 05eb88ddc3b0..bde23b7f7e46 100644 --- a/lib/search/lunr-search-index.js +++ b/lib/search/lunr-search-index.js @@ -46,6 +46,7 @@ module.exports = class LunrIndex { this.field('heading') this.field('title') this.field('content') + this.field('topics') this.field('customRanking') this.metadataWhitelist = ['position'] diff --git a/lib/search/lunr-search.js b/lib/search/lunr-search.js index 1a3e65f3198c..1a0d5db2b535 100644 --- a/lib/search/lunr-search.js +++ b/lib/search/lunr-search.js @@ -25,7 +25,9 @@ module.exports = async function loadLunrResults ({ version, language, query, lim breadcrumbs: field(result, record, 'breadcrumbs'), heading: field(result, record, 'heading'), title: field(result, record, 'title'), - content: field(result, record, 'content') + content: field(result, record, 'content'), + // don't highlight the topics array + topics: record.topics } }) return results @@ -48,6 +50,7 @@ async function loadLunrRecords (indexName) { .then(JSON.parse) } +// Highlight a match within an attribute field function field (result, record, name) { const text = record[name] if (!text) return text diff --git a/lib/search/parse-page-sections-into-records.js b/lib/search/parse-page-sections-into-records.js index 79ac651f83b9..3c253ae617a8 100644 --- a/lib/search/parse-page-sections-into-records.js +++ b/lib/search/parse-page-sections-into-records.js @@ -12,7 +12,7 @@ const { maxContentLength } = require('./config') module.exports = function parsePageSectionsIntoRecords (href, $) { const title = $('h1').text().trim() - const breadcrumbs = $('nav.breadcrumbs a') + const breadcrumbsArray = $('nav.breadcrumbs a') .map((i, el) => { return $(el) .text() @@ -21,7 +21,18 @@ module.exports = function parsePageSectionsIntoRecords (href, $) { .replace(/\s+/g, ' ') }) .get() - .join(' / ') + .slice(0, -1) + + const breadcrumbs = breadcrumbsArray.join(' / ') || '' + const metaKeywords = $('meta[name="keywords"]').attr('content') + const topics = metaKeywords ? metaKeywords.split(',') : [] + + const productName = breadcrumbsArray[0] || '' + topics.push(productName) + // Remove "github" to make filter queries shorter + if (productName.includes('GitHub ')) { + topics.push(productName.replace('GitHub ', '')) + } let records @@ -54,7 +65,8 @@ module.exports = function parsePageSectionsIntoRecords (href, $) { breadcrumbs, heading, title, - content + content, + topics } }) .get() @@ -74,7 +86,8 @@ module.exports = function parsePageSectionsIntoRecords (href, $) { url, breadcrumbs, title, - content + content, + topics }] } diff --git a/script/content-migrations/add-tags-to-articles.js b/script/content-migrations/add-tags-to-articles.js index 9a6f43138c20..e98e7fe172ff 100755 --- a/script/content-migrations/add-tags-to-articles.js +++ b/script/content-migrations/add-tags-to-articles.js @@ -48,6 +48,7 @@ function updateFrontmatter (filePath, newTopics) { } else if (Array.isArray(data.topics)) { topics = topics.concat(data.topics) } + newTopics.forEach(topic => { topics.push(topic) }) diff --git a/tests/unit/algolia/fixtures/page-with-sections.html b/tests/unit/search/fixtures/page-with-sections.html similarity index 75% rename from tests/unit/algolia/fixtures/page-with-sections.html rename to tests/unit/search/fixtures/page-with-sections.html index f61318d4c4d5..f6da35c32892 100644 --- a/tests/unit/algolia/fixtures/page-with-sections.html +++ b/tests/unit/search/fixtures/page-with-sections.html @@ -1,8 +1,11 @@ + + +

I am the page title

diff --git a/tests/unit/algolia/fixtures/page-without-sections.html b/tests/unit/search/fixtures/page-without-sections.html similarity index 56% rename from tests/unit/algolia/fixtures/page-without-sections.html rename to tests/unit/search/fixtures/page-without-sections.html index 2c137527e62b..4046e0a66cb9 100644 --- a/tests/unit/algolia/fixtures/page-without-sections.html +++ b/tests/unit/search/fixtures/page-without-sections.html @@ -1,10 +1,13 @@ + + +

I am outside the article and should not be included

A page without sections

diff --git a/tests/unit/algolia/parse-page-sections-into-records.js b/tests/unit/search/parse-page-sections-into-records.js similarity index 79% rename from tests/unit/algolia/parse-page-sections-into-records.js rename to tests/unit/search/parse-page-sections-into-records.js index e328f846f0b2..982a72a086f7 100644 --- a/tests/unit/algolia/parse-page-sections-into-records.js +++ b/tests/unit/search/parse-page-sections-into-records.js @@ -20,19 +20,21 @@ describe('search parsePageSectionsIntoRecords module', () => { objectID: '/example/href#first', url: 'https://docs.github.com/example/href#first', slug: 'first', - breadcrumbs: 'a / b / c', + breadcrumbs: 'GitHub Actions / actions learning path', heading: 'First heading', title: 'I am the page title', - content: "Here's a paragraph. And another." + content: "Here's a paragraph. And another.", + topics: ['topic1', 'topic2', 'GitHub Actions', 'Actions'] }, { objectID: '/example/href#second', url: 'https://docs.github.com/example/href#second', slug: 'second', - breadcrumbs: 'a / b / c', + breadcrumbs: 'GitHub Actions / actions learning path', heading: 'Second heading', title: 'I am the page title', - content: "Here's a paragraph in the second section. And another." + content: "Here's a paragraph in the second section. And another.", + topics: ['topic1', 'topic2', 'GitHub Actions', 'Actions'] } ] @@ -50,9 +52,10 @@ describe('search parsePageSectionsIntoRecords module', () => { { objectID: '/example/href', url: 'https://docs.github.com/example/href', - breadcrumbs: 'x / y / z', + breadcrumbs: 'Education / map topic', title: 'A page without sections', - content: 'First paragraph. Second paragraph.' + content: 'First paragraph. Second paragraph.', + topics: ['key1', 'key2', 'key3', 'Education'] } ] expect(records).toEqual(expected) diff --git a/tests/unit/algolia/rank.js b/tests/unit/search/rank.js similarity index 100% rename from tests/unit/algolia/rank.js rename to tests/unit/search/rank.js diff --git a/tests/unit/search/topics.js b/tests/unit/search/topics.js new file mode 100644 index 000000000000..67e31554bba2 --- /dev/null +++ b/tests/unit/search/topics.js @@ -0,0 +1,32 @@ +const path = require('path') +const fs = require('fs') +const readFrontmatter = require('../../../lib/read-frontmatter') +const walk = require('walk-sync') +const { difference } = require('lodash') +const allowedTopics = require('../../../data/allowed-topics') + +const contentDir = path.join(process.cwd(), 'content') +const topics = walk(contentDir, { includeBasePath: true }) + .filter(filename => filename.endsWith('.md') && !filename.includes('README')) + .map(filename => { + const fileContent = fs.readFileSync(filename, 'utf8') + const { data } = readFrontmatter(fileContent) + return data.topics || [] + }) + .flat() + +const allUsedTopics = [...new Set(topics)].sort() + +describe('Check for allowed frontmatter topics', () => { + test('all used topics are allowed in /data/allowed-topics.js', () => { + expect(allUsedTopics.length).toBeGreaterThan(0) + const unusedTopics = difference(allUsedTopics, allowedTopics) + expect(unusedTopics).toEqual([]) + }) + + test('all allowed topics are used by at least one content file', () => { + expect(allowedTopics.length).toBeGreaterThan(0) + const disallowedTopics = difference(allowedTopics, allUsedTopics) + expect(disallowedTopics).toEqual([]) + }) +})