diff --git a/spec_insert/.gitignore b/spec_insert/.gitignore new file mode 100644 index 0000000000..c9958b86d2 --- /dev/null +++ b/spec_insert/.gitignore @@ -0,0 +1,2 @@ +opensearch-openapi.yaml +rspec_examples.txt diff --git a/spec_insert/.rspec b/spec_insert/.rspec new file mode 100644 index 0000000000..c99d2e7396 --- /dev/null +++ b/spec_insert/.rspec @@ -0,0 +1 @@ +--require spec_helper diff --git a/spec_insert/.rubocop.yml b/spec_insert/.rubocop.yml new file mode 100644 index 0000000000..52c22077f1 --- /dev/null +++ b/spec_insert/.rubocop.yml @@ -0,0 +1,25 @@ +require: rubocop-rake +AllCops: + Include: + - 'lib/**/*.rb' + - 'Rakefile' + NewCops: enable + +Metrics/CyclomaticComplexity: + Enabled: false +Metrics/MethodLength: + Enabled: false +Metrics/ParameterLists: + Enabled: false +Metrics/AbcSize: + Enabled: false +Metrics/PerceivedComplexity: + Enabled: false + +Layout/EmptyLineAfterGuardClause: + Enabled: false + +Style/MultilineBlockChain: + Enabled: false +Style/SingleLineMethods: + Enabled: false diff --git a/spec_insert/.ruby-version b/spec_insert/.ruby-version new file mode 100644 index 0000000000..fd2a01863f --- /dev/null +++ b/spec_insert/.ruby-version @@ -0,0 +1 @@ +3.1.0 diff --git a/spec_insert/README.md b/spec_insert/README.md new file mode 100644 index 0000000000..2c238d1abd --- /dev/null +++ b/spec_insert/README.md @@ -0,0 +1,91 @@ +# README: Spec Insert + - [What is this?](#what-is-this) + - [Installation](#installation) + - [How to use](#how-to-use) + - [Insert Query Parameters](#insert-query-parameters) + - [Insert Path Parameters](#insert-path-parameters) + - [Insert Paths and HTTP Methods](#insert-paths-and-http-methods) + - [Ignored files and folders](#ignored-files-and-folders) + +## What is this? +This program allows you to insert API components generated from the OpenSearch Specification into this repository's markdown files. It's still under development, and many features are not yet implemented. 
This document will be updated as the program evolves. + +## Installation +1. Clone this repository. +2. Change to the `spec_insert` directory. +3. Install Ruby 3.1.0 or later. +4. Install the required gems by running `bundle install`. + +## How to use +Edit your markdown file and insert the following snippet where you want the API components to be inserted: +```markdown + + +This is where the API component will be inserted. +Everything between the `spec_insert_start` and `spec_insert_end` tags will be overwritten. + + +``` + +Then run the following Rake commands to download the latest OpenSearch Specification and insert the API components into the markdown files: +```shell +rake download_spec +rake insert_spec +``` + +### Insert Query Parameters +To insert the query parameters table of the `cat.indices` API, use the following snippet: +```markdown + + +``` + +- This will insert the query parameters of the `cat.indices` API into the markdown file with 3 default columns: `Parameter`, `Type`, and `Description`. There are 5 columns that can be inserted: `Parameter`, `Type`, `Description`, `Required`, and `Default`. When `Required`/`Default` is not chosen, the info will be written in the `Description` column. +- This component accepts the `include_global` (boolean, defaults to `false`) argument to include global query parameters in the table. +- This component accepts the `include_deprecated` (boolean, defaults to `true`) argument to include deprecated parameters in the table. +- This component accepts the `pretty` (boolean, defaults to `false`) argument to render the table in the pretty format instead of the compact format. + +```markdown + + +``` + +### Insert Path Parameters + +To insert the path parameters table of the `indices.create` API, use the following snippet: +```markdown + + +``` + +This table behaves the same as the query parameters table except that it does not accept the `include_global` argument. 
+ +### Insert Paths and HTTP Methods + +To insert paths and HTTP methods of the `search` API, use the following snippet: +```markdown + + +``` + +### Ignored files and folders +The program will ignore all markdown files whose names are in ALL CAPS. On top of that, you can also add files and folders you want to the [ignored.txt](./ignored.txt) file. Each line in the file should be the name of a file or folder you want to ignore. diff --git a/spec_insert/Rakefile b/spec_insert/Rakefile new file mode 100644 index 0000000000..57607d2528 --- /dev/null +++ b/spec_insert/Rakefile @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require 'rake' + +desc 'Download the OpenSearch API specification' +task :download_spec do + sh 'curl -L -X GET ' \ + 'https://github.com/opensearch-project/opensearch-api-specification' \ + '/releases/download/main-latest/opensearch-openapi.yaml ' \ + '-o opensearch-openapi.yaml' +end + +desc 'Insert the OpenSearch API specification info into the documentation' +task :insert_spec do + require_relative 'lib/spec_inserter' + require_relative 'lib/doc_processor' + + SpecInserter.new( + root_folder: '../', + spec_file: './opensearch-openapi.yaml', + ignored: './ignored.txt' + ).insert_spec +end diff --git a/spec_insert/gemfile b/spec_insert/gemfile new file mode 100644 index 0000000000..d2bdc511de --- /dev/null +++ b/spec_insert/gemfile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. 
# A collection of operations that comprise a single API Action
# AKA operation-group
class Action
  # Builds the action index from the spec: every operation under `spec.paths`
  # that is not flagged `x-ignorable` is wrapped in an Operation, and operations
  # sharing the same operation group are collected into one Action.
  # @param [SpecHash] spec Parsed OpenAPI spec
  def self.actions=(spec)
    operations = spec.paths.flat_map do |url, ops|
      ops.filter_map { |verb, op| Operation.new(op, url, verb) unless op['x-ignorable'] }
    end
    # The group key is also each Action's full_name, so the grouped hash is the index.
    @actions = operations.group_by(&:group).transform_values { |ops| Action.new(ops) }
  end

  # @return [Hash] API Actions indexed by operation-group
  # @raise [RuntimeError] when the actions have not been loaded yet
  def self.actions
    raise 'Actions not set' unless @actions
    @actions
  end

  # @return [Array] Operations in the action
  attr_reader :operations

  # @param [Array] operations Operations of one operation group. The first one
  #   supplies the action-level metadata (name, description, deprecation, ...).
  def initialize(operations)
    @operations = operations
    @operation = operations.first
    @spec = @operation&.spec
  end

  # @return [Array] Input arguments.
  def arguments; @arguments ||= Parameter.from_operations(@operations.map(&:spec)); end

  # @return [String] Full name of the action (i.e. namespace.action)
  def full_name; @operation&.group; end

  # @return [String] Name of the action
  def name; @operation&.action; end

  # @return [String] Namespace of the action
  def namespace; @operation&.namespace; end

  # @return [Array] Sorted unique HTTP verbs
  def http_verbs; @operations.map(&:http_verb).uniq.sort; end

  # @return [Array] Unique URLs
  def urls; @operations.map(&:url).uniq; end

  # @return [String] Description of the action
  def description; @spec&.description; end

  # @return [Boolean] Whether the action is deprecated
  def deprecated; @spec&.deprecated; end

  # Nil-safe like every other accessor: an Action built from no operations has no spec.
  # @return [String, nil] Deprecation message
  def deprecation_message; @spec && @spec['x-deprecation-message']; end

  # Reads the URL from the operation spec's OpenAPI `externalDocs` object.
  # Operation itself exposes no `external_docs` reader, so the lookup goes
  # through the spec instead.
  # @return [String, nil] API reference
  def api_reference
    docs = @spec && @spec['externalDocs']
    docs && docs['url']
  end
end
# An API Operation
class Operation
  # @return [Object] Operation spec. Only `#[]` is used here.
  #   NOTE(review): Action.actions= passes entries of `spec.paths`, so this is
  #   presumably a SpecHash rather than an Openapi3Parser node - confirm.
  attr_reader :spec
  # @return [String] URL
  attr_reader :url
  # @return [String] HTTP Verb (upper-cased)
  attr_reader :http_verb
  # @return [String] Operation Group
  attr_reader :group
  # @return [String] API Action
  attr_reader :action
  # @return [String, nil] API Namespace (nil when the group has no namespace)
  attr_reader :namespace

  # @param [Object] spec Operation spec; must provide 'x-operation-group' via #[]
  # @param [String] url
  # @param [String] http_verb
  # @raise [ArgumentError] when the spec has no 'x-operation-group'
  def initialize(spec, url, http_verb)
    @spec = spec
    @url = url
    @http_verb = http_verb.upcase
    @group = spec['x-operation-group']
    raise ArgumentError, "Missing x-operation-group for #{@http_verb} #{url}" if @group.nil?

    # Split on the LAST dot so that namespace + '.' + action always equals the
    # group, even if a group ever contains more than one dot.
    namespace, _dot, @action = @group.rpartition('.')
    @namespace = namespace.empty? ? nil : namespace
  end
end
default:, deprecated:, deprecation_message:, + version_deprecated:, location:) + @name = name + @description = description + @required = required + @schema = schema + @doc_type = get_doc_type(schema).gsub('String / List', 'List').gsub('List / String', 'List') + @default = default + @deprecated = deprecated + @deprecation_message = deprecation_message + @version_deprecated = version_deprecated + @location = location + end + + # @param [SpecHash | nil] schema + # @return [String | nil] Documentation type + def get_doc_type(schema) + return nil if schema.nil? + union = schema.anyOf || schema.oneOf + return union.map { |sch| get_doc_type(sch) }.join(' / ') unless union.nil? + return 'Integer' if schema.type == 'integer' + return 'Float' if schema.type == 'number' + return 'Boolean' if schema.type == 'boolean' + return 'String' if schema.type == 'string' + return 'NULL' if schema.type == 'null' + return 'List' if schema.type == 'array' + 'Object' + end + + # @param [SpecHash] Full OpenAPI spec + def self.global=(spec) + @global = spec.components.parameters.filter { |_, p| p['x-global'] }.map { |_, p| from_parameters([p], 1) } + end + + # @return [Array] Global parameters + def self.global + raise 'Global parameters not set' unless @global + @global + end + + # @param [Array] operations List of operations of the same group + # @return [Array] List of parameters of the operation group + def self.from_operations(operations) + operations.flat_map(&:parameters).filter { |param| !param['x-global'] } + .group_by(&:name).values.map { |params| from_parameters(params, operations.size) } + end + + # @param [Array] params List of parameters of the same name + # @param [Integer] opts_count Number of operations involved + # @return [Parameter] Single parameter distilled from the list + def self.from_parameters(params, opts_count) + param = params.first || SpecHash.new + schema = param&.schema || SpecHash.new + Parameter.new(name: param.name, + description: param.description || 
# Processes a file, replacing spec_insert blocks with rendered content
class DocProcessor
  # NOTE(review): both marker patterns are reconstructed - the HTML comment text
  # was stripped from the reviewed copy of this file. Confirm against the repository.
  START_MARKER = /<!-- spec_insert_start/
  END_MARKER = /<!-- spec_insert_end -->/

  # @param [String] file_path Path of the markdown file to process
  def initialize(file_path)
    @file_path = file_path
    @lines = File.readlines(file_path)
  end

  # Processes the file, replacing spec_insert blocks with rendered content
  # @param [Boolean] write_to_file Whether to write the changes back to the file
  # @return [String] The processed content
  # @raise [StandardError] re-raises whatever went wrong, after logging the file name
  def process(write_to_file: true)
    insertions = find_insertions
    # Replace from the bottom up so that earlier line indices stay valid.
    insertions.reverse_each { |start, finish, insert| @lines[start..finish] = insert.render_lines }
    content = @lines.join
    if insertions.any? && write_to_file
      puts "Updating #{@file_path}"
      File.write(@file_path, content)
    end
    content
  rescue StandardError => e
    puts "Error processing #{@file_path}: #{e.message}"
    raise # re-raise the original exception (`throw` would surface as UncaughtThrowError)
  end

  private

  # Locates every spec_insert block in the file.
  # @return [Array] [start_index, end_index, SpecInsert] triples in file order
  # @raise [RuntimeError] when a block is unterminated or blocks overlap
  def find_insertions
    start_indices = @lines.each_index.select { |index| @lines[index].match?(START_MARKER) }
    ranges = start_indices.map do |start|
      finish = (start...@lines.length).find { |index| @lines[index].match?(END_MARKER) }
      # Never fall back to end-of-file: that would overwrite the rest of the document.
      raise "Missing spec_insert_end marker for block starting on line #{start + 1}" if finish.nil?
      [start, finish]
    end

    # A start marker inside the previous block means two starts share one end marker.
    overlapping = ranges.each_cons(2).any? { |(_, finish), (start, _)| start <= finish }
    raise 'Mismatched start/end markers' if overlapping

    ranges.map { |start, finish| [start, finish, SpecInsert.new(extract_args(@lines[start..finish]))] }
  end

  # Parses the `key: value` lines between the start marker line and the line
  # closing the opening comment (`-->`). Values containing commas become arrays.
  # @param [Array] lines Lines of one spec_insert block, start marker first
  # @return [Hash] Argument name => String or Array of Strings
  def extract_args(lines)
    end_index = lines.index { |line| line.match?(/^\s*-->/) }
    arg_lines = end_index ? lines[1...end_index] : lines[1..]

    arg_lines.filter_map do |line|
      key, value = line.split(':', 2) # limit 2 keeps colons that belong to the value
      next if value.nil?
      value = value.include?(',') ? value.split(',').map(&:strip) : value.strip
      [key.strip, value]
    end.to_h
  end
end
# Renders a table of parameters of an API action
class ParameterTableRenderer
  COLUMNS = %w[Parameter Description Required Type Default].freeze
  DEFAULT_COLUMNS = %w[Parameter Type Description].freeze

  # @param [Array] parameters
  # @param [Boolean] include_global whether to include global arguments
  # @param [Boolean] include_deprecated whether to include deprecated arguments
  # @param [Array, String, nil] columns column titles to render, in order; a single
  #   title may be given as a String, nil selects DEFAULT_COLUMNS
  # @param [Boolean] pretty whether to render a pretty table or a compact one
  # @raise [ArgumentError] when a requested column is not one of COLUMNS
  def initialize(parameters, include_global: false, include_deprecated: true, columns: DEFAULT_COLUMNS, pretty: false)
    columns = Array(columns || DEFAULT_COLUMNS)
    invalid = columns - COLUMNS
    raise ArgumentError, "Invalid column(s): #{invalid.join(', ')}" unless invalid.empty?

    @pretty = pretty
    @columns = columns
    selected = parameters
    # Add globals before filtering so that include_deprecated applies to them too.
    selected += Parameter.global if include_global
    selected = selected.reject(&:deprecated) unless include_deprecated
    # Required first, deprecated last, alphabetical within each group.
    @parameters = selected.sort_by { |arg| [arg.required ? 0 : 1, arg.deprecated ? 1 : 0, arg.name] }
  end

  # @return [Array] Lines of the rendered markdown table
  def render_lines
    columns = @columns.map { |col| TableRenderer::Column.new(col, col) }
    rows = @parameters.map { |arg| row(arg) }
    TableRenderer.new(columns, rows, pretty: @pretty).render_lines
  end

  private

  # @return [Hash] Cell values of one parameter, keyed by column title
  def row(param)
    {
      # NOTE(review): the `<br>` is reconstructed - the reviewed copy showed a raw
      # line break here, which a markdown table cell cannot contain. Confirm.
      'Parameter' => "`#{param.name}`#{' <br> _DEPRECATED_' if param.deprecated}",
      'Description' => description(param),
      'Required' => param.required ? 'Required' : nil,
      'Type' => param.doc_type,
      'Default' => param.default
    }
  end

  # Builds the Description cell. The required flag and the default value are
  # folded into the text only when their dedicated columns are not rendered.
  def description(param)
    required = param.required && !@columns.include?('Required') ? '**(Required)** ' : ''
    text = param.description.to_s.gsub("\n", ' ') # description may be absent
    default = param.default.nil? || @columns.include?('Default') ? '' : " _(Default: #{param.default})_"

    "#{deprecation(param)}#{required}#{text}#{default}"
  end

  # @return [String, nil] Deprecation notice, or nil for non-deprecated parameters
  def deprecation(param)
    message = ": #{param.deprecation_message}" if filled?(param.deprecation_message)
    since = " since #{param.version_deprecated}" if filled?(param.version_deprecated)
    "_(Deprecated#{since}#{message})_ " if param.deprecated
  end

  # @return [Boolean] Whether the value is neither nil nor blank text
  def filled?(value)
    !value.to_s.strip.empty?
  end
end
# TableRenderer renders a markdown table with the given columns and rows
class TableRenderer
  # Column object for rendering markdown tables
  class Column
    attr_reader :title, :key
    attr_accessor :width

    # @param [String] title display title
    # @param [String | Symbol] key key to access in row hash
    def initialize(title, key)
      @title = title
      @key = key
      @width = 0 # stays 0 (no padding) unless a pretty render computes it
    end
  end

  # @param [Array] columns Column objects, in display order
  # @param [Array] rows Hashes holding each row's cell values by column key
  # @param [Boolean] pretty whether to render a pretty table or a compact one
  def initialize(columns, rows, pretty:)
    @columns = columns
    @rows = rows
    @pretty = pretty
  end

  # @return [Array] Header line, divider line, then one line per row
  def render_lines
    calculate_column_widths if @pretty
    [render_column, render_divider] + render_rows
  end

  private

  # Cell text as it appears in the output: pipes are escaped so they do not
  # terminate the markdown cell.
  def cell_text(row, column)
    row[column.key].to_s.gsub('|') { '\|' }
  end

  # Widths are measured on the escaped text, i.e. on what is actually printed,
  # so that cells containing pipes stay aligned.
  def calculate_column_widths
    @columns.each do |column|
      cell_lengths = @rows.map { |row| cell_text(row, column).length }
      column.width = [column.title.length, *cell_lengths].max
    end
  end

  def render_column
    titles = @columns.map { |column| column.title.ljust(column.width) }.join(' | ')
    @pretty ? "| #{titles} |" : titles
  end

  def render_divider
    dividers = @columns.map { |column| ":#{'-' * [column.width + 1, 3].max}" }
    @pretty ? "|#{dividers.join('|')}|" : dividers.join(' | ')
  end

  def render_rows
    @rows.map do |row|
      # Escape first, pad second: padding the raw text would push escaped cells past the column.
      cells = @columns.map { |column| cell_text(row, column).ljust(column.width) }.join(' | ')
      @pretty ? "| #{cells} |" : cells
    end
  end
end
:parsed; end + + attr_reader :hash + + # @param [Hash] hash + def initialize(hash = {}, parsed: true) + @hash = parsed ? hash : parse(hash) + end + + def [](key) + parse(@hash[key]) + end + + def respond_to_missing?(name, include_private = false) + @hash.key?(name.to_s) || @hash.respond_to?(name) || super + end + + def method_missing(name, ...) + return @hash.send(name, ...) if @hash.respond_to?(name) + parse(@hash[name.to_s]) + end + + private + + def parse(value) + return value.map { |v| parse(v) } if value.is_a?(Array) + return value unless value.is_a?(Hash) + ref = value.delete('$ref') + value.transform_values! { |v| parse(v) } + return SpecHash.new(value) unless ref + SpecHash.new(parse(resolve(ref)).merge(value)) + end + + def resolve(ref) + parts = ref.split('/') + parts.shift + self.class.raw.dig(*parts) + end +end diff --git a/spec_insert/lib/spec_inserter.rb b/spec_insert/lib/spec_inserter.rb new file mode 100644 index 0000000000..f29dbf3349 --- /dev/null +++ b/spec_insert/lib/spec_inserter.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +require 'active_support/all' +require_relative 'spec_hash' +require_relative 'doc_processor' + +# Insert the OpenSearch API specification info into the documentation repository +class SpecInserter + # @param [String] spec_file Path to the OpenSearch API specification file + # @param [String] root_folder Path to the documentation website root folder + # @param [String] ignored Path to the file containing the list of ignored files and folders + def initialize(spec_file:, root_folder:, ignored:) + SpecHash.load_file(spec_file) + @root_folder = Pathname.new root_folder + @ignored_folders = File.readlines(ignored).map(&:strip).filter_map do |path| + @root_folder.join(path) if path.present? && !path.start_with?('#') + end.map(&:to_s) + puts "Ignored folders: #{@ignored_folders}" + end + + def insert_spec + Dir.glob(@root_folder.join('**/*.md')) + .filter { |file| @ignored_folders.none? 
{ |ignored| file.start_with?(ignored) } } + .filter { |file| File.basename(file) !~ /^[A-Z_]+\.md$/ } + .each do |file| + DocProcessor.new(file).process + end + end +end diff --git a/spec_insert/spec/_fixtures/input/param_tables.md b/spec_insert/spec/_fixtures/input/param_tables.md new file mode 100644 index 0000000000..82f0bea9a3 --- /dev/null +++ b/spec_insert/spec/_fixtures/input/param_tables.md @@ -0,0 +1,131 @@ +Typical Path Params Example + + +Parameter | Type | Description +:--- | :--- | :--- +`index` | List | Comma-separated list of data streams, indices, and aliases to search. Supports wildcards (`*`). To search all data streams and indices, omit this parameter or use `*` or `_all`. + + +Query Params Example with Global Params, Pretty Print, and Custom Columns + + +| Type | Parameter | Description | Required | Default | +|:----------------|:--------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------|:--------| +| Boolean / List | 
`_source` | Indicates which source fields are returned for matching documents. These fields are returned in the `hits._source` property of the search response. Valid values are: `true` to return the entire document source; `false` to not return the document source; `` to return the source fields that are specified as a comma-separated list (supports wildcard (`*`) patterns). | | | +| List | `_source_excludes` | A comma-separated list of source fields to exclude from the response. You can also use this parameter to exclude fields from the subset specified in `_source_includes` query parameter. If the `_source` parameter is `false`, this parameter is ignored. | | | +| List | `_source_includes` | A comma-separated list of source fields to include in the response. If this parameter is specified, only these source fields are returned. You can exclude fields from this subset using the `_source_excludes` query parameter. If the `_source` parameter is `false`, this parameter is ignored. | | | +| Boolean | `allow_no_indices` | If `false`, the request returns an error if any wildcard expression, index alias, or `_all` value targets only missing or closed indices. This behavior applies even if the request targets other open indices. For example, a request targeting `foo*,bar*` returns an error if an index starts with `foo` but no index starts with `bar`. | | | +| Boolean | `allow_partial_search_results` | If true, returns partial results if there are shard request timeouts or shard failures. If false, returns an error with no partial results. | | | +| Boolean | `analyze_wildcard` | If true, wildcard and prefix queries are analyzed. This parameter can only be used when the q query string parameter is specified. | | | +| String | `analyzer` | Analyzer to use for the query string. This parameter can only be used when the q query string parameter is specified. 
| | | +| Float | `batched_reduce_size` | The number of shard results that should be reduced at once on the coordinating node. This value should be used as a protection mechanism to reduce the memory overhead per search request if the potential number of shards in the request can be large. | | | +| String | `cancel_after_time_interval` | The time after which the search request will be canceled. Request-level parameter takes precedence over `cancel_after_time_interval` cluster setting. | | | +| Boolean | `ccs_minimize_roundtrips` | If true, network round-trips between the coordinating node and the remote clusters are minimized when executing cross-cluster search (CCS) requests. | | | +| String | `default_operator` | The default operator for query string query: AND or OR. This parameter can only be used when the `q` query string parameter is specified. | | | +| String | `df` | Field to use as default where no field prefix is given in the query string. This parameter can only be used when the q query string parameter is specified. | | | +| List | `docvalue_fields` | A comma-separated list of fields to return as the docvalue representation for each hit. | | | +| Boolean | `error_trace` | Whether to include the stack trace of returned errors. | | | +| List | `expand_wildcards` | Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such as `open,hidden`. | | | +| Boolean | `explain` | If `true`, returns detailed information about score computation as part of a hit. | | | +| List | `filter_path` | Used to reduce the response. This parameter takes a comma-separated list of filters. It supports using wildcards to match any field or part of a field’s name. You can also exclude fields with "-". | | | +| Float | `from` | Starting document offset. Needs to be non-negative. 
By default, you cannot page through more than 10,000 hits using the `from` and `size` parameters. To page through more hits, use the `search_after` parameter. | | | +| Boolean | `human` | Whether to return human readable values for statistics. | | | +| Boolean | `ignore_throttled` | If `true`, concrete, expanded or aliased indices will be ignored when frozen. | | | +| Boolean | `ignore_unavailable` | If `false`, the request returns an error if it targets a missing or closed index. | | | +| Boolean | `include_named_queries_score` | Indicates whether hit.matched_queries should be rendered as a map that includes the name of the matched query associated with its score (true) or as an array containing the name of the matched queries (false) | | | +| Boolean | `lenient` | If `true`, format-based query failures (such as providing text to a numeric field) in the query string will be ignored. This parameter can only be used when the `q` query string parameter is specified. | | | +| Float | `max_concurrent_shard_requests` | Defines the number of concurrent shard requests per node this search executes concurrently. This value should be used to limit the impact of the search on the cluster in order to limit the number of concurrent shard requests. | | | +| Boolean | `phase_took` | Indicates whether to return phase-level `took` time values in the response. | | | +| Float | `pre_filter_shard_size` | Defines a threshold that enforces a pre-filter roundtrip to prefilter search shards based on query rewriting if the number of shards the search request expands to exceeds the threshold. This filter roundtrip can limit the number of shards significantly if for instance a shard can not match any documents based on its rewrite method (if date filters are mandatory to match but the shard bounds and the query are disjoint). 
When unspecified, the pre-filter phase is executed if any of these conditions is met: the request targets more than 128 shards; the request targets one or more read-only index; the primary sort of the query targets an indexed field. | | | +| String | `preference` | Nodes and shards used for the search. By default, OpenSearch selects from eligible nodes and shards using adaptive replica selection, accounting for allocation awareness. Valid values are: `_only_local` to run the search only on shards on the local node; `_local` to, if possible, run the search on shards on the local node, or if not, select shards using the default method; `_only_nodes:,` to run the search on only the specified nodes IDs, where, if suitable shards exist on more than one selected node, use shards on those nodes using the default method, or if none of the specified nodes are available, select shards from any available node using the default method; `_prefer_nodes:,` to if possible, run the search on the specified nodes IDs, or if not, select shards using the default method; `_shards:,` to run the search only on the specified shards; `` (any string that does not start with `_`) to route searches with the same `` to the same shards in the same order. | | | +| Boolean | `pretty` | Whether to pretty format the returned JSON response. | | | +| String | `q` | Query in the Lucene query string syntax using query parameter search. Query parameter searches do not support the full OpenSearch Query DSL but are handy for testing. | | | +| Boolean | `request_cache` | If `true`, the caching of search results is enabled for requests where `size` is `0`. Defaults to index level settings. | | | +| Boolean | `rest_total_hits_as_int` | Indicates whether `hits.total` should be rendered as an integer or an object in the rest search response. | | | +| List | `routing` | Custom value used to route operations to a specific shard. | | | +| String | `scroll` | Period to retain the search context for scrolling. 
See Scroll search results. By default, this value cannot exceed `1d` (24 hours). You can change this limit using the `search.max_keep_alive` cluster-level setting. | | | +| String | `search_pipeline` | Customizable sequence of processing stages applied to search queries. | | | +| String | `search_type` | How distributed term frequencies are calculated for relevance scoring. | | | +| Boolean | `seq_no_primary_term` | If `true`, returns sequence number and primary term of the last modification of each hit. | | | +| Float | `size` | Defines the number of hits to return. By default, you cannot page through more than 10,000 hits using the `from` and `size` parameters. To page through more hits, use the `search_after` parameter. | | | +| List | `sort` | A comma-separated list of : pairs. | | | +| String | `source` | The URL-encoded request definition. Useful for libraries that do not accept a request body for non-POST requests. | | | +| List | `stats` | Specific `tag` of the request for logging and statistical purposes. | | | +| List | `stored_fields` | A comma-separated list of stored fields to return as part of a hit. If no fields are specified, no stored fields are included in the response. If this field is specified, the `_source` parameter defaults to `false`. You can pass `_source: true` to return both source fields and stored fields in the search response. | | | +| String | `suggest_field` | Specifies which field to use for suggestions. | | | +| String | `suggest_mode` | Specifies the suggest mode. This parameter can only be used when the `suggest_field` and `suggest_text` query string parameters are specified. | | | +| Float | `suggest_size` | Number of suggestions to return. This parameter can only be used when the `suggest_field` and `suggest_text` query string parameters are specified. | | | +| String | `suggest_text` | The source text for which the suggestions should be returned. 
This parameter can only be used when the `suggest_field` and `suggest_text` query string parameters are specified. | | | +| Float | `terminate_after` | Maximum number of documents to collect for each shard. If a query reaches this limit, OpenSearch terminates the query early. OpenSearch collects documents before sorting. Use with caution. OpenSearch applies this parameter to each shard handling the request. When possible, let OpenSearch perform early termination automatically. Avoid specifying this parameter for requests that target data streams with backing indices across multiple data tiers. If set to `0` (default), the query does not terminate early. | | | +| String | `timeout` | Specifies the period of time to wait for a response from each shard. If no response is received before the timeout expires, the request fails and returns an error. | | | +| Boolean | `track_scores` | If `true`, calculate and return document scores, even if the scores are not used for sorting. | | | +| Boolean / Float | `track_total_hits` | Number of hits matching the query to count accurately. If `true`, the exact number of hits is returned at the cost of some performance. If `false`, the response does not include the total number of hits matching the query. | | | +| Boolean | `typed_keys` | If `true`, aggregation and suggester names are be prefixed by their respective types in the response. | | | +| Boolean | `version` | If `true`, returns document version as part of a hit. | | | + + +Query Params Example with only Parameter and Description Columns + + +Parameter | Description +:--- | :--- +`_source` | Indicates which source fields are returned for matching documents. These fields are returned in the `hits._source` property of the search response. Valid values are: `true` to return the entire document source; `false` to not return the document source; `` to return the source fields that are specified as a comma-separated list (supports wildcard (`*`) patterns). 
+`_source_excludes` | A comma-separated list of source fields to exclude from the response. You can also use this parameter to exclude fields from the subset specified in `_source_includes` query parameter. If the `_source` parameter is `false`, this parameter is ignored. +`_source_includes` | A comma-separated list of source fields to include in the response. If this parameter is specified, only these source fields are returned. You can exclude fields from this subset using the `_source_excludes` query parameter. If the `_source` parameter is `false`, this parameter is ignored. +`allow_no_indices` | If `false`, the request returns an error if any wildcard expression, index alias, or `_all` value targets only missing or closed indices. This behavior applies even if the request targets other open indices. For example, a request targeting `foo*,bar*` returns an error if an index starts with `foo` but no index starts with `bar`. +`allow_partial_search_results` | If true, returns partial results if there are shard request timeouts or shard failures. If false, returns an error with no partial results. +`analyze_wildcard` | If true, wildcard and prefix queries are analyzed. This parameter can only be used when the q query string parameter is specified. +`analyzer` | Analyzer to use for the query string. This parameter can only be used when the q query string parameter is specified. +`batched_reduce_size` | The number of shard results that should be reduced at once on the coordinating node. This value should be used as a protection mechanism to reduce the memory overhead per search request if the potential number of shards in the request can be large. +`cancel_after_time_interval` | The time after which the search request will be canceled. Request-level parameter takes precedence over `cancel_after_time_interval` cluster setting. 
+`ccs_minimize_roundtrips` | If true, network round-trips between the coordinating node and the remote clusters are minimized when executing cross-cluster search (CCS) requests. +`default_operator` | The default operator for query string query: AND or OR. This parameter can only be used when the `q` query string parameter is specified. +`df` | Field to use as default where no field prefix is given in the query string. This parameter can only be used when the q query string parameter is specified. +`docvalue_fields` | A comma-separated list of fields to return as the docvalue representation for each hit. +`expand_wildcards` | Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such as `open,hidden`. +`explain` | If `true`, returns detailed information about score computation as part of a hit. +`from` | Starting document offset. Needs to be non-negative. By default, you cannot page through more than 10,000 hits using the `from` and `size` parameters. To page through more hits, use the `search_after` parameter. +`ignore_throttled` | If `true`, concrete, expanded or aliased indices will be ignored when frozen. +`ignore_unavailable` | If `false`, the request returns an error if it targets a missing or closed index. +`include_named_queries_score` | Indicates whether hit.matched_queries should be rendered as a map that includes the name of the matched query associated with its score (true) or as an array containing the name of the matched queries (false) +`lenient` | If `true`, format-based query failures (such as providing text to a numeric field) in the query string will be ignored. This parameter can only be used when the `q` query string parameter is specified. +`max_concurrent_shard_requests` | Defines the number of concurrent shard requests per node this search executes concurrently. 
This value should be used to limit the impact of the search on the cluster in order to limit the number of concurrent shard requests. +`phase_took` | Indicates whether to return phase-level `took` time values in the response. +`pre_filter_shard_size` | Defines a threshold that enforces a pre-filter roundtrip to prefilter search shards based on query rewriting if the number of shards the search request expands to exceeds the threshold. This filter roundtrip can limit the number of shards significantly if for instance a shard can not match any documents based on its rewrite method (if date filters are mandatory to match but the shard bounds and the query are disjoint). When unspecified, the pre-filter phase is executed if any of these conditions is met: the request targets more than 128 shards; the request targets one or more read-only index; the primary sort of the query targets an indexed field. +`preference` | Nodes and shards used for the search. By default, OpenSearch selects from eligible nodes and shards using adaptive replica selection, accounting for allocation awareness. Valid values are: `_only_local` to run the search only on shards on the local node; `_local` to, if possible, run the search on shards on the local node, or if not, select shards using the default method; `_only_nodes:,` to run the search on only the specified nodes IDs, where, if suitable shards exist on more than one selected node, use shards on those nodes using the default method, or if none of the specified nodes are available, select shards from any available node using the default method; `_prefer_nodes:,` to if possible, run the search on the specified nodes IDs, or if not, select shards using the default method; `_shards:,` to run the search only on the specified shards; `` (any string that does not start with `_`) to route searches with the same `` to the same shards in the same order. +`q` | Query in the Lucene query string syntax using query parameter search. 
Query parameter searches do not support the full OpenSearch Query DSL but are handy for testing. +`request_cache` | If `true`, the caching of search results is enabled for requests where `size` is `0`. Defaults to index level settings. +`rest_total_hits_as_int` | Indicates whether `hits.total` should be rendered as an integer or an object in the rest search response. +`routing` | Custom value used to route operations to a specific shard. +`scroll` | Period to retain the search context for scrolling. See Scroll search results. By default, this value cannot exceed `1d` (24 hours). You can change this limit using the `search.max_keep_alive` cluster-level setting. +`search_pipeline` | Customizable sequence of processing stages applied to search queries. +`search_type` | How distributed term frequencies are calculated for relevance scoring. +`seq_no_primary_term` | If `true`, returns sequence number and primary term of the last modification of each hit. +`size` | Defines the number of hits to return. By default, you cannot page through more than 10,000 hits using the `from` and `size` parameters. To page through more hits, use the `search_after` parameter. +`sort` | A comma-separated list of : pairs. +`stats` | Specific `tag` of the request for logging and statistical purposes. +`stored_fields` | A comma-separated list of stored fields to return as part of a hit. If no fields are specified, no stored fields are included in the response. If this field is specified, the `_source` parameter defaults to `false`. You can pass `_source: true` to return both source fields and stored fields in the search response. +`suggest_field` | Specifies which field to use for suggestions. +`suggest_mode` | Specifies the suggest mode. This parameter can only be used when the `suggest_field` and `suggest_text` query string parameters are specified. +`suggest_size` | Number of suggestions to return. 
This parameter can only be used when the `suggest_field` and `suggest_text` query string parameters are specified. +`suggest_text` | The source text for which the suggestions should be returned. This parameter can only be used when the `suggest_field` and `suggest_text` query string parameters are specified. +`terminate_after` | Maximum number of documents to collect for each shard. If a query reaches this limit, OpenSearch terminates the query early. OpenSearch collects documents before sorting. Use with caution. OpenSearch applies this parameter to each shard handling the request. When possible, let OpenSearch perform early termination automatically. Avoid specifying this parameter for requests that target data streams with backing indices across multiple data tiers. If set to `0` (default), the query does not terminate early. +`timeout` | Specifies the period of time to wait for a response from each shard. If no response is received before the timeout expires, the request fails and returns an error. +`track_scores` | If `true`, calculate and return document scores, even if the scores are not used for sorting. +`track_total_hits` | Number of hits matching the query to count accurately. If `true`, the exact number of hits is returned at the cost of some performance. If `false`, the response does not include the total number of hits matching the query. +`typed_keys` | If `true`, aggregation and suggester names are prefixed by their respective types in the response. +`version` | If `true`, returns document version as part of a hit. 
+ diff --git a/spec_insert/spec/_fixtures/input/paths_and_http_methods.md b/spec_insert/spec/_fixtures/input/paths_and_http_methods.md new file mode 100644 index 0000000000..771dbb44d6 --- /dev/null +++ b/spec_insert/spec/_fixtures/input/paths_and_http_methods.md @@ -0,0 +1,13 @@ +### Paths and HTTP Methods + + +```json +GET /_search +POST /_search +GET /{index}/_search +POST /{index}/_search +``` + diff --git a/spec_insert/spec/_fixtures/opensearch_spec.yaml b/spec_insert/spec/_fixtures/opensearch_spec.yaml new file mode 100644 index 0000000000..66f205a240 --- /dev/null +++ b/spec_insert/spec/_fixtures/opensearch_spec.yaml @@ -0,0 +1,120 @@ +openapi: 3.1.0 +info: + title: OpenSearch API Specification + version: 1.0.0 + x-api-version: 2.16.0 +paths: + /_search: + get: + operationId: search.0 + x-operation-group: search + x-version-added: '1.0' + description: Returns results matching a query. + externalDocs: + url: https://opensearch.org/docs/latest/api-reference/search/ + parameters: + - $ref: '#/components/parameters/search::query.analyze_wildcard' + - $ref: '#/components/parameters/search::query.analyzer' + post: + operationId: search.1 + x-operation-group: search + x-version-added: '1.0' + description: Returns results matching a query. + externalDocs: + url: https://opensearch.org/docs/latest/api-reference/search/ + parameters: + - $ref: '#/components/parameters/search::query.analyze_wildcard' + - $ref: '#/components/parameters/search::query.analyzer' + /{index}/_search: + get: + operationId: search.2 + x-operation-group: search + x-version-added: '1.0' + description: Returns results matching a query. 
+ externalDocs: + url: https://opensearch.org/docs/latest/api-reference/search/ + parameters: + - $ref: '#/components/parameters/search::path.index' + - $ref: '#/components/parameters/search::query.analyze_wildcard' + - $ref: '#/components/parameters/search::query.analyzer' + post: + operationId: search.3 + x-operation-group: search + x-version-added: '1.0' + description: Returns results matching a query. + externalDocs: + url: https://opensearch.org/docs/latest/api-reference/search/ + parameters: + - $ref: '#/components/parameters/search::path.index' + - $ref: '#/components/parameters/search::query.analyze_wildcard' + - $ref: '#/components/parameters/search::query.analyzer' +components: + + parameters: + + _global::query.pretty: + name: pretty + in: query + description: Whether to pretty format the returned JSON response. + schema: + type: boolean + default: false + x-global: true + + _global::query.human: + name: human + in: query + description: Whether to return human readable values for statistics. + schema: + type: boolean + default: true + x-global: true + deprecated: true + x-version-deprecated: '3.0' + x-deprecation-message: Use the `format` parameter instead. + + search::path.index: + in: path + name: index + description: |- + Comma-separated list of data streams, indices, and aliases to search. + Supports wildcards (`*`). + To search all data streams and indices, omit this parameter or use `*` or `_all`. + required: true + schema: + $ref: '#/components/schemas/_common:Indices' + style: simple + + search::query.analyze_wildcard: + in: query + name: analyze_wildcard + required: true + description: |- + If true, wildcard and prefix queries are analyzed. + This parameter can only be used when the q query string parameter is specified. + schema: + type: boolean + default: false + style: form + + search::query.analyzer: + in: query + name: analyzer + description: |- + Analyzer to use for the query string. 
+ This parameter can only be used when the q query string parameter is specified. + schema: + type: string + style: form + + schemas: + + _common:Indices: + oneOf: + - $ref: '#/components/schemas/_common:IndexName' + - type: array + items: + $ref: '#/components/schemas/_common:IndexName' + + _common:IndexName: + type: string diff --git a/spec_insert/spec/_fixtures/output/param_tables.md b/spec_insert/spec/_fixtures/output/param_tables.md new file mode 100644 index 0000000000..82f0bea9a3 --- /dev/null +++ b/spec_insert/spec/_fixtures/output/param_tables.md @@ -0,0 +1,131 @@ +Typical Path Params Example + + +Parameter | Type | Description +:--- | :--- | :--- +`index` | List | Comma-separated list of data streams, indices, and aliases to search. Supports wildcards (`*`). To search all data streams and indices, omit this parameter or use `*` or `_all`. + + +Query Params Example with Global Params, Pretty Print, and Custom Columns + + +| Type | Parameter | Description | Required | Default | +|:----------------|:--------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------|:---------|:--------| +| Boolean / List | `_source` | Indicates which source fields are returned for matching documents. These fields are returned in the `hits._source` property of the search response. Valid values are: `true` to return the entire document source; `false` to not return the document source; `` to return the source fields that are specified as a comma-separated list (supports wildcard (`*`) patterns). | | | +| List | `_source_excludes` | A comma-separated list of source fields to exclude from the response. You can also use this parameter to exclude fields from the subset specified in `_source_includes` query parameter. If the `_source` parameter is `false`, this parameter is ignored. | | | +| List | `_source_includes` | A comma-separated list of source fields to include in the response. If this parameter is specified, only these source fields are returned. You can exclude fields from this subset using the `_source_excludes` query parameter. If the `_source` parameter is `false`, this parameter is ignored. | | | +| Boolean | `allow_no_indices` | If `false`, the request returns an error if any wildcard expression, index alias, or `_all` value targets only missing or closed indices. This behavior applies even if the request targets other open indices. For example, a request targeting `foo*,bar*` returns an error if an index starts with `foo` but no index starts with `bar`. | | | +| Boolean | `allow_partial_search_results` | If true, returns partial results if there are shard request timeouts or shard failures. If false, returns an error with no partial results. | | | +| Boolean | `analyze_wildcard` | If true, wildcard and prefix queries are analyzed. This parameter can only be used when the q query string parameter is specified. | | | +| String | `analyzer` | Analyzer to use for the query string. 
This parameter can only be used when the q query string parameter is specified. | | | +| Float | `batched_reduce_size` | The number of shard results that should be reduced at once on the coordinating node. This value should be used as a protection mechanism to reduce the memory overhead per search request if the potential number of shards in the request can be large. | | | +| String | `cancel_after_time_interval` | The time after which the search request will be canceled. Request-level parameter takes precedence over `cancel_after_time_interval` cluster setting. | | | +| Boolean | `ccs_minimize_roundtrips` | If true, network round-trips between the coordinating node and the remote clusters are minimized when executing cross-cluster search (CCS) requests. | | | +| String | `default_operator` | The default operator for query string query: AND or OR. This parameter can only be used when the `q` query string parameter is specified. | | | +| String | `df` | Field to use as default where no field prefix is given in the query string. This parameter can only be used when the q query string parameter is specified. | | | +| List | `docvalue_fields` | A comma-separated list of fields to return as the docvalue representation for each hit. | | | +| Boolean | `error_trace` | Whether to include the stack trace of returned errors. | | | +| List | `expand_wildcards` | Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such as `open,hidden`. | | | +| Boolean | `explain` | If `true`, returns detailed information about score computation as part of a hit. | | | +| List | `filter_path` | Used to reduce the response. This parameter takes a comma-separated list of filters. It supports using wildcards to match any field or part of a field’s name. You can also exclude fields with "-". | | | +| Float | `from` | Starting document offset. 
Needs to be non-negative. By default, you cannot page through more than 10,000 hits using the `from` and `size` parameters. To page through more hits, use the `search_after` parameter. | | | +| Boolean | `human` | Whether to return human readable values for statistics. | | | +| Boolean | `ignore_throttled` | If `true`, concrete, expanded or aliased indices will be ignored when frozen. | | | +| Boolean | `ignore_unavailable` | If `false`, the request returns an error if it targets a missing or closed index. | | | +| Boolean | `include_named_queries_score` | Indicates whether hit.matched_queries should be rendered as a map that includes the name of the matched query associated with its score (true) or as an array containing the name of the matched queries (false) | | | +| Boolean | `lenient` | If `true`, format-based query failures (such as providing text to a numeric field) in the query string will be ignored. This parameter can only be used when the `q` query string parameter is specified. | | | +| Float | `max_concurrent_shard_requests` | Defines the number of concurrent shard requests per node this search executes concurrently. This value should be used to limit the impact of the search on the cluster in order to limit the number of concurrent shard requests. | | | +| Boolean | `phase_took` | Indicates whether to return phase-level `took` time values in the response. | | | +| Float | `pre_filter_shard_size` | Defines a threshold that enforces a pre-filter roundtrip to prefilter search shards based on query rewriting if the number of shards the search request expands to exceeds the threshold. This filter roundtrip can limit the number of shards significantly if for instance a shard can not match any documents based on its rewrite method (if date filters are mandatory to match but the shard bounds and the query are disjoint). 
When unspecified, the pre-filter phase is executed if any of these conditions is met: the request targets more than 128 shards; the request targets one or more read-only index; the primary sort of the query targets an indexed field. | | | +| String | `preference` | Nodes and shards used for the search. By default, OpenSearch selects from eligible nodes and shards using adaptive replica selection, accounting for allocation awareness. Valid values are: `_only_local` to run the search only on shards on the local node; `_local` to, if possible, run the search on shards on the local node, or if not, select shards using the default method; `_only_nodes:,` to run the search on only the specified nodes IDs, where, if suitable shards exist on more than one selected node, use shards on those nodes using the default method, or if none of the specified nodes are available, select shards from any available node using the default method; `_prefer_nodes:,` to if possible, run the search on the specified nodes IDs, or if not, select shards using the default method; `_shards:,` to run the search only on the specified shards; `` (any string that does not start with `_`) to route searches with the same `` to the same shards in the same order. | | | +| Boolean | `pretty` | Whether to pretty format the returned JSON response. | | | +| String | `q` | Query in the Lucene query string syntax using query parameter search. Query parameter searches do not support the full OpenSearch Query DSL but are handy for testing. | | | +| Boolean | `request_cache` | If `true`, the caching of search results is enabled for requests where `size` is `0`. Defaults to index level settings. | | | +| Boolean | `rest_total_hits_as_int` | Indicates whether `hits.total` should be rendered as an integer or an object in the rest search response. | | | +| List | `routing` | Custom value used to route operations to a specific shard. | | | +| String | `scroll` | Period to retain the search context for scrolling. 
See Scroll search results. By default, this value cannot exceed `1d` (24 hours). You can change this limit using the `search.max_keep_alive` cluster-level setting. | | | +| String | `search_pipeline` | Customizable sequence of processing stages applied to search queries. | | | +| String | `search_type` | How distributed term frequencies are calculated for relevance scoring. | | | +| Boolean | `seq_no_primary_term` | If `true`, returns sequence number and primary term of the last modification of each hit. | | | +| Float | `size` | Defines the number of hits to return. By default, you cannot page through more than 10,000 hits using the `from` and `size` parameters. To page through more hits, use the `search_after` parameter. | | | +| List | `sort` | A comma-separated list of : pairs. | | | +| String | `source` | The URL-encoded request definition. Useful for libraries that do not accept a request body for non-POST requests. | | | +| List | `stats` | Specific `tag` of the request for logging and statistical purposes. | | | +| List | `stored_fields` | A comma-separated list of stored fields to return as part of a hit. If no fields are specified, no stored fields are included in the response. If this field is specified, the `_source` parameter defaults to `false`. You can pass `_source: true` to return both source fields and stored fields in the search response. | | | +| String | `suggest_field` | Specifies which field to use for suggestions. | | | +| String | `suggest_mode` | Specifies the suggest mode. This parameter can only be used when the `suggest_field` and `suggest_text` query string parameters are specified. | | | +| Float | `suggest_size` | Number of suggestions to return. This parameter can only be used when the `suggest_field` and `suggest_text` query string parameters are specified. | | | +| String | `suggest_text` | The source text for which the suggestions should be returned. 
This parameter can only be used when the `suggest_field` and `suggest_text` query string parameters are specified. | | | +| Float | `terminate_after` | Maximum number of documents to collect for each shard. If a query reaches this limit, OpenSearch terminates the query early. OpenSearch collects documents before sorting. Use with caution. OpenSearch applies this parameter to each shard handling the request. When possible, let OpenSearch perform early termination automatically. Avoid specifying this parameter for requests that target data streams with backing indices across multiple data tiers. If set to `0` (default), the query does not terminate early. | | | +| String | `timeout` | Specifies the period of time to wait for a response from each shard. If no response is received before the timeout expires, the request fails and returns an error. | | | +| Boolean | `track_scores` | If `true`, calculate and return document scores, even if the scores are not used for sorting. | | | +| Boolean / Float | `track_total_hits` | Number of hits matching the query to count accurately. If `true`, the exact number of hits is returned at the cost of some performance. If `false`, the response does not include the total number of hits matching the query. | | | +| Boolean | `typed_keys` | If `true`, aggregation and suggester names are be prefixed by their respective types in the response. | | | +| Boolean | `version` | If `true`, returns document version as part of a hit. | | | + + +Query Params Example with only Parameter and Description Columns + + +Parameter | Description +:--- | :--- +`_source` | Indicates which source fields are returned for matching documents. These fields are returned in the `hits._source` property of the search response. Valid values are: `true` to return the entire document source; `false` to not return the document source; `` to return the source fields that are specified as a comma-separated list (supports wildcard (`*`) patterns). 
+`_source_excludes` | A comma-separated list of source fields to exclude from the response. You can also use this parameter to exclude fields from the subset specified in `_source_includes` query parameter. If the `_source` parameter is `false`, this parameter is ignored. +`_source_includes` | A comma-separated list of source fields to include in the response. If this parameter is specified, only these source fields are returned. You can exclude fields from this subset using the `_source_excludes` query parameter. If the `_source` parameter is `false`, this parameter is ignored. +`allow_no_indices` | If `false`, the request returns an error if any wildcard expression, index alias, or `_all` value targets only missing or closed indices. This behavior applies even if the request targets other open indices. For example, a request targeting `foo*,bar*` returns an error if an index starts with `foo` but no index starts with `bar`. +`allow_partial_search_results` | If true, returns partial results if there are shard request timeouts or shard failures. If false, returns an error with no partial results. +`analyze_wildcard` | If true, wildcard and prefix queries are analyzed. This parameter can only be used when the q query string parameter is specified. +`analyzer` | Analyzer to use for the query string. This parameter can only be used when the q query string parameter is specified. +`batched_reduce_size` | The number of shard results that should be reduced at once on the coordinating node. This value should be used as a protection mechanism to reduce the memory overhead per search request if the potential number of shards in the request can be large. +`cancel_after_time_interval` | The time after which the search request will be canceled. Request-level parameter takes precedence over `cancel_after_time_interval` cluster setting. 
+`ccs_minimize_roundtrips` | If true, network round-trips between the coordinating node and the remote clusters are minimized when executing cross-cluster search (CCS) requests. +`default_operator` | The default operator for query string query: AND or OR. This parameter can only be used when the `q` query string parameter is specified. +`df` | Field to use as default where no field prefix is given in the query string. This parameter can only be used when the q query string parameter is specified. +`docvalue_fields` | A comma-separated list of fields to return as the docvalue representation for each hit. +`expand_wildcards` | Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such as `open,hidden`. +`explain` | If `true`, returns detailed information about score computation as part of a hit. +`from` | Starting document offset. Needs to be non-negative. By default, you cannot page through more than 10,000 hits using the `from` and `size` parameters. To page through more hits, use the `search_after` parameter. +`ignore_throttled` | If `true`, concrete, expanded or aliased indices will be ignored when frozen. +`ignore_unavailable` | If `false`, the request returns an error if it targets a missing or closed index. +`include_named_queries_score` | Indicates whether hit.matched_queries should be rendered as a map that includes the name of the matched query associated with its score (true) or as an array containing the name of the matched queries (false) +`lenient` | If `true`, format-based query failures (such as providing text to a numeric field) in the query string will be ignored. This parameter can only be used when the `q` query string parameter is specified. +`max_concurrent_shard_requests` | Defines the number of concurrent shard requests per node this search executes concurrently. 
This value should be used to limit the impact of the search on the cluster in order to limit the number of concurrent shard requests. +`phase_took` | Indicates whether to return phase-level `took` time values in the response. +`pre_filter_shard_size` | Defines a threshold that enforces a pre-filter roundtrip to prefilter search shards based on query rewriting if the number of shards the search request expands to exceeds the threshold. This filter roundtrip can limit the number of shards significantly if for instance a shard can not match any documents based on its rewrite method (if date filters are mandatory to match but the shard bounds and the query are disjoint). When unspecified, the pre-filter phase is executed if any of these conditions is met: the request targets more than 128 shards; the request targets one or more read-only index; the primary sort of the query targets an indexed field. +`preference` | Nodes and shards used for the search. By default, OpenSearch selects from eligible nodes and shards using adaptive replica selection, accounting for allocation awareness. Valid values are: `_only_local` to run the search only on shards on the local node; `_local` to, if possible, run the search on shards on the local node, or if not, select shards using the default method; `_only_nodes:,` to run the search on only the specified nodes IDs, where, if suitable shards exist on more than one selected node, use shards on those nodes using the default method, or if none of the specified nodes are available, select shards from any available node using the default method; `_prefer_nodes:,` to if possible, run the search on the specified nodes IDs, or if not, select shards using the default method; `_shards:,` to run the search only on the specified shards; `` (any string that does not start with `_`) to route searches with the same `` to the same shards in the same order. +`q` | Query in the Lucene query string syntax using query parameter search. 
Query parameter searches do not support the full OpenSearch Query DSL but are handy for testing. +`request_cache` | If `true`, the caching of search results is enabled for requests where `size` is `0`. Defaults to index level settings. +`rest_total_hits_as_int` | Indicates whether `hits.total` should be rendered as an integer or an object in the rest search response. +`routing` | Custom value used to route operations to a specific shard. +`scroll` | Period to retain the search context for scrolling. See Scroll search results. By default, this value cannot exceed `1d` (24 hours). You can change this limit using the `search.max_keep_alive` cluster-level setting. +`search_pipeline` | Customizable sequence of processing stages applied to search queries. +`search_type` | How distributed term frequencies are calculated for relevance scoring. +`seq_no_primary_term` | If `true`, returns sequence number and primary term of the last modification of each hit. +`size` | Defines the number of hits to return. By default, you cannot page through more than 10,000 hits using the `from` and `size` parameters. To page through more hits, use the `search_after` parameter. +`sort` | A comma-separated list of : pairs. +`stats` | Specific `tag` of the request for logging and statistical purposes. +`stored_fields` | A comma-separated list of stored fields to return as part of a hit. If no fields are specified, no stored fields are included in the response. If this field is specified, the `_source` parameter defaults to `false`. You can pass `_source: true` to return both source fields and stored fields in the search response. +`suggest_field` | Specifies which field to use for suggestions. +`suggest_mode` | Specifies the suggest mode. This parameter can only be used when the `suggest_field` and `suggest_text` query string parameters are specified. +`suggest_size` | Number of suggestions to return. 
This parameter can only be used when the `suggest_field` and `suggest_text` query string parameters are specified. +`suggest_text` | The source text for which the suggestions should be returned. This parameter can only be used when the `suggest_field` and `suggest_text` query string parameters are specified. +`terminate_after` | Maximum number of documents to collect for each shard. If a query reaches this limit, OpenSearch terminates the query early. OpenSearch collects documents before sorting. Use with caution. OpenSearch applies this parameter to each shard handling the request. When possible, let OpenSearch perform early termination automatically. Avoid specifying this parameter for requests that target data streams with backing indices across multiple data tiers. If set to `0` (default), the query does not terminate early. +`timeout` | Specifies the period of time to wait for a response from each shard. If no response is received before the timeout expires, the request fails and returns an error. +`track_scores` | If `true`, calculate and return document scores, even if the scores are not used for sorting. +`track_total_hits` | Number of hits matching the query to count accurately. If `true`, the exact number of hits is returned at the cost of some performance. If `false`, the response does not include the total number of hits matching the query. +`typed_keys` | If `true`, aggregation and suggester names are be prefixed by their respective types in the response. +`version` | If `true`, returns document version as part of a hit. 
+ diff --git a/spec_insert/spec/_fixtures/output/paths_and_http_methods.md b/spec_insert/spec/_fixtures/output/paths_and_http_methods.md new file mode 100644 index 0000000000..771dbb44d6 --- /dev/null +++ b/spec_insert/spec/_fixtures/output/paths_and_http_methods.md @@ -0,0 +1,13 @@ +### Paths and HTTP Methods + + +```json +GET /_search +POST /_search +GET /{index}/_search +POST /{index}/_search +``` + diff --git a/spec_insert/spec/doc_processor_spec.rb b/spec_insert/spec/doc_processor_spec.rb new file mode 100644 index 0000000000..33dd890ff0 --- /dev/null +++ b/spec_insert/spec/doc_processor_spec.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require_relative 'spec_helper' +require_relative '../lib/doc_processor' +require_relative '../lib/spec_hash' + +describe DocProcessor do + SpecHash.load_file('spec/_fixtures/opensearch_spec.yaml') + + def test_file(file_name) + expected_output = File.read("./spec/_fixtures/output/#{file_name}.md") + actual_output = described_class.new("spec/_fixtures/input/#{file_name}.md").process(write_to_file: false) + expect(actual_output).to eq(expected_output) + end + + it 'inserts the param tables correctly' do + test_file('param_tables') + end + + it 'inserts the paths and http methods correctly' do + test_file('paths_and_http_methods') + end +end diff --git a/spec_insert/spec/spec_helper.rb b/spec_insert/spec/spec_helper.rb new file mode 100644 index 0000000000..f820447ec0 --- /dev/null +++ b/spec_insert/spec/spec_helper.rb @@ -0,0 +1,102 @@ +# This file was generated by the `rspec --init` command. Conventionally, all +# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`. +# The generated `.rspec` file contains `--require spec_helper` which will cause +# this file to always be loaded, without a need to explicitly require it in any +# files. +# +# Given that it is always loaded, you are encouraged to keep this file as +# light-weight as possible. 
Requiring heavyweight dependencies from this file +# will add to the boot time of your test suite on EVERY test run, even for an +# individual file that may not need all of that loaded. Instead, consider making +# a separate helper file that requires the additional dependencies and performs +# the additional setup, and require it from the spec files that actually need +# it. +# +# See https://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration +RSpec.configure do |config| + # rspec-expectations config goes here. You can use an alternate + # assertion/expectation library such as wrong or the stdlib/minitest + # assertions if you prefer. + config.expect_with :rspec do |expectations| + # This option will default to `true` in RSpec 4. It makes the `description` + # and `failure_message` of custom matchers include text for helper methods + # defined using `chain`, e.g.: + # be_bigger_than(2).and_smaller_than(4).description + # # => "be bigger than 2 and smaller than 4" + # ...rather than: + # # => "be bigger than 2" + expectations.include_chain_clauses_in_custom_matcher_descriptions = true + end + + # rspec-mocks config goes here. You can use an alternate test double + # library (such as bogus or mocha) by changing the `mock_with` option here. + config.mock_with :rspec do |mocks| + # Prevents you from mocking or stubbing a method that does not exist on + # a real object. This is generally recommended, and will default to + # `true` in RSpec 4. + mocks.verify_partial_doubles = true + end + + # This option will default to `:apply_to_host_groups` in RSpec 4 (and will + # have no way to turn it off -- the option exists only for backwards + # compatibility in RSpec 3). It causes shared context metadata to be + # inherited by the metadata hash of host groups and examples, rather than + # triggering implicit auto-inclusion in groups with matching metadata. 
+ config.shared_context_metadata_behavior = :apply_to_host_groups + + # The settings below are suggested to provide a good initial experience + # with RSpec, but feel free to customize to your heart's content. + + # This allows you to limit a spec run to individual examples or groups + # you care about by tagging them with `:focus` metadata. When nothing + # is tagged with `:focus`, all examples get run. RSpec also provides + # aliases for `it`, `describe`, and `context` that include `:focus` + # metadata: `fit`, `fdescribe` and `fcontext`, respectively. + config.filter_run_when_matching :focus + + # Allows RSpec to persist some state between runs in order to support + # the `--only-failures` and `--next-failure` CLI options. We recommend + # you configure your source control system to ignore this file. + config.example_status_persistence_file_path = 'rspec_examples.txt' + + # Limits the available syntax to the non-monkey patched syntax that is + # recommended. For more details, see: + # https://rspec.info/features/3-12/rspec-core/configuration/zero-monkey-patching-mode/ + config.disable_monkey_patching! + + # This setting enables warnings. It's recommended, but in some cases may + # be too noisy due to issues in dependencies. + config.warnings = true + + # Many RSpec users commonly either run the entire suite or an individual + # file, and it's useful to allow more verbose output when running an + # individual spec file. + if config.files_to_run.one? + # Use the documentation formatter for detailed output, + # unless a formatter has already been configured + # (e.g. via a command-line flag). + config.default_formatter = 'doc' + end + + # Print the 10 slowest examples and example groups at the + # end of the spec run, to help surface which specs are running + # particularly slow. + config.profile_examples = 10 + + # Run specs in random order to surface order dependencies. 
If you find an + # order dependency and want to debug it, you can fix the order by providing + # the seed, which is printed after each run. + # --seed 1234 + config.order = :random + + # Seed global randomization in this process using the `--seed` CLI option. + # Setting this allows you to use `--seed` to deterministically reproduce + # test failures related to randomization by passing the same `--seed` value + # as the one that triggered the failure. + Kernel.srand config.seed + + config.expose_dsl_globally = true +end + +require 'active_support/all' +require 'rspec'