diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8f42554..58a60ce 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,31 @@
 # nextflow-io/nf-validation: Changelog
 
+# Version 2.0.0dev
+
+:warning: This version contains a number of breaking changes. Please read the changelog carefully before upgrading. :warning:
+
+To migrate your schemas, please follow the [migration guide](https://nextflow-io.github.io/nf-validation/latest/migration_guide/).
+
+## New features
+
+- Added the `uniqueEntries` keyword. This keyword takes a list of strings corresponding to the names of fields that must form a unique combination, e.g. `uniqueEntries: ['sample', 'replicate']` will make sure that the combination of the `sample` and `replicate` fields is unique. ([#141](https://github.com/nextflow-io/nf-validation/pull/141))
+
+## Changes
+
+- Changed the JSON schema draft used for validation from `draft-07` to `draft-2020-12`. See the [2019-09](https://json-schema.org/draft/2019-09/release-notes) and [2020-12](https://json-schema.org/draft/2020-12/release-notes) release notes for all changes ([#141](https://github.com/nextflow-io/nf-validation/pull/141))
+- Removed all validation code from the `.fromSamplesheet()` channel factory. Validation is now done solely in the `validateParameters()` function. A custom error message is displayed if an error is encountered during the conversion ([#141](https://github.com/nextflow-io/nf-validation/pull/141))
+- Removed the `unique` keyword from the samplesheet schema. You should now use [`uniqueItems`](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems) or `uniqueEntries` instead ([#141](https://github.com/nextflow-io/nf-validation/pull/141))
+- Removed the `skip_duplicate_check` option from the `fromSamplesheet()` channel factory and the `--validationSkipDuplicateCheck` parameter. You should now use the `uniqueEntries` or [`uniqueItems`](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems) keywords in the schema instead ([#141](https://github.com/nextflow-io/nf-validation/pull/141))
+- `.fromSamplesheet()` now does dynamic typecasting instead of using the `type` fields in the JSON schema, due to the complexity of `draft-2020-12` JSON schemas. The impact should be small, but keep in mind that some types may differ from earlier versions as a result ([#141](https://github.com/nextflow-io/nf-validation/pull/141))
+- `.fromSamplesheet()` will now set all missing values to `[]` instead of the type-specific defaults (a consequence of the previous point). This should require few changes, since `[]` also evaluates to `false` when used in conditions. ([#141](https://github.com/nextflow-io/nf-validation/pull/141))
+
+## Improvements
+
+- Setting the `exists` keyword to `false` will now check that the path does not exist ([#141](https://github.com/nextflow-io/nf-validation/pull/141))
+- The `schema` keyword will now work in all schemas. ([#141](https://github.com/nextflow-io/nf-validation/pull/141))
+- Improved the error messages ([#141](https://github.com/nextflow-io/nf-validation/pull/141))
+- `.fromSamplesheet()` now supports deeply nested samplesheets ([#141](https://github.com/nextflow-io/nf-validation/pull/141))
+
 # Version 1.1.3 - Asahikawa
 
 ## Improvements
diff --git a/README.md b/README.md
index f2d611e..6fb3aad 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ This [Nextflow plugin](https://www.nextflow.io/docs/latest/plugins.html#plugins)
 - 📋 Validate the contents of supplied sample sheet files
 - 🛠️ Create a Nextflow channel with a parsed sample sheet
 
-Supported sample sheet formats are CSV, TSV and YAML (simple).
+Supported sample sheet formats are CSV, TSV, JSON and YAML.
 
 ## Quick Start
 
@@ -31,7 +31,7 @@ This is all that is needed - Nextflow will automatically fetch the plugin code a
 > [!NOTE]
 > The snippet above will always try to install the latest version, good to make sure
 > that the latest bug fixes are included! However, this can cause difficulties if running
-> offline. You can pin a specific release using the syntax `nf-validation@0.3.2`
+> offline. You can pin a specific release using the syntax `nf-validation@2.0.0`
 
 You can now include the plugin helper functions into your Nextflow pipeline:
 
@@ -58,7 +58,7 @@ ch_input = Channel.fromSamplesheet("input")
 ## Dependencies
 
 - Java 11 or later
-
-
 
 ## Slack channel
 
@@ -75,3 +75,4 @@ We would like to thank the key contributors who include (but are not limited to)
 - Nicolas Vannieuwkerke ([@nvnieuwk](https://github.com/nvnieuwk))
 - Kevin Menden ([@KevinMenden](https://github.com/KevinMenden))
 - Phil Ewels ([@ewels](https://github.com/ewels))
+- Arthur ([@awgymer](https://github.com/awgymer))
diff --git a/docs/migration_guide.md b/docs/migration_guide.md
new file mode 100644
index 0000000..01864c2
--- /dev/null
+++ b/docs/migration_guide.md
@@ -0,0 +1,68 @@
+---
+title: Migration guide
+description: Guide for migrating pipelines from nf-validation pre-2.0.0 to v2.0.0 and later
+hide:
+  - toc
+---
+
+# Migration guide
+
+This guide is intended to help you migrate your pipeline from older versions of the plugin to version 2.0.0 and later.
+
+## Major changes in the plugin
+
+The following list shows the major breaking changes introduced in version 2.0.0 (a header-level sketch of the first change follows this list):
+
+1. The JSON schema draft has been updated from `draft-07` to `draft-2020-12`. See the [JSON Schema draft 2020-12 release notes](https://json-schema.org/draft/2020-12/release-notes) and [JSON schema draft 2019-09 release notes](https://json-schema.org/draft/2019-09/release-notes) for more information.
+2. The `unique` keyword for samplesheet schemas has been removed. Please use [`uniqueItems`](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems) or [`uniqueEntries`](nextflow_schema/nextflow_schema_specification.md#uniqueentries) instead.
+3. The `dependentRequired` keyword now works as specified in the JSON schema standard. See [`dependentRequired`](https://json-schema.org/understanding-json-schema/reference/conditionals#dependentRequired) for more information.
+
+A full list of changes can be found in the [changelog](../CHANGELOG.md).
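+
+For orientation, the schema draft and `definitions` → `defs` changes described above amount to the following header-level edits (a minimal sketch; `my_group` is a hypothetical definition group name, and real schemas will contain more keys):
+
+=== "Before v2.0"
+
+    ```json
+    {
+        "$schema": "http://json-schema.org/draft-07/schema",
+        "definitions": {
+            "my_group": {}
+        },
+        "allOf": [{ "$ref": "#/definitions/my_group" }]
+    }
+    ```
+
+=== "After v2.0"
+
+    ```json
+    {
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "defs": {
+            "my_group": {}
+        },
+        "allOf": [{ "$ref": "#/defs/my_group" }]
+    }
+    ```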
+
+## Updating your pipeline
+
+If you aren't using any special features in your schemas, you can simply update your `nextflow_schema.json` file using the following command:
+
+```bash
+sed -i -e 's/http:\/\/json-schema.org\/draft-07\/schema/https:\/\/json-schema.org\/draft\/2020-12\/schema/g' -e 's/definitions/defs/g' nextflow_schema.json
+```
+
+This will replace the old schema draft specification (`draft-07`) with the new one (`2020-12`), and the old keyword `definitions` with the new notation `defs`.
+
+!!! note
+
+    Repeat this command for every JSON schema you use in your pipeline, e.g. for the default samplesheet schema:
+
+    ```bash
+    sed -i -e 's/http:\/\/json-schema.org\/draft-07\/schema/https:\/\/json-schema.org\/draft\/2020-12\/schema/g' -e 's/definitions/defs/g' assets/schema_input.json
+    ```
+
+If you are using any special features in your schemas, you will need to update your schemas manually. Please refer to the [JSON Schema draft 2020-12 release notes](https://json-schema.org/draft/2020-12/release-notes) and [JSON schema draft 2019-09 release notes](https://json-schema.org/draft/2019-09/release-notes) for more information.
+
+However, here are some guides for the more common migration patterns:
+
+### Updating `unique` keyword
+
+When you use `unique` in your schemas, you should update it to use `uniqueItems` or `uniqueEntries` instead.
+
+If you used the `unique: true` field, you should update it to use `uniqueItems` like this:
+
+=== "Before v2.0"
+
+    ```json hl_lines="9"
+    {
+        "$schema": "http://json-schema.org/draft-07/schema",
+        "type": "array",
+        "items": {
+            "type": "object",
+            "properties": {
+                "sample": {
+                    "type": "string",
+                    "unique": true
+                }
+            }
+        }
+    }
+    ```
+
+=== "After v2.0"
+
+    ```json hl_lines="12"
+    {
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "type": "array",
+        "items": {
+            "type": "object",
+            "properties": {
+                "sample": {
+                    "type": "string"
+                }
+            }
+        },
+        "uniqueItems": true
+    }
+    ```
+
+If you used the `unique: ["field1", "field2"]` field, you should update it to use `uniqueEntries` like this:
+
+=== "Before v2.0"
+
+    ```json hl_lines="9"
+    {
+        "$schema": "http://json-schema.org/draft-07/schema",
+        "type": "array",
+        "items": {
+            "type": "object",
+            "properties": {
+                "sample": {
+                    "type": "string",
+                    "unique": ["sample"]
+                }
+            }
+        }
+    }
+    ```
+
+=== "After v2.0"
+
+    ```json hl_lines="12"
+    {
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "type": "array",
+        "items": {
+            "type": "object",
+            "properties": {
+                "sample": {
+                    "type": "string"
+                }
+            }
+        },
+        "uniqueEntries": ["sample"]
+    }
+    ```
+
+### Updating `dependentRequired` keyword
+
+When you use `dependentRequired` in your schemas, you should update it like this:
+
+=== "Before v2.0"
+
+    ```json hl_lines="12"
+    {
+        "$schema": "http://json-schema.org/draft-07/schema",
+        "type": "object",
+        "properties": {
+            "fastq_1": {
+                "type": "string",
+                "format": "file-path"
+            },
+            "fastq_2": {
+                "type": "string",
+                "format": "file-path",
+                "dependentRequired": ["fastq_1"]
+            }
+        }
+    }
+    ```
+
+=== "After v2.0"
+
+    ```json hl_lines="14 15 16"
+    {
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "type": "object",
+        "properties": {
+            "fastq_1": {
+                "type": "string",
+                "format": "file-path"
+            },
+            "fastq_2": {
+                "type": "string",
+                "format": "file-path"
+            }
+        },
+        "dependentRequired": {
+            "fastq_2": ["fastq_1"]
+        }
+    }
+    ```
diff --git a/docs/nextflow_schema/create_schema.md b/docs/nextflow_schema/create_schema.md
index 44a311f..14316f0 100644
--- a/docs/nextflow_schema/create_schema.md
+++ b/docs/nextflow_schema/create_schema.md
@@ -46,6 +46,15 @@ go to the pipeline root and run the following:
 nf-core schema build
 ```
 
+!!! warning
+
+    The current version of `nf-core` tools (v2.12.1) does not support the new schema draft used in `nf-validation`. Running this command after building the schema will convert the schema to the right draft:
+
+    ```bash
+    sed -i -e 's/http:\/\/json-schema.org\/draft-07\/schema/https:\/\/json-schema.org\/draft\/2020-12\/schema/g' -e 's/definitions/defs/g' nextflow_schema.json
+    ```
+
+    A new version of the nf-core schema builder will be available soon. Keep an eye out!
+
 The tool will run the `nextflow config` command to extract your pipeline's
 configuration and compare the output to your `nextflow_schema.json` file (if it exists).
 It will prompt you to update the schema file with any changes, then it will ask if you
diff --git a/docs/nextflow_schema/nextflow_schema_specification.md b/docs/nextflow_schema/nextflow_schema_specification.md
index 586dd11..6375679 100644
--- a/docs/nextflow_schema/nextflow_schema_specification.md
+++ b/docs/nextflow_schema/nextflow_schema_specification.md
@@ -30,24 +30,24 @@ You can find more information about JSON Schema here:
 
 ## Definitions
 
-A slightly strange use of a JSON schema standard that we use for Nextflow schema is `definitions`.
+A slightly strange use of a JSON schema standard that we use for Nextflow schema is `defs`.
 JSON schema can group variables together in an `object`, but then the validation expects this structure to exist in the data that it is validating.
 In reality, we have a very long "flat" list of parameters, all at the top level of `params.foo`.
 
-In order to give some structure to log outputs, documentation and so on, we group parameters into `definitions`.
-Each `definition` is an object with a title, description and so on.
-However, as they are under `definitions` scope they are effectively ignored by the validation and so their nested nature is not a problem.
+In order to give some structure to log outputs, documentation and so on, we group parameters into `defs`.
+Each `def` is an object with a title, description and so on.
+However, as they are under `defs` scope they are effectively ignored by the validation and so their nested nature is not a problem.
 
 We then bring the contents of each definition object back to the "flat" top level for validation
 using a series of `allOf` statements at the end of the schema,
 which reference the specific definition keys.
 
 ```json
 {
-  "$schema": "http://json-schema.org/draft-07/schema",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
   "type": "object",
   // Definition groups
-  "definitions": { // (1)!
+  "defs": { // (1)!
     "my_group_of_params": { // (2)!
       "title": "A virtual grouping used for docs and pretty-printing",
       "type": "object",
@@ -64,7 +64,7 @@ which reference the specific definition keys.
   },
   // Contents of each definition group brought into main schema for validation
   "allOf": [
-    { "$ref": "#/definitions/my_group_of_params" } // (6)!
+    { "$ref": "#/defs/my_group_of_params" } // (6)!
   ]
 }
 ```
@@ -77,7 +77,7 @@
 5. Shortened here for the example, see below for full parameter specification.
 6. A `$ref` line like this needs to be added for every definition group
 
-Parameters can be described outside of the `definitions` scope, in the regular JSON Schema top-level `properties` scope.
+Parameters can be described outside of the `defs` scope, in the regular JSON Schema top-level `properties` scope.
 However, they will be displayed as ungrouped in tools working off the schema.
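For illustration, an ungrouped parameter declared directly in the top-level `properties` scope could look like this (a minimal sketch; the parameter name `my_extra_param` is hypothetical, and any `defs` groups are omitted):

```json
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "type": "object",
  "properties": {
    "my_extra_param": {
      "type": "string",
      "description": "Validated like any other parameter, but shown without a group"
    }
  }
}
```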
## Nested parameters
@@ -115,8 +115,7 @@ Any parameters that _must_ be specified should be set as `required` in the schema
 
 !!! tip
 
-    Make sure you do not set a default value for the parameter, as then it will have
-    a value even if not supplied by the pipeline user and the required property will have no effect.
+    Make sure you set the default value of the parameter to `null`, otherwise it will have a value even if it is not supplied by the pipeline user, and the `required` property will have no effect.
 
 This is not done with a property key like other things described below, but rather
 by naming the parameter in the `required` array in the definition object / top-level object.
 
@@ -164,13 +163,13 @@ Variable type, taken from the [JSON schema keyword vocabulary](https://json-sche
 - `number` (float)
 - `integer`
 - `boolean` (true / false)
+- `object` (currently only supported for file validation, see [Nested parameters](#nested-parameters))
+- `array` (currently only supported for file validation, see [Nested parameters](#nested-parameters))
 
 Validation checks that the supplied parameter matches the expected type, and will fail with an error if not.
 
-These JSON schema types are _not_ supported (see [Nested paramters](#nested-parameters)):
+This JSON schema type is _not_ supported:
 
-- `object`
-- `array`
- `null`
 
 ### `default`
 
@@ -223,7 +222,7 @@ If validation fails, this `errorMessage` is printed instead, and the raw JSON sc
 
 For example, instead of printing:
 
 ```
-ERROR ~ * --input: string [samples.yml] does not match pattern ^\S+\.csv$ (samples.yml)
+* --input (samples.yml): "samples.yml" does not match regular expression [^\S+\.csv$]
 ```
 
 We can set
 
@@ -239,9 +238,21 @@ We can set
 and get:
 
 ```
-ERROR ~ * --input: File name must end in '.csv' cannot contain spaces (samples.yml)
+* --input (samples.yml): File name must end in '.csv' cannot contain spaces
 ```
 
+### `deprecated`
+
+!!! example "Extended key"
+
+A boolean JSON flag that instructs anything using the schema that this parameter/field is deprecated and should not be used. This can be useful to generate messages telling the user that a parameter has changed between versions.
+
+JSON schema states that this is an informative key only, but in `nf-validation` this will cause a validation error if the parameter/field is used.
+
+!!! tip
+
+    Using the [`errorMessage`](#errormessage) keyword can be useful to provide more information about the deprecation and what to use instead.
+
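+As a minimal sketch of how the two keywords can be combined (the parameter names here are hypothetical):
+
+```json
+"old_param": {
+  "type": "string",
+  "deprecated": true,
+  "errorMessage": "--old_param is deprecated, please use --new_param instead"
+}
+```
+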
 ### `enum`
 
 An array of enumerated values: the parameter must match one of these values exactly to pass validation.
 
@@ -325,11 +336,6 @@ Formats can be used to give additional validation checks against `string` values
 
 The `format` key is a [standard JSON schema key](https://json-schema.org/understanding-json-schema/reference/string.html#format), however we primarily use it for validating file / directory path operations with non-standard schema values.
 
-!!! note
-
-    In addition to _validating_ the strings as the provided format type, nf-validation also _coerces_ the parameter variable type.
-    That is: if the schema defines `params.input` as a `file-path`, nf-validation will convert the parameter from a `String` into a `Nextflow.File`.
-
 Example usage is as follows:
 
 ```json
 {
   "type": "string",
   "format": "file-path"
 }
 ```
 
@@ -342,7 +348,7 @@ Example usage is as follows:
 The available `format` types are below:
 
 `file-path`
 : States that the provided value is a file. Does not check its existence, but it does check that the path is not a directory.
 
 `directory-path`
 : States that the provided value is a directory. Does not check its existence, but if it exists, it does check that the path is not a file.
 
 `path`
 : States that the provided value is a path (file or directory). Does not check its existence.
 
 `file-path-pattern`
-: States that the provided value is a globbing pattern that will be used to fetch files. Checks that the pattern is valid and that at least one file is found.
+: States that the provided value is a glob pattern that will be used to fetch files. Checks that the pattern is valid and that at least one file is found.
 
 ### `exists`
 
-When a format is specified for a value, you can provide the key `exists` set to true in order to validate that the provided path exists.
+When a format is specified for a value, you can provide the key `exists` set to `true` in order to validate that the provided path exists. Set this to `false` to validate that the path does not exist.
 
 Example usage is as follows:
 
 ```json
 {
   "type": "string",
   "format": "file-path",
   "exists": true
 }
 ```
 
-!!! note
-
-    If `exists` is set to `false`, this validation is ignored. Does not check if the path exists.
-
-!!! note
-
-    If the parameter is set to `null`, `false` or an empty string, this validation is ignored. It does not check if the path exists.
-
 !!! note
 
     If the parameter is an S3 URL path, this validation is ignored.
-    Use `--validationS3PathCheck` or set `params.validationS3PathCheck = true` to validate them.
 
 ### `mimetype`
 
@@ -404,8 +401,7 @@ Should only be set when `format` is `file-path`.
 
 !!! tip
 
-    Setting this field is key to working with sample sheet validation and channel generation,
-    as described in the next section of the nf-validation docs.
+    Setting this field is key to working with sample sheet validation and channel generation, as described in the next section of the nf-validation docs.
 
 These schema files are typically stored in the pipeline `assets` directory, but can be anywhere.
 
@@ -448,3 +444,41 @@ Specify a minimum / maximum value for an integer or float number length with `mi
 
 The JSON schema docs also mention `exclusiveMinimum`, `exclusiveMaximum` and `multipleOf` keys.
 Because nf-validation uses stock JSON schema validation libraries, these _should_ work for validating keys.
 However, they are not officially supported within the Nextflow schema ecosystem and so some interfaces may not recognise them.
+
+## Array-specific keys
+
+### `uniqueItems`
+
+All items in the array should be unique.
+
+- See the [JSON schema docs](https://json-schema.org/understanding-json-schema/reference/array#uniqueItems)
+  for details.
+
+```json
+{
+  "type": "array",
+  "uniqueItems": true
+}
+```
+
+### `uniqueEntries`
+
+!!! example "Non-standard key"
+
+The combination of all values in the given keys should be unique. For this key to work, you need to make sure the array items are of type `object` and contain the keys in the `uniqueEntries` list.
+
+```json
+{
+  "type": "array",
+  "items": {
+    "type": "object",
+    "uniqueEntries": ["foo", "bar"],
+    "properties": {
+      "foo": { "type": "string" },
+      "bar": { "type": "string" }
+    }
+  }
+}
+```
+
+This schema tells `nf-validation` that the combination of `foo` and `bar` should be unique across all objects in the array.
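+
+As an illustration with hypothetical values, the schema above would reject the following sample sheet, because the `foo`/`bar` combination on lines 2 and 4 is repeated:
+
+```csv linenums="1"
+foo,bar
+value1,value2
+value1,value3
+value1,value2
+```
+
+Line 3 passes because its combination (`value1`, `value3`) is unique; only the repeated combination triggers a validation error.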
diff --git a/docs/nextflow_schema/sample_sheet_schema_examples.md b/docs/nextflow_schema/sample_sheet_schema_examples.md
index 95595db..a28ce44 100644
--- a/docs/nextflow_schema/sample_sheet_schema_examples.md
+++ b/docs/nextflow_schema/sample_sheet_schema_examples.md
@@ -20,7 +20,7 @@ You can see this, used for validating sample sheets with `--input` here: [`asset
 
 ```json
 {
-  "$schema": "http://json-schema.org/draft-07/schema",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
   "$id": "https://raw.githubusercontent.com/nf-core/rnaseq/master/assets/schema_input.json",
   "title": "nf-core/rnaseq pipeline - params.input schema",
   "description": "Schema for the file provided with params.input",
@@ -42,17 +42,9 @@ You can see this, used for validating sample sheets with `--input` here: [`asset
     },
     "fastq_2": {
       "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
-      "anyOf": [
-        {
-          "type": "string",
-          "pattern": "^\\S+\\.f(ast)?q\\.gz$",
-          "format": "file-path"
-        },
-        {
-          "type": "string",
-          "maxLength": 0
-        }
-      ]
+      "type": "string",
+      "pattern": "^\\S+\\.f(ast)?q\\.gz$",
+      "format": "file-path"
     },
     "strandedness": {
       "type": "string",
diff --git a/docs/nextflow_schema/sample_sheet_schema_specification.md b/docs/nextflow_schema/sample_sheet_schema_specification.md
index 86a44e5..db0afd1 100644
--- a/docs/nextflow_schema/sample_sheet_schema_specification.md
+++ b/docs/nextflow_schema/sample_sheet_schema_specification.md
@@ -18,21 +18,17 @@ Validation by the plugin works by parsing the supplied file contents into a groo
 then passing this to the JSON schema validation library.
 As such, the structure of the schema must match the structure of the parsed file.
 
-Typically, sample sheets are CSV files, with fields represented as columns and samples as rows. TSV and simple unnested YAML files are also supported by the plugin.
-
-!!! warning
-
-    Nested YAML files can be validated with the `validateParameters()` function, but cannot be converted to a channel with `.fromSamplesheet()`.
+Typically, sample sheets are CSV files, with fields represented as columns and samples as rows. TSV, JSON and YAML sample sheets are also supported by this plugin.
 
 In this case, the parsed object will be an `array` (see [JSON schema docs](https://json-schema.org/understanding-json-schema/reference/array.html#items)).
 The array type is associated with an `items` key which in our case contains a single `object`.
 The object has `properties`, where the keys must match the headers of the CSV file.
 
 So, for CSV sample sheets, the top-level schema should look something like this:
 
 ```json
 {
-  "$schema": "http://json-schema.org/draft-07/schema",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
   "type": "array",
   "items": {
     "type": "object",
@@ -44,7 +40,7 @@ So, for CSV sample sheets, the top-level schema should look something like this:
 }
 ```
 
-If your sample sheet has a different format (for example, a simple YAML file),
+If your sample sheet has a different format (for example, a nested YAML file),
 you will need to build your schema to match the parsed structure.
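For example (a hypothetical sketch), a nested YAML sample sheet like this:

```yaml
- sample: sample1
  reads:
    fastq_1: sample1_R1.fq.gz
    fastq_2: sample1_R2.fq.gz
```

would need the `items` of the schema to mirror the same nesting:

```json
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "type": "array",
  "items": {
    "type": "object",
    "properties": {
      "sample": { "type": "string" },
      "reads": {
        "type": "object",
        "properties": {
          "fastq_1": { "type": "string", "format": "file-path" },
          "fastq_2": { "type": "string", "format": "file-path" }
        }
      }
    }
  }
}
```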
 ## Properties
 
@@ -75,10 +71,8 @@ Please refer to the [Nextflow schema specification](../nextflow_schema/nextflow_
 
 !!! tip
 
     Sample sheets are commonly used to define input file paths.
-    Be sure to set `"type": "string"` and `"format": "file-path"` for these properties,
-    so that nf-validation correctly returns this sample sheet field as a `Nextflow.file` object.
-
-When using the `file-path-pattern` format for a globbing pattern, a list will be created with all files found by the globbing pattern. See [here](../nextflow_schema/nextflow_schema_specification.md#file-path-pattern) for more information.
+    Be sure to set `"type": "string"`, `"exists": true`, `"format": "file-path"` and `"schema": "path/to/samplesheet/schema.json"` for these properties,
+    so that sample sheets are correctly validated and `fromSamplesheet` does not result in any errors.
 
 ## Sample sheet keys
 
@@ -87,111 +81,17 @@ These exist in addition to those described in the [Nextflow schema specification
 
 ### `meta`
 
-Type: `List`
+Type: `List` or `String`
 
-The current field will be considered a meta value when this parameter is present. This parameter should contain a list of the meta fields to assign this value to. The default is no meta for each field.
+The current field will be considered a meta value when this parameter is present. This parameter should contain a list of meta fields, or a string naming a single meta field, to assign this value to. The default is no meta for each field.
 
 For example:
 
 ```json
 {
-  "meta": ["id", "sample"]
+  "meta": "id"
 }
 ```
 
-will convert the `field` value to a meta value, resulting in the channel `[[id:value, sample:value]...]`
+will convert the `field` value to a meta value, resulting in the channel `[[id:value]...]`
 
 See [here](https://github.com/nextflow-io/nf-validation/blob/ce3aef60e5103ea4798375fe6c59bae41b7d2a25/plugins/nf-validation/src/testResources/schema_input.json#L10-L25) for an example in the sample sheet.
-
-### `unique`
-
-Type: `Boolean` or `List`
-
-Whether or not the field should contain a unique value over the entire sample sheet.
-
-Default: `false`
-
-- Can be `true`, in which case the value for this field should be unique for all samples in the sample sheet.
-- Can be supplied with a list of field names, containing _other field names_ that should be unique _in combination with_ the current field.
-
-!!! example
-
-    Consider the following example:
-
-    ```json
-    "properties": {
-      "field1": { "unique": true },
-      "field2": { "unique": ["field1"] }
-    }
-    ```
-
-    `field1` needs to be unique in this example. `field2` needs to be unique in combination with `field1`. So for a sample sheet like this:
-
-    ```csv linenums="1"
-    field1,field2
-    value1,value2
-    value1,value3
-    value1,value2
-    ```
-
-    ..both checks will fail.
-
-    * `field1` isn't unique since `value1` has been found more than once.
-    * `field2` isn't unique in combination with `field1` because the `value1,value2` combination has been found more than once.
-
-    See [`schema_input.json#L48-L55`](https://github.com/nextflow-io/nf-validation/blob/ce3aef60e5103ea4798375fe6c59bae41b7d2a25/plugins/nf-validation/src/testResources/schema_input.json#L48-L55)
-    for an example in one of the plugin test-fixture sample sheets.
-
-### `deprecated`
-
-Type: `Boolean`
-
-A boolean variable stating that the field is deprecated and will be removed in the nearby future. This will throw a warning to the user that the current field is deprecated. The default value is `false`.
-
-Example:
-
-```json
-"field": {
-  "deprecated": true
-}
-```
-
-will show a warning stating that the use of `field` is deprecated:
-
-```console
-The 'field' field is deprecated and
-will no longer be used in the future.
-Please check the official documentation -of the pipeline for more information. -``` - -### `dependentRequired` - -Type: `List` - -- See [JSON Schema docs](https://json-schema.org/understanding-json-schema/reference/conditionals.html#dependentrequired) - -A list containing names of other fields. The validator will check if these fields are filled in and throw an error if they aren't, but only when the field `dependentRequired` belongs to is filled in. - -!!! example - - ```json - "field1": { - "dependentRequired": ["field2"] - }, - "field2": {} - ``` - - will check if `field2` is given when `field1` has a value. So for example: - - ```csv linenums="1" - field1,field2 - value1,value2 - value1, - ,value2 - ``` - - - [x] The first row will pass the check because both fields are set. - - [ ] The second row will fail because `field1` is set, but `field2` isn't and `field1` is dependent on `field2`. - - [x] The third row will pass the check because `field1` isn't set. - - See [here](https://github.com/nextflow-io/nf-validation/blob/ce3aef60e5103ea4798375fe6c59bae41b7d2a25/plugins/nf-validation/src/testResources/schema_input.json#L10-L25) for an example in the sample sheet. diff --git a/docs/parameters/summary_log.md b/docs/parameters/summary_log.md index 98cc213..d9e91a2 100644 --- a/docs/parameters/summary_log.md +++ b/docs/parameters/summary_log.md @@ -10,7 +10,7 @@ This function returns a string that can be logged to the terminal, summarizing t !!! note - The summary prioritizes displaying only the parameters that are **different** the default schema values. + The summary prioritizes displaying only the parameters that are **different** than the default schema values. Parameters which don't have a default in the JSON Schema and which have a value of `null`, `""`, `false` or `'false'` won't be returned in the map. This is to streamline the extensive parameter lists often associated with pipelines, and highlight the customized elements. This feature is essential for users to verify their configurations, like checking for typos or confirming proper resolution, diff --git a/docs/parameters/validation.md b/docs/parameters/validation.md index 25a275f..1627ce0 100644 --- a/docs/parameters/validation.md +++ b/docs/parameters/validation.md @@ -129,7 +129,7 @@ Sometimes, a parameter that you want to set may not be described in the pipeline Maybe it's something you're using in your Nextflow configuration setup for your compute environment, or it's a complex parameter that cannot be handled in the schema, such as [nested parameters](../nextflow_schema/nextflow_schema_specification.md#nested-parameters). -In these cases, to avoid getting warnings when that unrecognised parameter is set, +In these cases, to avoid getting warnings when an unrecognised parameter is set, you can use `--validationSchemaIgnoreParams` / `params.validationSchemaIgnoreParams`. This should be a comma-separated list of strings that correspond to parameter names. @@ -147,8 +147,6 @@ For example, providing an integer as a string will no longer fail validation. The validation does not affect the parameter variable types in your pipeline. It attempts to cast a temporary copy of the params only, during the validation step. - You can find more information about how this works in the [JSON schema validation library docs](https://github.com/everit-org/json-schema#lenient-mode). 
-
-
 To enable lenient validation mode, set `params.validationLenientMode`:
 
 ```bash
diff --git a/docs/samplesheets/examples.md b/docs/samplesheets/examples.md
index ba90bb6..5c11ce4 100644
--- a/docs/samplesheets/examples.md
+++ b/docs/samplesheets/examples.md
@@ -12,12 +12,12 @@ Understanding channel structure and manipulation is critical for getting the mos
 
 ### Glossary
 
 - A channel is the Nextflow object, referenced in the code
-- An item is each thing passing through the channel, equivalent to one row in the samplesheet
+- An item is each thing passing through the channel, equivalent to one row in the sample sheet
 - An element is each thing in the item, e.g., the meta value, fastq_1 etc. It may be a file or value
 
 ## Default mode
 
-Each item in the channel emitted by `.fromSamplesheet()` is a flat tuple, corresponding with each row of the samplesheet. Each item will be composed of a meta value (if present) and any additional elements from columns in the samplesheet, e.g.:
+Each item in the channel emitted by `.fromSamplesheet()` is a tuple, corresponding with each row of the sample sheet. Each item will be composed of a meta value (if present) and any additional elements from columns in the sample sheet, e.g.:
 
 ```csv
 sample,fastq_1,fastq_2,bed
@@ -33,7 +33,7 @@ Might create a channel where each element consists of 4 items, a map value follo
 // Resulting in:
 [ [ id: "sample" ], fastq1.R1.fq.gz, fastq1.R2.fq.gz, sample1.bed]
-[ [ id: "sample2" ], fastq2.R1.fq.gz, fastq2.R2.fq.gz, [] ] // A missing value from the samplesheet is an empty list
+[ [ id: "sample2" ], fastq2.R1.fq.gz, fastq2.R2.fq.gz, [] ] // A missing value from the sample sheet is an empty list
 ```
 
 This channel can be used as input of a process where the input declaration is:
 
@@ -44,22 +44,17 @@ tuple val(meta), path(fastq_1), path(fastq_2), path(bed)
 
 It may be necessary to manipulate this channel to fit your process inputs. For more documentation, check out the [Nextflow operator docs](https://www.nextflow.io/docs/latest/operator.html); however, here are some common use cases with `.fromSamplesheet()`.
 
-## Using a samplesheet with no headers
+## Using a sample sheet with no headers
 
-Sometimes you only have one possible input in the pipeline samplesheet. In this case it doesn't make sense to have a header in the samplesheet. This can be done by creating a samplesheet with an empty string as input key:
+Sometimes you only have one possible input in the pipeline sample sheet. In this case it doesn't make sense to have a header in the sample sheet. You can achieve this by removing the `properties` section from the sample sheet schema and changing the type of the `items` from `object` to the desired type:
 
 ```json
 {
-  "$schema": "http://json-schema.org/draft-07/schema",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
   "description": "Schema for the file provided with params.input",
   "type": "array",
   "items": {
-    "type": "object",
-    "properties": {
-      "": {
-        "type": "string"
-      }
-    }
+    "type": "string"
   }
 }
 ```
 
@@ -81,15 +76,15 @@ or this YAML file:
 
 The output of `.fromSamplesheet()` will look like this:
 
 ```bash
-[test_1]
-[test_2]
+test_1
+test_2
 ```
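For reference, a headerless CSV input consistent with the output shown above would simply contain one value per line (a sketch inferred from that output):

```csv
test_1
test_2
```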
 ## Changing the structure of channel items
 
-Each item in the channel will be a flat tuple, but some processes will use multiple files as a list in their input channel, this is common in nf-core modules.
 For example, consider the following input declaration in a process, where FASTQ could be > 1 file:
 
-```nextflow
+```groovy
 process ZCAT_FASTQS {
     input:
     tuple val(meta), path(fastq)
 
@@ -102,13 +97,13 @@ The output of `.fromSamplesheet()` can be used by default with a process with the following input declaration:
 
-```nextflow
+```groovy
 val(meta), path(fastq_1), path(fastq_2)
 ```
 
 To manipulate each item within a channel, you should use the [Nextflow `.map()` operator](https://www.nextflow.io/docs/latest/operator.html#map). This will apply a function to each element of the channel in turn. Here, we convert the flat tuple into a tuple composed of a meta and a list of FASTQ files:
 
-```nextflow
+```groovy
 Channel.fromSamplesheet("input")
     .map { meta, fastq_1, fastq_2 -> tuple(meta, [ fastq_1, fastq_2 ]) }
     .set { input }
 
 input.view() // Channel has 2 elements: meta, fastqs
 
@@ -118,7 +113,7 @@ This is now compatible with the process defined above and will not raise a warning about input cardinality:
 
-```nextflow
+```groovy
 ZCAT_FASTQS(input)
 ```
 
@@ -126,7 +121,7 @@ For example, to remove the BED file from the channel created above, we could not return it from the map. Note the absence of the `bed` item in the return of the closure below:
 
-```nextflow
+```groovy
 Channel.fromSamplesheet("input")
     .map { meta, fastq_1, fastq_2, bed -> tuple(meta, fastq_1, fastq_2) }
     .set { input }
 
@@ -140,7 +135,7 @@ In this way you can drop items from a channel. We could perform this twice to create one channel containing the FASTQs and one containing the BED files, however Nextflow has a native operator to separate channels called [`.multiMap()`](https://www.nextflow.io/docs/latest/operator.html#multimap). Here, we separate the FASTQs and BEDs into two separate channels using `multiMap`. Note, the channels are both contained in `input` and accessed as an attribute using dot notation:
 
-```nextflow
+```groovy
 Channel.fromSamplesheet("input")
     .multiMap { meta, fastq_1, fastq_2, bed ->
         fastq: tuple(meta, fastq_1, fastq_2)
@@ -151,7 +146,7 @@ Channel.fromSamplesheet("input")
 
 The channel has two attributes, `fastq` and `bed`, which can be accessed separately.
 
-```nextflow
+```groovy
 input.fastq.view() // Channel has 3 elements: meta, fastq_1, fastq_2
 input.bed.view() // Channel has 2 elements: meta, bed
 ```
 
@@ -164,7 +159,7 @@ You can use the [`.branch()` operator](https://www.nextflow.io/docs/latest/opera
 
 This example shows a channel which can have entries for WES or WGS data. WES data includes a BED file denoting the target regions, but WGS data does not. These analyses are different, so we want to separate the WES and WGS entries from each other. We can separate the two using `.branch` based on the presence of the BED file:
 
-```nextflow
+```groovy
 // Channel with four elements - see docs for examples
 params.input = "samplesheet.csv"
 
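A minimal sketch of what that `.branch()` split could look like (the full worked example is in the linked docs; recall that a missing BED value arrives as `[]`, which is falsy in Groovy):

```groovy
Channel.fromSamplesheet("input")
    .branch { meta, fastq_1, fastq_2, bed ->
        wes: bed    // a BED file was supplied
        wgs: !bed   // bed is [] when the sample sheet cell was empty
    }
    .set { input }

input.wes.view() // WES entries, with BED files
input.wgs.view() // WGS entries, without BED files
```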
@@ -208,13 +203,13 @@ It's useful to determine the count of channel entries with similar values when y
 
 This example contains a channel where multiple samples can be in the same family. Later on in the pipeline we want to merge the analyzed files so one file gets created for each family. The result will be a channel with an extra meta field containing the count of channel entries with the same family name.
 
-```nextflow
+```groovy
 // channel created by fromSamplesheet() previous to modification:
 // [[id:example1, family:family1], example1.txt]
 // [[id:example2, family:family1], example2.txt]
 // [[id:example3, family:family2], example3.txt]
 
 params.input = "samplesheet.csv"
 
 Channel.fromSamplesheet("input")
     .tap { ch_raw } // Create a copy of the original channel
diff --git a/docs/samplesheets/fromSamplesheet.md b/docs/samplesheets/fromSamplesheet.md
index c11f187..eec3904 100644
--- a/docs/samplesheets/fromSamplesheet.md
+++ b/docs/samplesheets/fromSamplesheet.md
@@ -7,9 +7,9 @@ description: Channel factory to create a channel from a sample sheet.
 
 ## `fromSamplesheet`
 
-This function validates and converts a samplesheet to a ready-to-use Nextflow channel. This is done using information encoded within a sample sheet schema (see the [docs](../nextflow_schema/sample_sheet_schema_specification.md)).
+This function validates and converts a sample sheet to a ready-to-use Nextflow channel. This is done using information encoded within a sample sheet schema (see the [docs](../nextflow_schema/sample_sheet_schema_specification.md)).
 
-The function has one mandatory argument: the name of the parameter which specifies the input samplesheet. The parameter specified must have the format `file-path` and include additional field `schema`:
+The function has one mandatory argument: the name of the parameter which specifies the input sample sheet. The parameter specified must have the format `file-path` and include the additional field `schema`:
 
 ```json hl_lines="4"
 {
   "input": {
     "format": "file-path",
     "schema": "assets/foo_schema.json"
   }
 }
 ```
 
-The path specified in the `schema` key determines the JSON used for validation of the samplesheet.
+The path specified in the `schema` key determines the JSON used for validation of the sample sheet.
 
-When using the `.fromSamplesheet` channel factory, some additional optional arguments can be used:
+When using the `.fromSamplesheet` channel factory, one optional argument can be used:
 
 - `parameters_schema`: File name for the pipeline parameters schema. (Default: `nextflow_schema.json`)
-- `skip_duplicate_check`: Skip the checking for duplicates. Can also be skipped with the `--validationSkipDuplicateCheck` parameter. (Default: `false`)
 
 ```groovy
 Channel.fromSamplesheet('input')
 ```
 
 ```groovy
-Channel.fromSamplesheet(
-    'input',
-    parameters_schema: 'custom_nextflow_schema.json',
-    skip_duplicate_check: false
-)
+Channel.fromSamplesheet('input', parameters_schema: 'custom_nextflow_schema.json')
 ```
 
 ## Basic example
 
-In [this example](../../examples/fromSamplesheetBasic/), we create a simple channel from a CSV samplesheet.
+In [this example](../../examples/fromSamplesheetBasic/), we create a simple channel from a CSV sample sheet.
 
 ```
 --8<-- "examples/fromSamplesheetBasic/log.txt"
 ```
 
 === "main.nf"
 
     ```groovy
     --8<-- "examples/fromSamplesheetBasic/pipeline/main.nf"
     ```
 
 === "samplesheet.csv"
 
     ```csv
     --8<-- "examples/fromSamplesheetBasic/samplesheet.csv"
     ```
 
 === "nextflow.config"
 
     ```groovy
     --8<-- "examples/fromSamplesheetBasic/pipeline/nextflow.config"
     ```
 
 !!! 
danger - It is the order of fields **in the sample sheet JSON schema** which defines the order of items in the channel returned by `fromSamplesheet()`, _not_ the order of fields in the CSV file. + It is the order of fields **in the sample sheet JSON schema** which defines the order of items in the channel returned by `fromSamplesheet()`, _not_ the order of fields in the sample sheet file. ``` --8<-- "examples/fromSamplesheetOrder/log.txt" diff --git a/docs/samplesheets/validate_sample_sheet.md b/docs/samplesheets/validate_sample_sheet.md index fdad579..8a94ff2 100644 --- a/docs/samplesheets/validate_sample_sheet.md +++ b/docs/samplesheets/validate_sample_sheet.md @@ -7,7 +7,7 @@ description: Validate the contents of a sample sheet file. When a parameter provides the `schema` field, the `validateParameters()` function will automatically parse and validate the provided file contents using this JSON schema. -It can validate CSV, TSV and simple YAML files. +It can validate CSV, TSV, JSON and YAML files. The path of the schema file must be relative to the root of the pipeline directory. See an example in the `input` field from the [example schema.json](https://raw.githubusercontent.com/nextflow-io/nf-validation/master/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet.json#L20). @@ -26,4 +26,4 @@ See an example in the `input` field from the [example schema.json](https://raw.g } ``` -For more information about the samplesheet JSON schema refer to [samplesheet docs](../nextflow_schema/nextflow_schema_specification.md). +For more information about the sample sheet JSON schema refer to [sample sheet docs](../nextflow_schema/nextflow_schema_specification.md). diff --git a/docs/schema_input.json b/docs/schema_input.json index 2673539..a3c9208 100644 --- a/docs/schema_input.json +++ b/docs/schema_input.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nextflow-io/nf-validation/master/plugins/nf-validation/src/testResources/schema_input.json", "title": "Samplesheet validation schema", "description": "Schema for the samplesheet used in this pipeline", diff --git a/examples/fromSamplesheetBasic/pipeline/assets/schema_input.json b/examples/fromSamplesheetBasic/pipeline/assets/schema_input.json index 06b0bb1..d99e614 100644 --- a/examples/fromSamplesheetBasic/pipeline/assets/schema_input.json +++ b/examples/fromSamplesheetBasic/pipeline/assets/schema_input.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-validation/example/master/assets/schema_input.json", "title": "nf-validation example - params.input schema", "description": "Schema for the file provided with params.input", diff --git a/examples/fromSamplesheetBasic/pipeline/nextflow.config b/examples/fromSamplesheetBasic/pipeline/nextflow.config index c06ab6b..efbc97a 100644 --- a/examples/fromSamplesheetBasic/pipeline/nextflow.config +++ b/examples/fromSamplesheetBasic/pipeline/nextflow.config @@ -1,5 +1,5 @@ plugins { - id 'nf-validation@0.2.1' + id 'nf-validation@2.0.0' } params { diff --git a/examples/fromSamplesheetBasic/pipeline/nextflow_schema.json b/examples/fromSamplesheetBasic/pipeline/nextflow_schema.json index 3efe8c4..6096ceb 100644 --- a/examples/fromSamplesheetBasic/pipeline/nextflow_schema.json +++ 
b/examples/fromSamplesheetBasic/pipeline/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -33,7 +33,7 @@ }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/defs/input_output_options" } ] } diff --git a/examples/fromSamplesheetMeta/pipeline/assets/schema_input.json b/examples/fromSamplesheetMeta/pipeline/assets/schema_input.json index a0d8a02..78b9c00 100644 --- a/examples/fromSamplesheetMeta/pipeline/assets/schema_input.json +++ b/examples/fromSamplesheetMeta/pipeline/assets/schema_input.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-validation/example/master/assets/schema_input.json", "title": "nf-validation example - params.input schema", "description": "Schema for the file provided with params.input", diff --git a/examples/fromSamplesheetMeta/pipeline/nextflow.config b/examples/fromSamplesheetMeta/pipeline/nextflow.config index c06ab6b..efbc97a 100644 --- a/examples/fromSamplesheetMeta/pipeline/nextflow.config +++ b/examples/fromSamplesheetMeta/pipeline/nextflow.config @@ -1,5 +1,5 @@ plugins { - id 'nf-validation@0.2.1' + id 'nf-validation@2.0.0' } params { diff --git a/examples/fromSamplesheetMeta/pipeline/nextflow_schema.json b/examples/fromSamplesheetMeta/pipeline/nextflow_schema.json index 3efe8c4..6096ceb 100644 --- a/examples/fromSamplesheetMeta/pipeline/nextflow_schema.json +++ b/examples/fromSamplesheetMeta/pipeline/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -33,7 +33,7 @@ }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/defs/input_output_options" } ] } diff --git a/examples/fromSamplesheetOrder/pipeline/assets/schema_input.json b/examples/fromSamplesheetOrder/pipeline/assets/schema_input.json index 8d1ea9d..9e0f28e 100644 --- a/examples/fromSamplesheetOrder/pipeline/assets/schema_input.json +++ b/examples/fromSamplesheetOrder/pipeline/assets/schema_input.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-validation/example/master/assets/schema_input.json", "title": "nf-validation example - params.input schema", "description": "Schema for the file provided with params.input", diff --git a/examples/fromSamplesheetOrder/pipeline/nextflow.config b/examples/fromSamplesheetOrder/pipeline/nextflow.config index c06ab6b..efbc97a 100644 --- a/examples/fromSamplesheetOrder/pipeline/nextflow.config +++ b/examples/fromSamplesheetOrder/pipeline/nextflow.config @@ -1,5 +1,5 @@ plugins { - id 'nf-validation@0.2.1' + id 'nf-validation@2.0.0' } params { 
diff --git a/examples/fromSamplesheetOrder/pipeline/nextflow_schema.json b/examples/fromSamplesheetOrder/pipeline/nextflow_schema.json index 3efe8c4..6096ceb 100644 --- a/examples/fromSamplesheetOrder/pipeline/nextflow_schema.json +++ b/examples/fromSamplesheetOrder/pipeline/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -33,7 +33,7 @@ }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/defs/input_output_options" } ] } diff --git a/examples/paramsHelp/pipeline/nextflow.config b/examples/paramsHelp/pipeline/nextflow.config index 0e1bf1f..c907af1 100644 --- a/examples/paramsHelp/pipeline/nextflow.config +++ b/examples/paramsHelp/pipeline/nextflow.config @@ -1,5 +1,5 @@ plugins { - id 'nf-validation@0.2.1' + id 'nf-validation@2.0.0' } params { diff --git a/examples/paramsHelp/pipeline/nextflow_schema.json b/examples/paramsHelp/pipeline/nextflow_schema.json index 3efe8c4..6096ceb 100644 --- a/examples/paramsHelp/pipeline/nextflow_schema.json +++ b/examples/paramsHelp/pipeline/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -33,7 +33,7 @@ }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/defs/input_output_options" } ] } diff --git a/examples/paramsSummaryLog/pipeline/nextflow.config b/examples/paramsSummaryLog/pipeline/nextflow.config index 0e1bf1f..c907af1 100644 --- a/examples/paramsSummaryLog/pipeline/nextflow.config +++ b/examples/paramsSummaryLog/pipeline/nextflow.config @@ -1,5 +1,5 @@ plugins { - id 'nf-validation@0.2.1' + id 'nf-validation@2.0.0' } params { diff --git a/examples/paramsSummaryLog/pipeline/nextflow_schema.json b/examples/paramsSummaryLog/pipeline/nextflow_schema.json index 3efe8c4..6096ceb 100644 --- a/examples/paramsSummaryLog/pipeline/nextflow_schema.json +++ b/examples/paramsSummaryLog/pipeline/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -33,7 +33,7 @@ }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/defs/input_output_options" } ] } diff --git a/examples/paramsSummaryMap/pipeline/nextflow.config b/examples/paramsSummaryMap/pipeline/nextflow.config index 0e1bf1f..c907af1 100644 --- a/examples/paramsSummaryMap/pipeline/nextflow.config +++ b/examples/paramsSummaryMap/pipeline/nextflow.config @@ -1,5 +1,5 @@ plugins { - id 'nf-validation@0.2.1' + id 
'nf-validation@2.0.0' } params { diff --git a/examples/paramsSummaryMap/pipeline/nextflow_schema.json b/examples/paramsSummaryMap/pipeline/nextflow_schema.json index 3efe8c4..6096ceb 100644 --- a/examples/paramsSummaryMap/pipeline/nextflow_schema.json +++ b/examples/paramsSummaryMap/pipeline/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -33,7 +33,7 @@ }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/defs/input_output_options" } ] } diff --git a/examples/validateParameters/log.txt b/examples/validateParameters/log.txt index a294ef4..65f59cf 100644 --- a/examples/validateParameters/log.txt +++ b/examples/validateParameters/log.txt @@ -1,10 +1,10 @@ N E X T F L O W ~ version 23.04.1 Launching `pipeline/main.nf` [amazing_crick] DSL2 - revision: 53bd9eac20 -ERROR ~ ERROR: Validation of pipeline parameters failed! +ERROR ~ Validation of pipeline parameters failed! -- Check '.nextflow.log' file for details The following invalid input values have been detected: -* --input: string [samplesheet.txt] does not match pattern ^\S+\.(csv|tsv|yaml|json)$ (samplesheet.txt) -* --input: the file 'samplesheet.txt' does not exist (samplesheet.txt) +* --input (samplesheet.txt): "samplesheet.txt" does not match regular expression [^\S+\.(csv|tsv|yml|yaml)$] +* --input (samplesheet.txt): the file or directory 'samplesheet.txt' does not exist diff --git a/examples/validateParameters/pipeline/nextflow.config b/examples/validateParameters/pipeline/nextflow.config index 7227d6a..da71bcc 100644 --- a/examples/validateParameters/pipeline/nextflow.config +++ b/examples/validateParameters/pipeline/nextflow.config @@ -1,5 +1,5 @@ plugins { - id 'nf-validation@0.2.1' + id 'nf-validation@2.0.0' } params { diff --git a/examples/validateParameters/pipeline/nextflow_schema.json b/examples/validateParameters/pipeline/nextflow_schema.json index 3efe8c4..c0df520 100644 --- a/examples/validateParameters/pipeline/nextflow_schema.json +++ b/examples/validateParameters/pipeline/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -18,6 +18,7 @@ "mimetype": "text/csv", "schema": "assets/schema_input.json", "pattern": "^\\S+\\.(csv|tsv|yaml|json)$", + "exists": true, "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. 
See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" @@ -33,7 +34,7 @@ }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/defs/input_output_options" } ] } diff --git a/examples/validationFailUnrecognisedParams/pipeline/nextflow.config b/examples/validationFailUnrecognisedParams/pipeline/nextflow.config index 9a0e564..0a1816c 100644 --- a/examples/validationFailUnrecognisedParams/pipeline/nextflow.config +++ b/examples/validationFailUnrecognisedParams/pipeline/nextflow.config @@ -1,5 +1,5 @@ plugins { - id 'nf-validation@0.2.1' + id 'nf-validation@2.0.0' } params { diff --git a/examples/validationFailUnrecognisedParams/pipeline/nextflow_schema.json b/examples/validationFailUnrecognisedParams/pipeline/nextflow_schema.json index 3efe8c4..6096ceb 100644 --- a/examples/validationFailUnrecognisedParams/pipeline/nextflow_schema.json +++ b/examples/validationFailUnrecognisedParams/pipeline/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -33,7 +33,7 @@ }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/defs/input_output_options" } ] } diff --git a/examples/validationWarnUnrecognisedParams/pipeline/nextflow.config b/examples/validationWarnUnrecognisedParams/pipeline/nextflow.config index e30a871..d4c7f3d 100644 --- a/examples/validationWarnUnrecognisedParams/pipeline/nextflow.config +++ b/examples/validationWarnUnrecognisedParams/pipeline/nextflow.config @@ -1,5 +1,5 @@ plugins { - id 'nf-validation@0.2.1' + id 'nf-validation@2.0.0' } params { diff --git a/examples/validationWarnUnrecognisedParams/pipeline/nextflow_schema.json b/examples/validationWarnUnrecognisedParams/pipeline/nextflow_schema.json index 3efe8c4..6096ceb 100644 --- a/examples/validationWarnUnrecognisedParams/pipeline/nextflow_schema.json +++ b/examples/validationWarnUnrecognisedParams/pipeline/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -33,7 +33,7 @@ }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/defs/input_output_options" } ] } diff --git a/mkdocs.yml b/mkdocs.yml index 98ec5a2..ae6a4a7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -8,6 +8,7 @@ nav: - Home: - index.md - background.md + - migration_guide.md - Schema: - nextflow_schema/index.md - nextflow_schema/create_schema.md @@ -19,7 +20,7 @@ nav: - parameters/validation.md - parameters/help_text.md - parameters/summary_log.md - - Sample sheets: + - Samplesheets: - samplesheets/validate_sample_sheet.md - samplesheets/fromSamplesheet.md - samplesheets/examples.md diff --git a/parameters_meta_schema.json b/parameters_meta_schema.json index 5d5f58b..cf26192 100644 --- a/parameters_meta_schema.json +++ b/parameters_meta_schema.json @@ 
diff --git a/parameters_meta_schema.json b/parameters_meta_schema.json
index 5d5f58b..cf26192 100644
--- a/parameters_meta_schema.json
+++ b/parameters_meta_schema.json
@@ -1,5 +1,5 @@
 {
-    "$schema": "http://json-schema.org/draft-07/schema",
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
     "$id": "https://nextflow.io",
     "title": "Nextflow Schema Meta-schema",
     "description": "Meta-schema to validate Nextflow parameter schema files",
@@ -30,7 +30,7 @@
             "type": "string",
             "const": "object"
         },
-        "definitions": {
+        "defs": {
             "title": "Parameter groups",
             "type": "object",
             "patternProperties": {
@@ -139,7 +139,7 @@
                 "properties": {
                     "$ref": {
                         "type": "string",
-                        "pattern": "^#/definitions/"
+                        "pattern": "^#/defs/"
                    }
                }
            }
diff --git a/plugins/nf-validation/build.gradle b/plugins/nf-validation/build.gradle
index ae2652f..1247983 100644
--- a/plugins/nf-validation/build.gradle
+++ b/plugins/nf-validation/build.gradle
@@ -54,7 +54,8 @@ dependencies {
     compileOnly "io.nextflow:nextflow:$nextflowVersion"
     compileOnly 'org.slf4j:slf4j-api:1.7.10'
     compileOnly 'org.pf4j:pf4j:3.4.1'
-    api 'com.github.everit-org.json-schema:org.everit.json.schema:1.14.1'
+    implementation 'org.json:json:20230227'
+    implementation 'dev.harrel:json-schema:1.5.0'
 
     // test configuration
     testImplementation "io.nextflow:nextflow:$nextflowVersion"
diff --git a/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluatorFactory.groovy b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluatorFactory.groovy
new file mode 100644
index 0000000..c3bc08a
--- /dev/null
+++ b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluatorFactory.groovy
@@ -0,0 +1,53 @@
+package nextflow.validation
+
+import nextflow.Global
+import nextflow.Session
+import dev.harrel.jsonschema.EvaluatorFactory
+import dev.harrel.jsonschema.Evaluator
+import dev.harrel.jsonschema.SchemaParsingContext
+import dev.harrel.jsonschema.JsonNode
+
+/**
+ * @author : nvnieuwk
+ */
+
+class CustomEvaluatorFactory implements EvaluatorFactory {
+
+    private Boolean lenientMode
+    private String baseDir
+
+    CustomEvaluatorFactory() {
+        def Session session = Global.getSession()
+        this.lenientMode = session.params.validationLenientMode ?: false
+        this.baseDir = session.baseDir.toString()
+    }
+
+    @Override
+    public Optional create(SchemaParsingContext ctx, String fieldName, JsonNode schemaNode) {
+        if (fieldName == "format" && schemaNode.isString()) {
+            def String schemaString = schemaNode.asString()
+            switch (schemaString) {
+                case "directory-path":
+                    return Optional.of(new FormatDirectoryPathEvaluator())
+                case "file-path":
+                    return Optional.of(new FormatFilePathEvaluator())
+                case "path":
+                    return Optional.of(new FormatPathEvaluator())
+                case "file-path-pattern":
+                    return Optional.of(new FormatFilePathPatternEvaluator())
+            }
+        } else if (fieldName == "exists" && schemaNode.isBoolean()) {
+            return Optional.of(new ExistsEvaluator(schemaNode.asBoolean()))
+        } else if (fieldName == "schema" && schemaNode.isString()) {
+            return Optional.of(new SchemaEvaluator(schemaNode.asString(), this.baseDir))
+        } else if (fieldName == "uniqueEntries" && schemaNode.isArray()) {
+            return Optional.of(new UniqueEntriesEvaluator(schemaNode.asArray()))
+        } else if (fieldName == "type" && (schemaNode.isString() || schemaNode.isArray()) && lenientMode) {
+            return Optional.of(new LenientTypeEvaluator(schemaNode))
+        } else if (fieldName == "deprecated" && schemaNode.isBoolean()) {
+            return Optional.of(new DeprecatedEvaluator(schemaNode.asBoolean()))
+        }
+
+        return Optional.empty()
+    }
+}
\ No newline at end of file
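For context, a minimal sketch of how this factory is wired into a `dev.harrel:json-schema` validator; this mirrors what `JsonSchemaValidator.groovy` further down in this diff does:

```groovy
import dev.harrel.jsonschema.EvaluatorFactory
import dev.harrel.jsonschema.FormatEvaluatorFactory
import dev.harrel.jsonschema.ValidatorFactory
import dev.harrel.jsonschema.providers.OrgJsonNode

// Compose the custom nf-validation keywords with the standard format evaluators
def validator = new ValidatorFactory()
    .withJsonNodeFactory(new OrgJsonNode.Factory())
    .withEvaluatorFactory(EvaluatorFactory.compose(new CustomEvaluatorFactory(), new FormatEvaluatorFactory()))
```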
diff --git a/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/DeprecatedEvaluator.groovy b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/DeprecatedEvaluator.groovy
new file mode 100644
index 0000000..ff9c18e
--- /dev/null
+++ b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/DeprecatedEvaluator.groovy
@@ -0,0 +1,35 @@
+package nextflow.validation
+
+import dev.harrel.jsonschema.Evaluator
+import dev.harrel.jsonschema.EvaluationContext
+import dev.harrel.jsonschema.JsonNode
+import nextflow.Nextflow
+
+import groovy.util.logging.Slf4j
+import java.nio.file.Path
+
+/**
+ * @author : nvnieuwk
+ */
+
+@Slf4j
+class DeprecatedEvaluator implements Evaluator {
+    // Checks if the use of this option is deprecated
+
+    private final Boolean deprecated
+
+    DeprecatedEvaluator(Boolean deprecated) {
+        this.deprecated = deprecated
+    }
+
+    @Override
+    public Evaluator.Result evaluate(EvaluationContext ctx, JsonNode node) {
+        // Succeed straight away if the option is not deprecated
+        if (!this.deprecated) {
+            return Evaluator.Result.success()
+        }
+
+        return Evaluator.Result.failure("This option is deprecated")
+
+    }
+}
\ No newline at end of file
diff --git a/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/ExistsEvaluator.groovy b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/ExistsEvaluator.groovy
new file mode 100644
index 0000000..100789b
--- /dev/null
+++ b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/ExistsEvaluator.groovy
@@ -0,0 +1,55 @@
+package nextflow.validation
+
+import dev.harrel.jsonschema.Evaluator
+import dev.harrel.jsonschema.EvaluationContext
+import dev.harrel.jsonschema.JsonNode
+import nextflow.Nextflow
+
+import groovy.util.logging.Slf4j
+import java.nio.file.Path
+
+/**
+ * @author : nvnieuwk
+ */
+
+@Slf4j
+class ExistsEvaluator implements Evaluator {
+    // The file should or should not exist
+
+    private final Boolean exists
+
+    ExistsEvaluator(Boolean exists) {
+        this.exists = exists
+    }
+
+    @Override
+    public Evaluator.Result evaluate(EvaluationContext ctx, JsonNode node) {
+        // To stay consistent with other keywords, types not applicable to this keyword should succeed
+        if (!node.isString()) {
+            return Evaluator.Result.success()
+        }
+
+        def String value = node.asString()
+
+        // Skip validation of S3 paths for now
+        if (value.startsWith('s3://')) {
+            log.debug("S3 paths are not supported by 'ExistsEvaluator': '${value}'")
+            return Evaluator.Result.success()
+        }
+
+        // Don't evaluate file path patterns: Nextflow.file() returns a List for
+        // glob patterns, so this check has to happen before casting to Path
+        def Object file = Nextflow.file(value)
+        if (file instanceof List) {
+            return Evaluator.Result.success()
+        }
+
+        // Actual validation logic
+        def Path path = file as Path
+        if (!path.exists() && this.exists == true) {
+            return Evaluator.Result.failure("the file or directory '${value}' does not exist" as String)
+        } else if (path.exists() && this.exists == false) {
+            return Evaluator.Result.failure("the file or directory '${value}' should not exist" as String)
+        }
+        return Evaluator.Result.success()
+    }
+}
\ No newline at end of file
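A rough illustration of why the `instanceof List` guard above has to run before the cast (the file names are hypothetical):

```groovy
import nextflow.Nextflow

def plain = Nextflow.file('/data/sample1.fastq')  // a single java.nio.file.Path
def globbed = Nextflow.file('/data/*.fastq')      // a List of matching paths

assert !(plain instanceof List)
assert globbed instanceof List
```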
diff --git a/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/FormatDirectoryPathEvaluator.groovy b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/FormatDirectoryPathEvaluator.groovy
new file mode 100644
index 0000000..7499aaa
--- /dev/null
+++ b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/FormatDirectoryPathEvaluator.groovy
@@ -0,0 +1,41 @@
+package nextflow.validation
+
+import dev.harrel.jsonschema.Evaluator
+import dev.harrel.jsonschema.EvaluationContext
+import dev.harrel.jsonschema.JsonNode
+import nextflow.Nextflow
+
+import groovy.util.logging.Slf4j
+import java.nio.file.Path
+
+/**
+ * @author : nvnieuwk
+ */
+
+@Slf4j
+class FormatDirectoryPathEvaluator implements Evaluator {
+    // The string should be a directory
+
+    @Override
+    public Evaluator.Result evaluate(EvaluationContext ctx, JsonNode node) {
+        // To stay consistent with other keywords, types not applicable to this keyword should succeed
+        if (!node.isString()) {
+            return Evaluator.Result.success()
+        }
+
+        def String value = node.asString()
+
+        // Skip validation of S3 paths for now
+        if (value.startsWith('s3://')) {
+            log.debug("S3 paths are not supported by 'FormatDirectoryPathEvaluator': '${value}'")
+            return Evaluator.Result.success()
+        }
+
+        // Actual validation logic
+        def Path file = Nextflow.file(value) as Path
+        if (file.exists() && !file.isDirectory()) {
+            return Evaluator.Result.failure("'${value}' is not a directory, but a file" as String)
+        }
+        return Evaluator.Result.success()
+    }
+}
\ No newline at end of file
diff --git a/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/FormatFilePathEvaluator.groovy b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/FormatFilePathEvaluator.groovy
new file mode 100644
index 0000000..47dd527
--- /dev/null
+++ b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/FormatFilePathEvaluator.groovy
@@ -0,0 +1,41 @@
+package nextflow.validation
+
+import dev.harrel.jsonschema.Evaluator
+import dev.harrel.jsonschema.EvaluationContext
+import dev.harrel.jsonschema.JsonNode
+import nextflow.Nextflow
+
+import groovy.util.logging.Slf4j
+import java.nio.file.Path
+
+/**
+ * @author : nvnieuwk
+ */
+
+@Slf4j
+class FormatFilePathEvaluator implements Evaluator {
+    // The string should be a file
+
+    @Override
+    public Evaluator.Result evaluate(EvaluationContext ctx, JsonNode node) {
+        // To stay consistent with other keywords, types not applicable to this keyword should succeed
+        if (!node.isString()) {
+            return Evaluator.Result.success()
+        }
+
+        def String value = node.asString()
+
+        // Skip validation of S3 paths for now
+        if (value.startsWith('s3://')) {
+            log.debug("S3 paths are not supported by 'FormatFilePathEvaluator': '${value}'")
+            return Evaluator.Result.success()
+        }
+
+        // Actual validation logic
+        def Path file = Nextflow.file(value) as Path
+        if (file.exists() && file.isDirectory()) {
+            return Evaluator.Result.failure("'${value}' is not a file, but a directory" as String)
+        }
+        return Evaluator.Result.success()
+    }
+}
\ No newline at end of file
diff --git a/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/FormatFilePathPatternEvaluator.groovy b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/FormatFilePathPatternEvaluator.groovy
new file mode 100644
index 0000000..75c4dad
--- /dev/null
+++ b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/FormatFilePathPatternEvaluator.groovy
@@ -0,0 +1,51 @@
+package nextflow.validation
+
+import dev.harrel.jsonschema.Evaluator
+import dev.harrel.jsonschema.EvaluationContext
+import dev.harrel.jsonschema.JsonNode
+import nextflow.Nextflow
+
+import groovy.util.logging.Slf4j
+import java.nio.file.Path
+
+/**
+ * @author : nvnieuwk
+ */
+
+@Slf4j
+class FormatFilePathPatternEvaluator implements Evaluator {
+    // The string should be a path pattern
+
+    @Override
+    public Evaluator.Result evaluate(EvaluationContext ctx, JsonNode node) {
+        // To stay consistent with other keywords, types not applicable to this keyword should succeed
+        if (!node.isString()) {
+            return Evaluator.Result.success()
+        }
+
+        def String value = node.asString()
+
+        // Skip validation of S3 paths for now
+        if (value.startsWith('s3://')) {
+            log.debug("S3 paths are not supported by 'FormatFilePathPatternEvaluator': '${value}'")
+            return Evaluator.Result.success()
+        }
+
+        // Actual validation logic
+        def List files = Nextflow.files(value)
+        def List errors = []
+
+        if(files.size() == 0) {
+            return Evaluator.Result.failure("No files were found using the glob pattern '${value}'" as String)
+        }
+        for( file : files ) {
+            if (file.isDirectory()) {
+                errors.add("'${file.toString()}' is not a file, but a directory" as String)
+            }
+        }
+        if(errors.size() > 0) {
+            return Evaluator.Result.failure(errors.join('\n'))
+        }
+        return Evaluator.Result.success()
+    }
+}
\ No newline at end of file
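Unlike `Nextflow.file()`, `Nextflow.files()` (plural) always expands to a list, which is what lets the pattern evaluator treat an empty result as a failed glob. A small sketch with a hypothetical pattern:

```groovy
import nextflow.Nextflow

def matches = Nextflow.files('fastq/*_R{1,2}.fastq.gz')  // hypothetical glob
if (matches.size() == 0) {
    // this is the condition FormatFilePathPatternEvaluator turns into a failure
    println "No files were found using the glob pattern"
}
```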
diff --git a/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/FormatPathEvaluator.groovy b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/FormatPathEvaluator.groovy
new file mode 100644
index 0000000..6d3ac92
--- /dev/null
+++ b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/FormatPathEvaluator.groovy
@@ -0,0 +1,38 @@
+package nextflow.validation
+
+import dev.harrel.jsonschema.Evaluator
+import dev.harrel.jsonschema.EvaluationContext
+import dev.harrel.jsonschema.JsonNode
+import nextflow.Nextflow
+
+import groovy.util.logging.Slf4j
+import java.nio.file.Path
+
+/**
+ * @author : nvnieuwk
+ */
+
+@Slf4j
+class FormatPathEvaluator implements Evaluator {
+    // The string should be a path
+
+    @Override
+    public Evaluator.Result evaluate(EvaluationContext ctx, JsonNode node) {
+        // To stay consistent with other keywords, types not applicable to this keyword should succeed
+        if (!node.isString()) {
+            return Evaluator.Result.success()
+        }
+
+        def String value = node.asString()
+
+        // Skip validation of S3 paths for now
+        if (value.startsWith('s3://')) {
+            log.debug("S3 paths are not supported by 'FormatPathEvaluator': '${value}'")
+            return Evaluator.Result.success()
+        }
+
+        // Actual validation logic: the value only needs to be convertible to a Path
+        def Path file = Nextflow.file(value) as Path
+        return Evaluator.Result.success()
+    }
+}
\ No newline at end of file
diff --git a/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/LenientTypeEvaluator.groovy b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/LenientTypeEvaluator.groovy
new file mode 100644
index 0000000..6977c05
--- /dev/null
+++ b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/LenientTypeEvaluator.groovy
@@ -0,0 +1,56 @@
+package nextflow.validation
+
+import dev.harrel.jsonschema.Evaluator
+import dev.harrel.jsonschema.EvaluationContext
+import dev.harrel.jsonschema.JsonNode
+import dev.harrel.jsonschema.SimpleType
+import nextflow.Nextflow
+
+import groovy.util.logging.Slf4j
+import java.nio.file.Path
+import java.util.stream.Collectors
+import static java.util.Collections.singleton
+import static java.util.Collections.unmodifiableList
+
+/**
+ * @author : nvnieuwk
+ */
+
+@Slf4j
+class LenientTypeEvaluator implements Evaluator {
+    // Validate against the type
+
+    private final Set types
+    private final List lenientTypes = [
+        SimpleType.STRING,
+        SimpleType.INTEGER,
+        SimpleType.NUMBER,
+        SimpleType.BOOLEAN,
+        SimpleType.NULL
+    ]
+
+    LenientTypeEvaluator(JsonNode node) {
+        if (node.isString()) {
+            this.types = singleton(SimpleType.fromName(node.asString()))
+        } else {
+            this.types = node.asArray().stream()
+                .map(JsonNode::asString)
+                .map(SimpleType::fromName)
+                .collect(Collectors.toSet())
+        }
+    }
+
+    @Override
+    public Result evaluate(EvaluationContext ctx, JsonNode node) {
+        def SimpleType nodeType = node.getNodeType()
+        if (types.contains(SimpleType.STRING) && lenientTypes.contains(nodeType)) {
+            return Result.success()
+        }
+        if (types.contains(nodeType) || nodeType == SimpleType.INTEGER && types.contains(SimpleType.NUMBER)) {
+            return Result.success()
+        } else {
+            def List typeNames = unmodifiableList(types.stream().map(SimpleType::getName).collect(Collectors.toList()))
+            return Result.failure(String.format("Value is [%s] but should be %s", nodeType.getName(), typeNames))
+        }
+    }
+}
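The net effect of the lenient evaluator: any scalar is accepted where the schema says `string`, and integers are accepted where it says `number`. A rough sketch, assuming `dev.harrel`'s `JsonNodeFactory.create(String)` for building nodes from raw JSON, and passing a `null` context since the evaluator does not use it:

```groovy
import dev.harrel.jsonschema.providers.OrgJsonNode

def nodes = new OrgJsonNode.Factory()

// Schema says "string": the integer 42 still passes in lenient mode
new LenientTypeEvaluator(nodes.create('"string"')).evaluate(null, nodes.create('42'))    // success

// Schema says "integer": the string "42" fails, even leniently
new LenientTypeEvaluator(nodes.create('"integer"')).evaluate(null, nodes.create('"42"')) // failure
```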
diff --git a/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/SchemaEvaluator.groovy b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/SchemaEvaluator.groovy
new file mode 100644
index 0000000..ad9cfb5
--- /dev/null
+++ b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/SchemaEvaluator.groovy
@@ -0,0 +1,65 @@
+package nextflow.validation
+
+import dev.harrel.jsonschema.Evaluator
+import dev.harrel.jsonschema.EvaluationContext
+import dev.harrel.jsonschema.JsonNode
+import nextflow.Nextflow
+import nextflow.Global
+import org.json.JSONArray
+import org.json.JSONObject
+
+import groovy.util.logging.Slf4j
+import java.nio.file.Path
+import java.nio.file.Files
+
+/**
+ * @author : nvnieuwk
+ */
+
+@Slf4j
+class SchemaEvaluator implements Evaluator {
+    // Evaluate the file using the given schema
+
+    private final String schema
+    private final String baseDir
+
+    SchemaEvaluator(String schema, String baseDir) {
+        this.baseDir = baseDir
+        this.schema = schema
+    }
+
+    @Override
+    public Evaluator.Result evaluate(EvaluationContext ctx, JsonNode node) {
+        // To stay consistent with other keywords, types not applicable to this keyword should succeed
+        if (!node.isString()) {
+            return Evaluator.Result.success()
+        }
+
+        def String value = node.asString()
+
+        // Actual validation logic
+        def Path file = Nextflow.file(value)
+        // Don't validate if the file does not exist or is a directory
+        if(!file.exists() || file.isDirectory()) {
+            log.debug("Could not validate the file ${file.toString()}")
+            return Evaluator.Result.success()
+        }
+
+        log.debug("Started validating ${file.toString()}")
+
+        def String schemaFull = Utils.getSchemaPath(this.baseDir, this.schema)
+        def JSONArray arrayJSON = Utils.fileToJsonArray(file, Path.of(schemaFull))
+        def String schemaContents = Files.readString( Path.of(schemaFull) )
+        def validator = new JsonSchemaValidator()
+
+        def List validationErrors = validator.validate(arrayJSON, schemaContents)
+        if (validationErrors) {
+            def List errors = ["Validation of file failed:"] + validationErrors.collect { "\t${it}" as String}
+            return Evaluator.Result.failure(errors.join("\n"))
+        }
+
+        log.debug("Validation of file '${value}' passed!")
+        return Evaluator.Result.success()
+    }
+
+}
\ No newline at end of file
diff --git a/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/UniqueEntriesEvaluator.groovy b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/UniqueEntriesEvaluator.groovy
new file mode 100644
index 0000000..999c567
--- /dev/null
+++ b/plugins/nf-validation/src/main/nextflow/validation/CustomEvaluators/UniqueEntriesEvaluator.groovy
@@ -0,0 +1,56 @@
+package nextflow.validation
+
+import dev.harrel.jsonschema.Evaluator
+import dev.harrel.jsonschema.EvaluationContext
+import dev.harrel.jsonschema.JsonNode
+import dev.harrel.jsonschema.providers.OrgJsonNode
+import org.json.JSONObject
+
+import groovy.json.JsonBuilder
+import groovy.util.logging.Slf4j
+import java.nio.file.Path
+
+/**
+ * @author : nvnieuwk
+ */
+
+@Slf4j
+class UniqueEntriesEvaluator implements Evaluator {
+    // Combinations of these columns should be unique
+
+    private final List uniqueEntries
+
+    UniqueEntriesEvaluator(List uniqueEntries) {
+        this.uniqueEntries = uniqueEntries.collect { it.asString() }
+    }
+
+    @Override
+    public Evaluator.Result evaluate(EvaluationContext ctx, JsonNode node) {
+        // To stay consistent with other keywords, types not applicable to this keyword should succeed
+        if (!node.isArray()) {
+            return Evaluator.Result.success()
+        }
+
+        def List uniques = []
+        def Integer count = 0
+        for(nodeEntry : node.asArray()) {
+            count++
+            if(!nodeEntry.isObject()) {
+                return Evaluator.Result.success()
+            }
+            // Keep only the fields that are listed in uniqueEntries
+            def Map filteredNodes = nodeEntry
+                .asObject()
+                .findAll { k,v -> uniqueEntries.contains(k) }
+                .collectEntries { k,v -> [k, v.asString()] }
+            for (uniqueNode : uniques) {
+                if(filteredNodes.equals(uniqueNode)) {
+                    return Evaluator.Result.failure("Entry ${count}: Detected non-unique combination of the following fields: ${uniqueEntries}" as String)
+                }
+            }
+            uniques.add(filteredNodes)
+        }
+
+        return Evaluator.Result.success()
+    }
+}
\ No newline at end of file
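The `findAll` above keeps only the fields named in `uniqueEntries` before comparing rows (a `dropWhile` would only drop a leading run of entries and let unrelated fields leak into the comparison). In plain Groovy terms, with a hypothetical row:

```groovy
def uniqueEntries = ['sample', 'replicate']
def row = [sample: 'sampleA', fastq_1: 'a_R1.fastq.gz', replicate: '1']

def filtered = row.findAll { k, v -> uniqueEntries.contains(k) }
assert filtered == [sample: 'sampleA', replicate: '1']
```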
diff --git a/plugins/nf-validation/src/main/nextflow/validation/FormatValidators/DirectoryPathValidator.groovy b/plugins/nf-validation/src/main/nextflow/validation/FormatValidators/DirectoryPathValidator.groovy
deleted file mode 100644
index 0d935b5..0000000
--- a/plugins/nf-validation/src/main/nextflow/validation/FormatValidators/DirectoryPathValidator.groovy
+++ /dev/null
@@ -1,24 +0,0 @@
-package nextflow.validation
-
-import java.nio.file.Path
-import groovy.util.logging.Slf4j
-
-import org.everit.json.schema.FormatValidator
-import nextflow.Nextflow
-
-@Slf4j
-public class DirectoryPathValidator implements FormatValidator {
-
-    @Override
-    public Optional validate(final String subject) {
-        if (subject.startsWith('s3://')) {
-            log.debug("S3 paths are not supported by 'DirectoryPathValidator': '${subject}'")
-            return Optional.empty()
-        }
-        Path file = Nextflow.file(subject) as Path
-        if (file.exists() && !file.isDirectory()) {
-            return Optional.of("'${subject}' is not a directory, but a file" as String)
-        }
-        return Optional.empty()
-    }
-}
\ No newline at end of file
diff --git a/plugins/nf-validation/src/main/nextflow/validation/FormatValidators/FilePathPatternValidator.groovy b/plugins/nf-validation/src/main/nextflow/validation/FormatValidators/FilePathPatternValidator.groovy
deleted file mode 100644
index 4ddaaea..0000000
--- a/plugins/nf-validation/src/main/nextflow/validation/FormatValidators/FilePathPatternValidator.groovy
+++ /dev/null
@@ -1,34 +0,0 @@
-package nextflow.validation
-
-import java.nio.file.Path
-import groovy.util.logging.Slf4j
-
-import org.everit.json.schema.FormatValidator
-import nextflow.Nextflow
-
-@Slf4j
-public class FilePathPatternValidator implements FormatValidator {
-
-    @Override
-    public Optional validate(final String subject) {
-        if (subject.startsWith('s3://')) {
-            log.debug("S3 paths are not supported by 'FilePathPatternValidator': '${subject}'")
-            return Optional.empty()
-        }
-        ArrayList files = Nextflow.files(subject)
-        ArrayList errors = []
-
-        if(files.size() == 0) {
-            return Optional.of("No files were found using the globbing pattern '${subject}'" as String)
-        }
-        for( file : files ) {
-            if (file.isDirectory()) {
-                errors.add("'${file.toString()}' is not a file, but a directory" as String)
-            }
-        }
-        if(errors.size() > 0) {
-            return Optional.of(errors.join('\n'))
-        }
-        return Optional.empty()
-    }
-}
\ No newline at end of file
diff --git a/plugins/nf-validation/src/main/nextflow/validation/FormatValidators/FilePathValidator.groovy b/plugins/nf-validation/src/main/nextflow/validation/FormatValidators/FilePathValidator.groovy
deleted file mode 100644
index a49ec6c..0000000
--- a/plugins/nf-validation/src/main/nextflow/validation/FormatValidators/FilePathValidator.groovy
+++ /dev/null
@@ -1,24 +0,0 @@
-package nextflow.validation
-
-import java.nio.file.Path
-import groovy.util.logging.Slf4j
-
-import org.everit.json.schema.FormatValidator
-import nextflow.Nextflow
-
-@Slf4j
-public class FilePathValidator implements FormatValidator {
-
-    @Override
-    public Optional validate(final String subject) {
-        if (subject.startsWith('s3://')) {
-            log.debug("S3 paths are not supported by 'FilePathValidator': '${subject}'")
-            return Optional.empty()
-        }
-        Path file = Nextflow.file(subject) as Path
-        if (file.isDirectory()) {
-            return Optional.of("'${subject}' is not a file, but a directory" as String)
-        }
-        return Optional.empty()
-    }
-}
\ No newline at end of file
diff --git a/plugins/nf-validation/src/main/nextflow/validation/FormatValidators/PathValidator.groovy b/plugins/nf-validation/src/main/nextflow/validation/FormatValidators/PathValidator.groovy
deleted file mode 100644
index afe82e0..0000000
--- a/plugins/nf-validation/src/main/nextflow/validation/FormatValidators/PathValidator.groovy
+++ /dev/null
@@ -1,21 +0,0 @@
-package nextflow.validation
-
-import java.nio.file.Path
-import groovy.util.logging.Slf4j
-
-import org.everit.json.schema.FormatValidator
-import nextflow.Nextflow
-
-@Slf4j
-public class PathValidator implements FormatValidator {
-
-    @Override
-    public Optional validate(final String subject) {
-        if (subject.startsWith('s3://')) {
-            log.debug("S3 paths are not supported by 'PathValidator': '${subject}'")
-            return Optional.empty()
-        }
-        Path file = Nextflow.file(subject) as Path
-        return Optional.empty()
-    }
-}
\ No newline at end of file
diff --git a/plugins/nf-validation/src/main/nextflow/validation/JsonSchemaValidator.groovy b/plugins/nf-validation/src/main/nextflow/validation/JsonSchemaValidator.groovy
new file mode 100644
index 0000000..7ca6406
--- /dev/null
+++ b/plugins/nf-validation/src/main/nextflow/validation/JsonSchemaValidator.groovy
@@ -0,0 +1,111 @@
+package nextflow.validation
+
+import groovy.util.logging.Slf4j
+import groovy.transform.CompileStatic
+import org.json.JSONObject
+import org.json.JSONArray
+import dev.harrel.jsonschema.ValidatorFactory
+import dev.harrel.jsonschema.Validator
+import dev.harrel.jsonschema.EvaluatorFactory
+import dev.harrel.jsonschema.FormatEvaluatorFactory
+import dev.harrel.jsonschema.JsonNode
+import dev.harrel.jsonschema.providers.OrgJsonNode
+
+import java.util.regex.Pattern
+import java.util.regex.Matcher
+
+/**
+ * @author : nvnieuwk
+ */
+
+@Slf4j
+@CompileStatic
+public class JsonSchemaValidator {
+
+    private static ValidatorFactory validator
+    private static Pattern uriPattern = Pattern.compile('^#/(\\d*)?/?(.*)$')
+
+    JsonSchemaValidator() {
+        this.validator = new ValidatorFactory()
+            .withJsonNodeFactory(new OrgJsonNode.Factory())
+            // .withDialect() // TODO define the dialect
+            .withEvaluatorFactory(EvaluatorFactory.compose(new CustomEvaluatorFactory(), new FormatEvaluatorFactory()))
+    }
+
+    private static List validateObject(JsonNode input, String validationType, Object rawJson, String schemaString) {
+        def JSONObject schema = new JSONObject(schemaString)
+        def String draft = Utils.getValueFromJson("#/\$schema", schema)
+        if(draft != "https://json-schema.org/draft/2020-12/schema") {
+            log.error("""Failed to load the meta schema:
+The schema draft that was used (${draft}) is not correct; please use \"https://json-schema.org/draft/2020-12/schema\" instead.
+See here for more information: https://json-schema.org/specification#migrating-from-older-drafts
+""")
+            throw new SchemaValidationException("", [])
+        }
+
+        def Validator.Result result = this.validator.validate(schema, input)
+        def List errors = []
+        for (error : result.getErrors()) {
+            def String errorString = error.getError()
+            // Skip double error in the parameter schema
+            if (errorString.startsWith("Value does not match against the schemas at indexes") && validationType == "parameter") {
+                continue
+            }
+
+            def String instanceLocation = error.getInstanceLocation()
+            def String value = Utils.getValueFromJson(instanceLocation, rawJson)
+
+            // Get the custom errorMessage if there is one and the validation errors are not about the content of the file
+            def String schemaLocation = error.getSchemaLocation().replaceFirst(/^[^#]+/, "")
+            def String customError = ""
+            if (!errorString.startsWith("Validation of file failed:")) {
+                customError = Utils.getValueFromJson("${schemaLocation}/errorMessage", schema) as String
+            }
+
+            // Change some error messages to make them more clear
+            if (customError == "") {
+                def String keyword = error.getKeyword()
+                if (keyword == "required") {
+                    def Matcher matcher = errorString =~ ~/\[\[([^\[\]]*)\]\]$/
+                    def String missingKeywords = matcher.findAll().flatten().last()
+                    customError = "Missing required ${validationType}(s): ${missingKeywords}"
+                }
+            }
+
+            def String[] locationList = instanceLocation.split("/").findAll { it != "" }
+
+            if (locationList.size() > 0 && Utils.isInteger(locationList[0]) && validationType == "field") {
+                def Integer entryInteger = locationList[0] as Integer
+                def String entryString = "Entry ${entryInteger + 1}" as String
+                def String fieldError = ""
+                if(locationList.size() > 1) {
+                    fieldError = "Error for ${validationType} '${locationList[1..-1].join("/")}' (${value}): ${customError ?: errorString}"
+                } else {
+                    fieldError = "${customError ?: errorString}" as String
+                }
+                errors.add("-> ${entryString}: ${fieldError}" as String)
+            } else if (validationType == "parameter") {
+                def String fieldName = locationList.join("/")
+                if(fieldName != "") {
+                    errors.add("* --${fieldName} (${value}): ${customError ?: errorString}" as String)
+                } else {
+                    errors.add("* ${customError ?: errorString}" as String)
+                }
+            } else {
+                errors.add("-> ${customError ?: errorString}" as String)
+            }
+
+        }
+        return errors
+    }
+
+    public static List validate(JSONArray input, String schemaString) {
+        def JsonNode jsonInput = new OrgJsonNode.Factory().wrap(input)
+        return this.validateObject(jsonInput, "field", input, schemaString)
+    }
+
+    public static List validate(JSONObject input, String schemaString) {
+        def JsonNode jsonInput = new OrgJsonNode.Factory().wrap(input)
+        return this.validateObject(jsonInput, "parameter", input, schemaString)
+    }
+}
\ No newline at end of file
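Both `validate()` overloads wrap the input and delegate to `validateObject()`; usage from the rest of the plugin looks like this (a sketch, with `schemaString` standing in for the raw schema text):

```groovy
import org.json.JSONObject

def validator = new JsonSchemaValidator()
def paramsJSON = new JSONObject(['input': 'samplesheet.csv'])  // hypothetical params
def List errors = validator.validate(paramsJSON, schemaString)
if (errors) {
    // one "* --param (value): message" line per failure
    errors.each { println it }
}
```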
diff --git a/plugins/nf-validation/src/main/nextflow/validation/SamplesheetConverter.groovy b/plugins/nf-validation/src/main/nextflow/validation/SamplesheetConverter.groovy
index 566153a..5fe8b21 100644
--- a/plugins/nf-validation/src/main/nextflow/validation/SamplesheetConverter.groovy
+++ b/plugins/nf-validation/src/main/nextflow/validation/SamplesheetConverter.groovy
@@ -1,344 +1,237 @@
 package nextflow.validation
 
 import groovy.json.JsonSlurper
-import groovy.json.JsonOutput
 import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
-import groovyx.gpars.dataflow.DataflowReadChannel
-import groovyx.gpars.dataflow.DataflowWriteChannel
 import java.nio.file.Path
-import java.util.concurrent.CompletableFuture
-import java.util.concurrent.CompletionException
-
-import org.yaml.snakeyaml.Yaml
-import org.everit.json.schema.loader.SchemaLoader
-import org.everit.json.schema.PrimitiveValidationStrategy
-import org.everit.json.schema.ValidationException
-import org.everit.json.schema.SchemaException
-import org.everit.json.schema.Schema
-import org.json.JSONArray
-import org.json.JSONObject
-import org.json.JSONTokener
-
-import nextflow.Channel
-import nextflow.Global
+
 import nextflow.Nextflow
-import nextflow.plugin.extension.Function
-import nextflow.Session
+/**
+ * @author : mirpedrol
+ * @author : nvnieuwk
+ * @author : awgymer
+ */
 
 @Slf4j
 @CompileStatic
 class SamplesheetConverter {
 
-    private static List errors = []
-    private static List schemaErrors = []
-    private static List warnings = []
-
-    private static List rows = []
-
-    static boolean hasErrors() { errors.size()>0 }
-    static Set getErrors() { errors.sort().collect { "\t${it}".toString() } as Set }
+    private static Path samplesheetFile
+    private static Path schemaFile
 
-    static boolean hasSchemaErrors() { schemaErrors.size()>0 }
-    static Set getSchemaErrors() { schemaErrors.sort().collect { "\t${it}".toString() } as Set }
+    SamplesheetConverter(Path samplesheetFile, Path schemaFile) {
+        this.samplesheetFile = samplesheetFile
+        this.schemaFile = schemaFile
+    }
 
-    static boolean hasWarnings() { warnings.size()>0 }
-    static Set getWarnings() { warnings.sort().collect { "\t${it}".toString() } as Set }
+    private static List rows = []
+    private static Map meta = [:]
 
-    private static Integer sampleCount = 0
+    private static Map getMeta() {
+        this.meta
+    }
 
-    static resetCount(){ sampleCount = 0 }
-    static increaseCount(){ sampleCount++ }
-    static Integer getCount(){ sampleCount }
+    private static Map resetMeta() {
+        this.meta = [:]
+    }
 
+    private static addMeta(Map newEntries) {
+        this.meta = this.meta + newEntries
+    }
 
-    static List convertToList(
-        Path samplesheetFile,
-        Path schemaFile,
-        Boolean skipDuplicateCheck
-    ) {
+    private static Boolean isMeta() {
+        this.meta.size() > 0
+    }
 
-        def Map schemaMap = (Map) new JsonSlurper().parseText(schemaFile.text)
-        def Map schemaFields = (Map) schemaMap["items"]["properties"]
-        def Set allFields = schemaFields.keySet()
-        def List requiredFields = (List) schemaMap["items"]["required"]
-        def Boolean containsHeader = !(allFields.size() == 1 && allFields[0] == "")
+    private static List unusedHeaders = []
 
-        def String fileType = getFileType(samplesheetFile)
-        def String delimiter = fileType == "csv" ? "," : fileType == "tsv" ? "\t" : null
-        def List samplesheetList
+    private static addUnusedHeader (String header) {
+        this.unusedHeaders.add(header)
+    }
 
-        if(fileType == "yaml"){
-            samplesheetList = new Yaml().load((samplesheetFile.text)).collect {
-                if(containsHeader) {
-                    return it as Map
-                }
-                return ["empty": it] as Map
-            }
-        }
-        else if(fileType == "json"){
-            samplesheetList = new JsonSlurper().parseText(samplesheetFile.text).collect {
-                if(containsHeader) {
-                    return it as Map
-                }
-                return ["empty": it] as Map
-            }
+    private static logUnusedHeadersWarning(String fileName) {
+        def Set unusedHeaders = this.unusedHeaders as Set
+        if(unusedHeaders.size() > 0) {
+            def String processedHeaders = unusedHeaders.collect { "\t- ${it}" }.join("\n")
+            log.warn("Found the following unidentified headers in ${fileName}:\n${processedHeaders}" as String)
         }
-        else {
-            Path fileSamplesheet = Nextflow.file(samplesheetFile) as Path
-            samplesheetList = fileSamplesheet.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'\"')
-        }
-
-        // Field checks + returning the channels
-        def Map booleanUniques = [:]
-        def Map listUniques = [:]
-        def Boolean headerCheck = true
-        this.rows = []
-        resetCount()
+    }
 
-        def List outputs = samplesheetList.collect { Map fullRow ->
-            increaseCount()
+    /*
+    Convert the samplesheet to a list of entries based on a schema
+    */
+    public static List convertToList() {
 
-            Map row = fullRow.findAll { it.value != "" }
-            def Set rowKeys = containsHeader ? row.keySet() : ["empty"].toSet()
-            def String entryInfo = fileType in ["yaml", "json"] ? " for entry ${this.getCount()}." : ""
+        def LinkedHashMap schemaMap = new JsonSlurper().parseText(this.schemaFile.text) as LinkedHashMap
+        def List samplesheetList = Utils.fileToList(this.samplesheetFile, this.schemaFile)
 
-            // Check the header (CSV/TSV) or present fields (YAML)
-            if(headerCheck) {
-                def unexpectedFields = containsHeader ? rowKeys - allFields : []
-                if(unexpectedFields.size() > 0) {
-                    this.warnings << "The samplesheet contains following unchecked field(s): ${unexpectedFields}${entryInfo}".toString()
-                }
+        this.rows = []
 
-                if(fileType != 'yaml'){
-                    headerCheck = false
+        def List channelFormat = samplesheetList.collect { entry ->
+            resetMeta()
+            def Object result = formatEntry(entry, schemaMap["items"] as LinkedHashMap)
+            if(isMeta()) {
+                if(result instanceof List) {
+                    result.add(0,getMeta())
+                } else {
+                    result = [getMeta(), result]
                 }
             }
+            return result
+        }
+        logUnusedHeadersWarning(this.samplesheetFile.toString())
+        return channelFormat
+    }
 
-            // Check for row uniqueness
-            if(!skipDuplicateCheck && this.rows.contains(row)) {
-                def Integer firstDuplicate = this.rows.findIndexOf { it == row }
-                this.errors << "The samplesheet contains duplicate rows for entry ${firstDuplicate + 1} and entry ${getCount()} (${row})".toString()
-            }
-            this.rows.add(row)
-
-            def Map meta = [:]
-            def ArrayList output = []
-
-            for( Map.Entry field : schemaFields ){
-                def String key = containsHeader ? field.key : "empty"
-                def Object input = row[key]
+    /*
+    This function processes an input value based on a schema.
+    The output will be created for addition to the output channel.
+    */
+    private static Object formatEntry(Object input, LinkedHashMap schema, String headerPrefix = "") {
 
-                // Check if the field is deprecated
-                if(field['value']['deprecated']){
-                    this.warnings << "The '${key}' field is deprecated and will no longer be used in the future. Please check the official documentation of the pipeline for more information.".toString()
-                }
+        // Add default values for missing entries
+        input = input != null ? input : schema.containsKey("default") ? schema.default : []
 
-                // Check required dependencies
-                def List dependencies = field['value']["dependentRequired"] as List
-                if(input && dependencies) {
-                    def List missingValues = []
-                    for( dependency in dependencies ){
-                        if(row[dependency] == "" || !(row[dependency])) {
-                            missingValues.add(dependency)
-                        }
-                    }
-                    if (missingValues) {
-                        this.errors << addSample("${dependencies} field(s) should be defined when '${key}' is specified, but the field(s) ${missingValues} is/are not defined.".toString())
-                    }
-                }
+        if (input instanceof Map) {
+            def List result = []
+            def LinkedHashMap properties = schema["properties"]
+            def Set unusedKeys = input.keySet() - properties.keySet()
+
+            // Check for properties in the samplesheet that have not been defined in the schema
+            unusedKeys.each{addUnusedHeader("${headerPrefix}${it}" as String)}
+
+            // Loop over every property to maintain the correct order
+            properties.each { property, schemaValues ->
+                def value = input[property]
+                def List metaIds = schemaValues["meta"] instanceof List ? schemaValues["meta"] as List : schemaValues["meta"] instanceof String ? [schemaValues["meta"]] : []
+                def String prefix = headerPrefix ? "${headerPrefix}${property}." : "${property}."
 
-                // Check if the field is unique
-                def unique = field['value']['unique']
-                def Boolean uniqueIsList = unique instanceof ArrayList
-                if(unique && !uniqueIsList){
-                    if(!(key in booleanUniques)){
-                        booleanUniques[key] = []
-                    }
-                    if(input in booleanUniques[key] && input){
-                        this.errors << addSample("The '${key}' value needs to be unique. '${input}' was found at least twice in the samplesheet.".toString())
-                    }
-                    booleanUniques[key].add(input as String)
-                }
-                else if(unique && uniqueIsList) {
-                    def Map newMap = (Map) row.subMap((List) [key] + (List) unique)
-                    if(!(key in listUniques)){
-                        listUniques[key] = []
+                // Add the value to the meta map if needed
+                if (metaIds) {
+                    metaIds.each {
+                        meta["${it}"] = processMeta(value, schemaValues as LinkedHashMap, prefix)
                     }
-                    if(newMap in listUniques[key] && input){
-                        this.errors << addSample("The combination of '${key}' with fields ${unique} needs to be unique. ${newMap} was found at least twice.".toString())
-                    }
-                    listUniques[key].add(newMap)
-                }
-
-                // Convert field to a meta field or add it as an input to the channel
-                def List metaNames = field['value']['meta'] as List
-                if(metaNames) {
-                    for(name : metaNames) {
-                        meta[name] = (input != '' && input != null) ?
-                            castToNFType(input, field) :
-                            field['value']['default'] != null ?
-                            castToNFType(field['value']['default'], field) :
-                            null
-                    }
-                }
+                }
+                // return the correctly casted value
                 else {
-                    def inputVal = (input != '' && input != null) ?
-                        castToNFType(input, field) :
-                        field['value']['default'] != null ?
-                        castToNFType(field['value']['default'], field) :
-                        []
-                    output.add(inputVal)
+                    result.add(formatEntry(value, schemaValues as LinkedHashMap, prefix))
                 }
             }
-            // Add meta to the output when a meta field has been created
-            if(meta != [:]) { output.add(0, meta) }
-            return output
+            return result
+        } else if (input instanceof List) {
+            def List result = []
+            def Integer count = 0
+            input.each {
+                // return the correctly casted value
+                def String prefix = headerPrefix ? "${headerPrefix}${count}." : "${count}."
+                result.add(formatEntry(it, schema["items"] as LinkedHashMap, prefix))
+                count++
+            }
+            return result
+        } else {
+            // Cast value to path type if needed and return the value
+            return processValue(input, schema)
         }
+    }
 
-        // check for samplesheet errors
-        if (this.hasErrors()) {
-            String message = "Samplesheet errors:\n" + this.getErrors().join("\n")
-            throw new SchemaValidationException(message, this.getErrors() as List)
-        }
+    private static List validPathFormats = ["file-path", "path", "directory-path", "file-path-pattern"]
+    private static List schemaOptions = ["anyOf", "oneOf", "allOf"]
 
-        // check for schema errors
-        if (this.hasSchemaErrors()) {
-            String message = "Samplesheet schema errors:\n" + this.getSchemaErrors().join("\n")
-            throw new SchemaValidationException(message, this.getSchemaErrors() as List)
-        }
+    /*
+    This function processes a value that's not a map or list and casts it to a file type if necessary.
+    When it is uncertain whether the value should be a path, some simple logic is applied that tries
+    to guess if it should be a file type
+    */
+    private static Object processValue(Object value, Map schemaEntry) {
+        if(!(value instanceof String)) {
+            return value
+        }
 
-        // check for warnings
-        if( this.hasWarnings() ) {
-            def msg = "Samplesheet warnings:\n" + this.getWarnings().join('\n')
-            log.warn(msg)
-        }
+        def String defaultFormat = schemaEntry.format ?: ""
 
-        return outputs
-    }
+        // A valid path format has been found in the schema
+        def Boolean foundStringFileFormat = false
 
-    // Function to infer the file type of the samplesheet
-    public static String getFileType(
-        Path samplesheetFile
-    ) {
-        def String extension = samplesheetFile.getExtension()
-        if (extension in ["csv", "tsv", "yml", "yaml", "json"]) {
-            return extension == "yml" ? "yaml" : extension
-        }
+    // Type string has been found without a valid path format
+        def Boolean foundStringNoFileFormat = false
 
-        def String header = getHeader(samplesheetFile)
+        if ((schemaEntry.type ?: "") == "string") {
+            if (validPathFormats.contains(schemaEntry.format ?: defaultFormat)) {
+                foundStringFileFormat = true
+            } else {
+                foundStringNoFileFormat = true
+            }
+        }
 
-        def Integer commaCount = header.count(",")
-        def Integer tabCount = header.count("\t")
+        schemaOptions.each { option ->
+            schemaEntry[option]?.each { subSchema ->
+                if ((subSchema["type"] ?: "" ) == "string") {
+                    if (validPathFormats.contains(subSchema["format"] ?: defaultFormat)) {
+                        foundStringFileFormat = true
+                    } else {
+                        foundStringNoFileFormat = true
+                    }
+                }
+            }
+        }
 
-        if ( commaCount == tabCount ){
-            throw new Exception("Could not derive file type from ${samplesheetFile}. Please specify the file extension (CSV, TSV, YML and YAML are supported).".toString())
-        }
-        if ( commaCount > tabCount ){
-            return "csv"
-        }
-        else {
-            return "tsv"
-        }
-    }
+        if(foundStringFileFormat && !foundStringNoFileFormat) {
+            return Nextflow.file(value)
+        } else if(foundStringFileFormat && foundStringNoFileFormat) {
+            // Do a simple check if the object could be a path
+            // This check looks for / in the filename or if a dot is
+            // present in the last 7 characters (possibly indicating an extension)
+            if(
+                value.contains("/") ||
+                (value.size() >= 7 && value[-7..-1].contains(".")) ||
+                (value.size() < 7 && value.contains("."))
+            ) {
+                return Nextflow.file(value)
+            }
+        }
+        return value
+    }
 
-    // Function to get the header from a CSV or TSV file
-    public static String getHeader(
-        Path samplesheetFile
-    ) {
-        def String header
-        samplesheetFile.withReader { header = it.readLine() }
-        return header
-    }
+    /*
    This function processes an input value based on a schema.
+    The output will be created for addition to the meta map.
+    */
+    private static Object processMeta(Object input, LinkedHashMap schema, String headerPrefix) {
+        // Add default values for missing entries
+        input = input != null ? input : schema.containsKey("default") ? schema.default : []
 
-    // Function to transform an input field from the samplesheet to its desired type
-    private static castToNFType(
-        Object input,
-        Map.Entry field
-    ) {
-        def String type = field['value']['type']
-        def String key = field.key
+        if (input instanceof Map) {
+            def Map result = [:]
+            def LinkedHashMap properties = schema["properties"]
+            def Set unusedKeys = input.keySet() - properties.keySet()
 
-        // Recursively call this function for each item in the array if the field is an array-type
-        // The returned values are collected into a single array
-        if (type == "array") {
-            def Map.Entry subfield = (Map.Entry) Map.entry(field.key, field['value']['items'])
-            log.debug "subfield = $subfield"
-            def ArrayList result = input.collect{ castToNFType(it, subfield) } as ArrayList
-            return result
-        }
+            // Check for properties in the samplesheet that have not been defined in the schema
+            unusedKeys.each{addUnusedHeader("${headerPrefix}${it}" as String)}
 
-        def String inputStr = input as String
-        // Convert string values
-        if(type == "string" || !type) {
-            def String result = inputStr as String
+            // Loop over every property to maintain the correct order
+            properties.each { property, schemaValues ->
+                def value = input[property]
+                def String prefix = headerPrefix ? "${headerPrefix}${property}." : "${property}."
+                result[property] = processMeta(value, schemaValues as LinkedHashMap, prefix)
+            }
+            return result
+        } else if (input instanceof List) {
+            def List result = []
+            def Integer count = 0
+            input.each {
+                // return the correctly casted value
+                def String prefix = headerPrefix ? "${headerPrefix}${count}." : "${count}."
+                result.add(processMeta(it, schema["items"] as LinkedHashMap, prefix))
+                count++
+            }
+            return result
+        } else {
+            // Cast value to path type if needed and return the value
+            return processValue(input, schema)
+        }
+    }
 
-            // Check and convert to the desired format
-            def String format = field['value']['format']
-            if(format) {
-                if(format == "file-path-pattern") {
-                    def ArrayList inputFiles = Nextflow.file(inputStr) as ArrayList
-                    return inputFiles
-                }
-                if(format.contains("path")) {
-                    def Path inputFile = Nextflow.file(inputStr) as Path
-                    return inputFile
-                }
-            }
-
-            // Return the plain string value
-            return result
-        }
-
-        // Convert number values
-        else if(type == "number") {
-            try {
-                def int result = inputStr as int
-                return result
-            }
-            catch (NumberFormatException e) {
-                log.debug("Could not convert ${input} to an integer. Trying to convert to a float.")
-            }
-
-            try {
-                def float result = inputStr as float
-                return result
-            }
-            catch (NumberFormatException e) {
-                log.debug("Could not convert ${inputStr} to a float. Trying to convert to a double.")
-            }
-
-            def double result = inputStr as double
-            return result
-        }
-
-        // Convert integer values
-        else if(type == "integer") {
-
-            def int result = inputStr as int
-            return result
-        }
-
-        // Convert boolean values
-        else if(type == "boolean") {
-
-            if(inputStr.toLowerCase() == "true") {
-                return true
-            }
-            return false
-        }
-
-        else if(type == "null") {
-            return null
-        }
-    }
-
-    private static String addSample (
-        String message
-    ) {
-        return "Entry ${this.getCount()}: ${message}".toString()
-    }
 }
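A sketch of what the converter now emits for a nested entry, assuming a hypothetical schema where `sample` is declared with `meta: id` and `fastqs` is an array of file paths:

```groovy
// Samplesheet entry after parsing (hypothetical):
def entry = [sample: 'sampleA', fastqs: ['a_R1.fastq.gz', 'a_R2.fastq.gz']]

// convertToList() prepends the meta map, then the formatted values follow:
// [[id: 'sampleA'], [/path/to/a_R1.fastq.gz, /path/to/a_R2.fastq.gz]]
```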
diff --git a/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy b/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy
index 0e07b70..81573b2 100644
--- a/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy
+++ b/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy
@@ -21,19 +21,17 @@ import nextflow.script.WorkflowMetadata
 import nextflow.Session
 import nextflow.util.Duration
 import nextflow.util.MemoryUnit
-import org.everit.json.schema.loader.SchemaLoader
-import org.everit.json.schema.PrimitiveValidationStrategy
-import org.everit.json.schema.ValidationException
-import org.everit.json.schema.Validator
-import org.everit.json.schema.Schema
 import org.json.JSONException
 import org.json.JSONArray
 import org.json.JSONObject
 import org.json.JSONTokener
 import org.yaml.snakeyaml.Yaml
 
-import static SamplesheetConverter.getHeader
-import static SamplesheetConverter.getFileType
+/**
+ * @author : mirpedrol
+ * @author : nvnieuwk
+ * @author : KevinMenden
+ */
 
 @Slf4j
 @CompileStatic
@@ -127,17 +125,6 @@
     boolean hasWarnings() { warnings.size()>0 }
     List getWarnings() { warnings }
 
-    //
-    // Resolve Schema path relative to main workflow directory
-    //
-    static String getSchemaPath(String baseDir, String schemaFilename='nextflow_schema.json') {
-        if (Path.of(schemaFilename).exists()) {
-            return schemaFilename
-        } else {
-            return "${baseDir}/${schemaFilename}"
-        }
-    }
-
     //
     // Find a value in a nested map
     //
@@ -151,79 +138,64 @@
         Map options = null,
         String samplesheetParam
    ) {
-        def String baseDir = session.baseDir
         def Map params = session.params
 
         // Set defaults for optional inputs
         def String schemaFilename = options?.containsKey('parameters_schema') ? options.parameters_schema as String : 'nextflow_schema.json'
-        def Boolean skipDuplicateCheck = options?.containsKey('skip_duplicate_check') ? options.skip_duplicate_check as Boolean : params.validationSkipDuplicateCheck ? params.validationSkipDuplicateCheck as Boolean : false
+        def String baseDir = session.baseDir.toString()
 
+        // Get the samplesheet schema from the parameters schema
         def slurper = new JsonSlurper()
-        def Map parsed = (Map) slurper.parse( Path.of(getSchemaPath(baseDir, schemaFilename)) )
+        def Map parsed = (Map) slurper.parse( Path.of(Utils.getSchemaPath(baseDir, schemaFilename)) )
         def Map samplesheetValue = (Map) findDeep(parsed, samplesheetParam)
         def Path samplesheetFile = params[samplesheetParam] as Path
-        if (samplesheetFile == null) {
-            log.error "Parameter '--$samplesheetParam' was not provided. Unable to create a channel from it."
+
+        // Some safeguards to make sure the channel factory runs correctly
+        if (samplesheetValue == null) {
+            log.error """
+Parameter '--$samplesheetParam' was not found in the schema ($schemaFilename).
+Unable to create a channel from it.
+
+Please make sure you correctly specified the inputs to `.fromSamplesheet`:
+
+--------------------------------------------------------------------------------------
+Channel.fromSamplesheet("input")
+--------------------------------------------------------------------------------------
+
+This would create a channel from params.input using the schema specified in the parameters JSON schema for this parameter.
+"""
             throw new SchemaValidationException("", [])
         }
-        def Path schemaFile = null
-        if (samplesheetValue == null) {
-            log.error "Parameter '--$samplesheetParam' was not found in the schema ($schemaFilename). Unable to create a channel from it."
+        else if (samplesheetFile == null) {
+            log.error "Parameter '--$samplesheetParam' was not provided. Unable to create a channel from it."
             throw new SchemaValidationException("", [])
         }
-        else if (samplesheetValue.containsKey('schema')) {
-            schemaFile = Path.of(getSchemaPath(baseDir, samplesheetValue['schema'].toString()))
-        } else {
+        else if (!samplesheetValue.containsKey('schema')) {
             log.error "Parameter '--$samplesheetParam' does not contain a schema in the parameter schema ($schemaFilename). Unable to create a channel from it."
             throw new SchemaValidationException("", [])
         }
+
+        // Convert to channel
+        final channel = CH.create()
+        def List arrayChannel = []
+        try {
+            def Path schemaFile = Path.of(Utils.getSchemaPath(baseDir, samplesheetValue['schema'].toString()))
+            def SamplesheetConverter converter = new SamplesheetConverter(samplesheetFile, schemaFile)
+            arrayChannel = converter.convertToList()
+        } catch (Exception e) {
+            log.error(
+                """The following error has been found during samplesheet conversion:
+    ${e}
+    ${e.getStackTrace().join("\n\t")}
 
-        log.debug "Starting validation: '$samplesheetFile' with '$schemaFile'"
-
-        // Validate samplesheet
-        def String fileType = SamplesheetConverter.getFileType(samplesheetFile)
-        def String delimiter = fileType == "csv" ? "," : fileType == "tsv" ? "\t" : null
-        def List fileContent
-        def List fileContentCasted
-        def Boolean s3PathCheck = params.validationS3PathCheck ? params.validationS3PathCheck : false
-        def Map types = variableTypes(schemaFile.toString(), baseDir)
-        if (types.find{ it.value == "array" } as Boolean && fileType in ["csv", "tsv"]){
-            def msg = "Using \"type\": \"array\" in schema with a \".$fileType\" samplesheet is not supported\n"
-            log.error("ERROR: Validation of pipeline parameters failed!")
-            throw new SchemaValidationException(msg, [])
-        }
-        def Boolean containsHeader = !(types.keySet().size() == 1 && types.keySet()[0] == "")
+Please run validateParameters() first before trying to convert a samplesheet to a channel.
+Reference: https://nextflow-io.github.io/nf-validation/parameters/validation/
 
-        if(!containsHeader){
-            types = ["empty": types[""]]
-        }
-        if(fileType == "yaml"){
-            fileContentCasted = new Yaml().load((samplesheetFile.text)).collect {
-                if(containsHeader) {
-                    return it as Map
-                }
-                return ["empty": it] as Map
-            }
-        }
-        else if(fileType == "json"){
-            fileContentCasted = new JsonSlurper().parseText(samplesheetFile.text).collect {
-                if(containsHeader) {
-                    return it as Map
-                }
-                return ["empty": it] as Map
-            }
-        }
-        else {
-            fileContent = samplesheetFile.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'\"')
-            fileContentCasted = castToType(fileContent, types)
-        }
-        if (validateFile(false, samplesheetFile.toString(), fileContentCasted, schemaFile.toString(), baseDir, s3PathCheck)) {
-            log.debug "Validation passed: '$samplesheetFile' with '$schemaFile'"
+Also make sure that the same schema is used for validation and conversion of the samplesheet
+""" as String
+            )
         }
 
-        // Convert to channel
-        final channel = CH.create()
-        List arrayChannel = SamplesheetConverter.convertToList(samplesheetFile, schemaFile, skipDuplicateCheck)
         session.addIgniter {
             arrayChannel.each {
                 channel.bind(it)
@@ -253,9 +225,6 @@
         if( !params.containsKey("validationSchemaIgnoreParams") ) {
             params.validationSchemaIgnoreParams = false
         }
-        if( !params.containsKey("validationSkipDuplicateCheck") ) {
-            params.validationSkipDuplicateCheck = false
-        }
         if( !params.containsKey("validationS3PathCheck") ) {
             params.validationS3PathCheck = false
         }
@@ -299,7 +268,7 @@
     ) {
         def Map params = initialiseExpectedParams(session.params)
-        def String baseDir = session.baseDir
+        def String baseDir = session.baseDir.toString()
         def Boolean s3PathCheck = params.validationS3PathCheck ? params.validationS3PathCheck : false
         def Boolean useMonochromeLogs = options?.containsKey('monochrome_logs') ? options.monochrome_logs as Boolean :
             params.monochrome_logs ? params.monochrome_logs as Boolean :
@@ -307,7 +276,7 @@
             false
         def String schemaFilename = options?.containsKey('parameters_schema') ? options.parameters_schema as String : 'nextflow_schema.json'
         log.debug "Starting parameters validation"
-        
+
         // Clean the parameters
         def cleanedParams = cleanParameters(params)
         // Convert to JSONObject
@@ -316,15 +285,15 @@
         //=====================================================================//
         // Check for nextflow core params and unexpected params
         def slurper = new JsonSlurper()
-        def Map parsed = (Map) slurper.parse( Path.of(getSchemaPath(baseDir, schemaFilename)) )
-        def Map schemaParams = (Map) parsed.get('definitions')
+        def Map parsed = (Map) slurper.parse( Path.of(Utils.getSchemaPath(baseDir, schemaFilename)) )
+        def Map schemaParams = (Map) parsed.get('defs')
         def specifiedParamKeys = params.keySet()
 
         // Collect expected parameters from the schema
         def enumsTuple = collectEnums(schemaParams)
         def List expectedParams = (List) enumsTuple[0] + addExpectedParams()
         def Map enums = (Map) enumsTuple[1]
-        // Collect expected parameters from the schema when parameters are specified outside of "definitions"
+        // Collect expected parameters from the schema when parameters are specified outside of "defs"
         if (parsed.containsKey('properties')) {
             def enumsTupleTopLevel = collectEnums(['top_level': ['properties': parsed.get('properties')]])
             expectedParams += (List) enumsTupleTopLevel[0]
@@ -332,15 +301,6 @@
         }
 
         //=====================================================================//
-        // Check if files or directories exist
-        def List pathsToCheck = (List) collectExists(schemaParams)
-        pathsToCheck.each {
-            if (params[it]) {
-                pathExists(params[it].toString(), it.toString(), s3PathCheck)
-            }
-        }
-
-        def Boolean lenientMode = params.validationLenientMode ? params.validationLenientMode : false
         def Boolean failUnrecognisedParams = params.validationFailUnrecognisedParams ? params.validationFailUnrecognisedParams : false
 
         for (String specifiedParam in specifiedParamKeys) {
@@ -368,16 +328,8 @@
 
         //=====================================================================//
         // Validate parameters against the schema
-        def String schema_string = Files.readString( Path.of(getSchemaPath(baseDir, schemaFilename)) )
-        final rawSchema = new JSONObject(new JSONTokener(schema_string))
-        final SchemaLoader schemaLoader = SchemaLoader.builder()
-            .schemaJson(rawSchema)
-            .addFormatValidator("file-path", new FilePathValidator())
-            .addFormatValidator("directory-path", new DirectoryPathValidator())
-            .addFormatValidator("path", new PathValidator())
-            .addFormatValidator("file-path-pattern", new FilePathPatternValidator())
-            .build()
-        final schema = schemaLoader.load().build()
+        def String schema_string = Files.readString( Path.of(Utils.getSchemaPath(baseDir, schemaFilename)) )
+        def validator = new JsonSchemaValidator()
 
         // check for warnings
         if( this.hasWarnings() ) {
@@ -389,298 +341,17 @@
         def colors = logColours(useMonochromeLogs)
 
         // Validate
-        try {
-            if (lenientMode) {
-                // Create new validator with LENIENT mode
-                Validator validator = Validator.builder()
-                    .primitiveValidationStrategy(PrimitiveValidationStrategy.LENIENT)
-                    .build();
-                validator.performValidation(schema, paramsJSON);
-            } else {
-                schema.validate(paramsJSON)
-            }
-            if (this.hasErrors()) {
-                // Needed when validationFailUnrecognisedParams is true
-                def msg = "${colors.red}The following invalid input values have been detected:\n\n" + this.getErrors().join('\n').trim() + "\n${colors.reset}\n"
-                log.error("ERROR: Validation of pipeline parameters failed!")
-                throw new SchemaValidationException(msg, this.getErrors())
-            }
-        } catch (ValidationException e) {
-            JSONObject exceptionJSON = (JSONObject) e.toJSON()
-            collectErrors(exceptionJSON, paramsJSON, enums, rawSchema)
-            def msg = "${colors.red}The following invalid input values have been detected:\n\n" + this.getErrors().join('\n').trim() + "\n${colors.reset}\n"
-            log.error("ERROR: Validation of pipeline parameters failed!")
+        List validationErrors = validator.validate(paramsJSON, schema_string)
+        this.errors.addAll(validationErrors)
+        if (this.hasErrors()) {
+            def msg = "${colors.red}The following invalid input values have been detected:\n\n" + errors.join('\n').trim() + "\n${colors.reset}\n"
+            log.error("Validation of pipeline parameters failed!")
             throw new SchemaValidationException(msg, this.getErrors())
         }
 
-        //=====================================================================//
-        // Look for other schemas to validate
-        for (group in schemaParams) {
-            def Map properties = (Map) group.value['properties']
-            for (p in properties) {
-                def String key = (String) p.key
-                if (!params[key]) {
-                    continue
-                }
-                def Map property = properties[key] as Map
-                if (property.containsKey('schema')) {
-                    def String schema_name = getSchemaPath(baseDir, property['schema'].toString())
-                    def Path file_path
-                    try {
-                        file_path = Nextflow.file(params[key]) as Path
-                    } catch (IllegalArgumentException e) {
-                        errors << "* --${key}: The file path '${params[key]}' is invalid. Unable to validate file.".toString()
-                        continue
-                    }
-                    log.debug "Starting validation: '$key': '$file_path' with '$schema_name'"
-                    def String fileType = SamplesheetConverter.getFileType(file_path)
-                    def String delimiter = fileType == "csv" ? "," : fileType == "tsv" ? "\t" : null
-                    def List fileContent
-                    def List fileContentCasted
-                    def Map types = variableTypes(schema_name, baseDir)
-                    if (types.find{ it.value == "array" } as Boolean && fileType in ["csv", "tsv"]){
-                        def msg = "${colors.red}Using {\"type\": \"array\"} in schema with a \".$fileType\" samplesheet is not supported${colors.reset}\n"
-                        log.error("ERROR: Validation of pipeline parameters failed!")
-                        throw new SchemaValidationException(msg, [])
-                    }
-                    def Boolean containsHeader = !(types.keySet().size() == 1 && types.keySet()[0] == "")
-
-                    if(!containsHeader){
-                        types = ["empty": types[""]]
-                    }
-
-                    if(fileType == "yaml"){
-                        fileContentCasted = new Yaml().load(file_path.text).collect {
-                            if(containsHeader) {
-                                return it as Map
-                            }
-                            return ["empty": it] as Map
-                        }
-                    }
-                    else if(fileType == "json"){
-                        fileContentCasted = new JsonSlurper().parseText(file_path.text).collect {
-                            if(containsHeader) {
-                                return it as Map
-                            }
-                            return ["empty": it] as Map
-                        }
-                    }
-                    else {
-                        fileContent = file_path.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'\"')
-                        fileContentCasted = castToType(fileContent, types)
-                    }
-                    if (validateFile(useMonochromeLogs, key, fileContentCasted, schema_name, baseDir, s3PathCheck)) {
-                        log.debug "Validation passed: '$key': '$file_path' with '$schema_name'"
-                    }
-                }
-            }
-        }
-
         log.debug "Finishing parameters validation"
     }
 
-    //
-    // Function to obtain the variable types of properties from a JSON Schema
-    //
-    Map variableTypes(String schemaFilename, String baseDir) {
-        def Map variableTypes = [:]
-        def String type = ''
-
-        // Read the schema
-        def slurper = new JsonSlurper()
-        def Map parsed = (Map) slurper.parse( Path.of(getSchemaPath(baseDir, schemaFilename)) )
-
-        // Obtain the type of each variable in the schema
-        def Map properties = (Map) parsed['items']['properties']
-        for (p in properties) {
-            def String key = (String) p.key
-            def Map property = properties[key] as Map
-            if (property.containsKey('type')) {
-                if (property['type'] == 'number') {
-                    type = 'float'
-                }
-                else {
-                    type = property['type']
-                }
-                variableTypes[key] = type
-            }
-            else {
-                variableTypes[key] = 'string' // If there isn't a type specifyed, return 'string' to avoid having a null value
-            }
-        }
-
-        return variableTypes
-    }
-
-
-    //
-    // Cast a value to the provided type in a Strict mode
-    //
-    Set VALID_BOOLEAN_VALUES = ['true', 'false'] as Set
-
-    List castToType(List rows, Map types) {
-        def List casted = []
-
-        for( Map row in rows) {
-            def Map castedRow = [:]
-
-            for (String key in row.keySet()) {
-                def String str = row[key]
-                def String type = types[key]
-
-                try {
-                    if( str == null || str == '' ) castedRow[key] = null
-                    else if( type == null ) castedRow[key] = str
-                    else if( type.toLowerCase() == 'boolean' && str.toLowerCase() in VALID_BOOLEAN_VALUES ) castedRow[key] = str.toBoolean()
-                    else if( type.toLowerCase() == 'character' ) castedRow[key] = str.toCharacter()
-                    else if( type.toLowerCase() == 'short' && str.isNumber() ) castedRow[key] = str.toShort()
-                    else if( type.toLowerCase() == 'integer' && str.isInteger() ) castedRow[key] = str.toInteger()
-                    else if( type.toLowerCase() == 'long' && str.isLong() ) castedRow[key] = str.toLong()
-                    else if( type.toLowerCase() == 'float' && str.isFloat() ) castedRow[key] = str.toFloat()
-                    else if( type.toLowerCase() == 'double' && str.isDouble() ) castedRow[key] = str.toDouble()
-                    else if( type.toLowerCase() == 'string' ) castedRow[key] = str
-                    else {
-                        castedRow[key] = str
-                    }
-                } catch( Exception e ) {
-                    log.warn "Unable to cast value $str to type $type: $e"
-                    castedRow[key] = str
-                }
-
-            }
-
-            casted = casted + castedRow
-        }
-
-        return casted
-    }
-
-
-    //
-    // Function to validate a file by its schema
-    //
-    /* groovylint-disable-next-line UnusedPrivateMethodParameter */
-    boolean validateFile(
-
-        Boolean monochrome_logs, String paramName, Object fileContent, String schemaFilename, String baseDir, Boolean s3PathCheck = false
-
-    ) {
-        // declare this once for the method
-        def colors = logColours(monochrome_logs)
-
-        // Load the schema
-        def String schema_string = Files.readString( Path.of(getSchemaPath(baseDir, schemaFilename)) )
-        final rawSchema = new JSONObject(new JSONTokener(schema_string))
-        final SchemaLoader schemaLoader = SchemaLoader.builder()
-            .schemaJson(rawSchema)
-            .addFormatValidator("file-path", new FilePathValidator())
-            .addFormatValidator("directory-path", new DirectoryPathValidator())
-            .addFormatValidator("path", new PathValidator())
-            .addFormatValidator("file-path-pattern", new FilePathPatternValidator())
-            .build()
-        final schema = schemaLoader.load().build()
-
-        // Remove all null values from JSON object
-        // and convert the groovy object to a JSONArray
-        def jsonGenerator = new JsonGenerator.Options()
-            .excludeNulls()
-            .build()
-        def JSONArray arrayJSON = new JSONArray(jsonGenerator.toJson(fileContent))
-
-        //=====================================================================//
-        // Check for params with expected values
-        def slurper = new JsonSlurper()
-        def Map parsed = (Map) slurper.parse( Path.of(getSchemaPath(baseDir, schemaFilename)) )
-        def Map schemaParams = (Map) ["items": parsed.get('items')]
-
-        // Collect expected parameters from the schema
-        def enumsTuple = collectEnums(schemaParams)
-        def List expectedParams = (List) enumsTuple[0] + addExpectedParams()
-        def Map enums = (Map) enumsTuple[1]
-
-        //=====================================================================//
-        // Check if files or directories exist
-        def List pathsToCheck = (List) collectExists(schemaParams)
-        pathsToCheck.each { String fieldName ->
-            for (int i=0; i < arrayJSON.size(); i++) {
-                def JSONObject entry = arrayJSON.getJSONObject(i)
-                if ( entry.has(fieldName) && entry[fieldName] instanceof JSONArray ) {
-                    entry[fieldName].collect{ pathExists(it.toString(), " Entry ${(i+1).toString()} - ${fieldName.toString()}", s3PathCheck) }
-                }
-                else if ( entry.has(fieldName) ) {
-                    pathExists(entry[fieldName].toString(), " Entry ${(i+1).toString()} - ${fieldName.toString()}", s3PathCheck)
-                }
-            }
-        }
-
-        //=====================================================================//
-        // Validate
-        try {
-            // Create new validator with LENIENT mode
-            Validator validator = Validator.builder()
-                .primitiveValidationStrategy(PrimitiveValidationStrategy.LENIENT)
-                .build();
-            validator.performValidation(schema, arrayJSON);
-            if (this.hasErrors()) {
-                // Needed for custom errors such as pathExists() errors
-                def msg = "${colors.red}The following errors have been detected:\n\n" + this.getErrors().join('\n').trim() + "\n${colors.reset}\n"
-                log.error("ERROR: Validation of '$paramName' file failed!")
-                throw new SchemaValidationException(msg, this.getErrors())
-            }
-        } catch (ValidationException e) {
-            JSONObject exceptionJSON = (JSONObject) e.toJSON()
-            JSONObject objectJSON = new JSONObject();
-            objectJSON.put("objects",arrayJSON);
-            collectErrors(exceptionJSON, objectJSON, enums, rawSchema)
-            def msg = "${colors.red}The following errors have been detected:\n\n" + this.getErrors().join('\n').trim() + "\n${colors.reset}\n"
-
log.error("ERROR: Validation of '$paramName' file failed!") - throw new SchemaValidationException(msg, this.getErrors()) - } - - return true - } - - - // - // Function to check if a file or directory exists - // - List pathExists(String path, String paramName, Boolean s3PathCheck) { - if (path.startsWith('s3://') && !s3PathCheck) { - log.debug "Ignoring validation of S3 URL path '${path}'".toString() - } else { - def Path file = Nextflow.file(path) as Path - if (!file.exists()) { - errors << "* --${paramName}: the file or directory '${path}' does not exist.".toString() - } - } - } - - - // - // Function to collect parameters with an exists key in the schema. - // - List collectExists(Map schemaParams) { - def exists = [] - for (group in schemaParams) { - def Map properties = (Map) group.value['properties'] - for (p in properties) { - def String key = (String) p.key - def Map property = properties[key] as Map - if(property.containsKey('items')){ - property = property.items as Map - } - if (property.containsKey('exists') && property.containsKey('format')) { - if (property['exists'] && (property['format'] == 'file-path' || property['format'] == 'directory-path' || property['format'] == 'path') ) { - exists.push(key) - } - } - } - } - return exists - } - - // // Function to collect enums (options) of a parameter and expected parameters (present in the schema) // @@ -729,7 +400,6 @@ class SchemaValidator extends PluginExtensionPoint { String command ) { def Map params = initialiseExpectedParams(session.params) - def String baseDir = session.baseDir def String schemaFilename = options?.containsKey('parameters_schema') ? options.parameters_schema as String : 'nextflow_schema.json' def Boolean useMonochromeLogs = options?.containsKey('monochrome_logs') ? options.monochrome_logs as Boolean : @@ -742,7 +412,7 @@ class SchemaValidator extends PluginExtensionPoint { String output = '' output += 'Typical pipeline command:\n\n' output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad( Path.of(getSchemaPath(baseDir, schemaFilename)) ) + Map params_map = paramsLoad( Path.of(Utils.getSchemaPath(session.baseDir.toString(), schemaFilename)) ) Integer max_chars = paramsMaxChars(params_map) + 1 Integer desc_indent = max_chars + 14 Integer dec_linewidth = 160 - desc_indent @@ -832,7 +502,6 @@ class SchemaValidator extends PluginExtensionPoint { ) { def String schemaFilename = options?.containsKey('parameters_schema') ? options.parameters_schema as String : 'nextflow_schema.json' - def String baseDir = session.baseDir def Map params = session.params // Get a selection of core Nextflow workflow options @@ -857,7 +526,7 @@ class SchemaValidator extends PluginExtensionPoint { // Get pipeline parameters defined in JSON Schema def Map params_summary = [:] - def Map params_map = paramsLoad( Path.of(getSchemaPath(baseDir, schemaFilename)) ) + def Map params_map = paramsLoad( Path.of(Utils.getSchemaPath(session.baseDir.toString(), schemaFilename)) ) for (group in params_map.keySet()) { def sub_params = new LinkedHashMap() def Map group_params = params_map.get(group) as Map // This gets the parameters of that particular group @@ -910,7 +579,6 @@ class SchemaValidator extends PluginExtensionPoint { WorkflowMetadata workflow ) { - def String baseDir = session.baseDir def Map params = session.params def String schemaFilename = options?.containsKey('parameters_schema') ? 
options.parameters_schema as String : 'nextflow_schema.json' @@ -938,105 +606,6 @@ class SchemaValidator extends PluginExtensionPoint { return output } - // - // Loop over nested exceptions and print the causingException - // - private void collectErrors(JSONObject exJSON, JSONObject paramsJSON, Map enums, JSONObject schemaJSON, Integer limit=5) { - def JSONArray causingExceptions = (JSONArray) exJSON['causingExceptions'] - def JSONArray valuesJSON = new JSONArray () - def String validationType = "parameter: --" - if (paramsJSON.has('objects')) { - valuesJSON = (JSONArray) paramsJSON['objects'] - validationType = "value: " - } - def Integer entryNumber = 0 - if (causingExceptions.length() == 0) { - def String pointer = (String) exJSON['pointerToViolation'] - ~/^#\// - def String message = (String) exJSON['message'] - def Pattern p = (Pattern) ~/required key \[([^\]]+)\] not found/ - def Matcher m = message =~ p - // Missing required param - if(m.matches()){ - def List l = m[0] as ArrayList - if (pointer.isNumber()) { - entryNumber = pointer.replace('/', ' - ') as Integer - entryNumber = entryNumber + 1 - errors << "* -- Entry ${entryNumber}: Missing required ${validationType}${l[1]}".toString() - } else { - errors << "* Missing required ${validationType}${l[1]}".toString() - } - } - // Other base-level error - else if(exJSON['pointerToViolation'] == '#'){ - errors << "* ${message}".toString() - } - // Error with specific param - else { - def String param = (String) exJSON['pointerToViolation'] - ~/^#\// - def String paramName = param - def param_val = "" - if (paramsJSON.has('objects')) { - def paramSplit = param.tokenize( '/' ) - int indexInt = paramSplit[0] as int - String paramString = paramSplit[1] as String - paramName = paramString - param_val = valuesJSON[indexInt][paramString].toString() - } else { - param_val = paramsJSON[param].toString() - } - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - def List enums_param = (List) enums[param] - if (enums_param.size() > limit) { - errors << "${error_msg} (${limit} of ${enums_param.size()}): ${enums_param[0..limit-1].join(', ')}, ... 
)".toString() - } else { - errors << "${error_msg}: ${enums_param.join(', ')})".toString() - } - } else { - if (param.contains('/')) { - entryNumber = param.split('/')[0] as Integer - entryNumber = entryNumber + 1 - def String columnName = param.split('/')[1] - paramName = columnName - param = " Entry ${entryNumber} - ${columnName}" - } - // Custom errorMessage - def String errorMessage - try { - errorMessage = schemaJSON['items']['properties'][paramName]['errorMessage'] - } catch (JSONException) { - def Map paramMap = findDeep(schemaJSON.toMap(), paramName) as Map - errorMessage = paramMap['errorMessage'] - } - if (errorMessage) { - log.debug "* --${param}: ${message} (${param_val})".toString() - message = errorMessage - } - errors << "* --${param}: ${message} (${param_val})".toString() - } - } - errors.unique() - } - for (ex in causingExceptions) { - def JSONObject exception = (JSONObject) ex - collectErrors(exception, paramsJSON, enums, schemaJSON) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(JSONArray json_array, String element) { - def list = [] - int len = json_array.length() - for (int i=0;i + * @author : nvnieuwk + * @author : KevinMenden + */ + +@Slf4j +public class Utils { + + // Function to infer the file type of a samplesheet + public static String getFileType(Path file) { + def String extension = file.getExtension() + if (extension in ["csv", "tsv", "yml", "yaml", "json"]) { + return extension == "yml" ? "yaml" : extension + } + + def String header = getHeader(file) + + def Integer commaCount = header.count(",") + def Integer tabCount = header.count("\t") + + if ( commaCount == tabCount ){ + log.error("Could not derive file type from ${file}. Please specify the file extension (CSV, TSV, YML, YAML and JSON are supported).".toString()) + } + if ( commaCount > tabCount ){ + return "csv" + } + else { + return "tsv" + } + } + + // Function to get the header from a CSV or TSV file + public static String getHeader(Path file) { + def String header + file.withReader { header = it.readLine() } + return header + } + + // Converts a given file to a List + public static List fileToList(Path file, Path schema) { + def String fileType = Utils.getFileType(file) + def String delimiter = fileType == "csv" ? "," : fileType == "tsv" ? "\t" : null + def Map types = variableTypes(schema) + + if (types.find{ it.value == "array" } as Boolean && fileType in ["csv", "tsv"]){ + def msg = "Using \"type\": \"array\" in schema with a \".$fileType\" samplesheet is not supported\n" + log.error("ERROR: Validation of pipeline parameters failed!") + throw new SchemaValidationException(msg, []) + } + + if(fileType == "yaml"){ + return new Yaml().load((file.text)) + } + else if(fileType == "json"){ + return new JsonSlurper().parseText(file.text) as List + } + else { + def Boolean header = getValueFromJson("#/items/properties", new JSONObject(schema.text)) ? true : false + def List fileContent = file.splitCsv(header:header, strip:true, sep:delimiter, quote:'\"') + if (!header) { + // Flatten no header inputs if they contain one value + fileContent = fileContent.collect { it instanceof List && it.size() == 1 ? 
it[0] : it } + } + + return castToType(fileContent) + } + } + + // Converts a given file to a JSONArray + public static JSONArray fileToJsonArray(Path file, Path schema) { + // Remove all null values from JSON object + // and convert the groovy object to a JSONArray + def jsonGenerator = new JsonGenerator.Options() + .excludeNulls() + .build() + return new JSONArray(jsonGenerator.toJson(fileToList(file, schema))) + } + + // + // Cast a value to the provided type in a Strict mode + // + + public static Object castToType(Object input) { + def Set validBooleanValues = ['true', 'false'] as Set + + if (input instanceof Map) { + // Cast all values in the map + def Map output = [:] + input.each { k, v -> + output[k] = castToType(v) + } + return output + } + else if (input instanceof List) { + // Cast all values in the list + def List output = [] + for( entry : input ) { + output.add(castToType(entry)) + } + return output + } else if (input instanceof String) { + // Cast the string if there is one + if (input == "") { + return null + } + return JSONObject.stringToValue(input) + } + } + + // Resolve Schema path relative to main workflow directory + public static String getSchemaPath(String baseDir, String schemaFilename='nextflow_schema.json') { + if (Path.of(schemaFilename).exists()) { + return schemaFilename + } else { + return "${baseDir}/${schemaFilename}" + } + } + + // Function to obtain the variable types of properties from a JSON Schema + public static Map variableTypes(Path schema) { + def Map variableTypes = [:] + def String type = '' + + // Read the schema + def slurper = new JsonSlurper() + def Map parsed = (Map) slurper.parse( schema ) + + // Obtain the type of each variable in the schema + def Map properties = (Map) parsed['items']['properties'] + for (p in properties) { + def String key = (String) p.key + def Map property = properties[key] as Map + if (property.containsKey('type')) { + if (property['type'] == 'number') { + type = 'float' + } + else { + type = property['type'] + } + variableTypes[key] = type + } + else { + variableTypes[key] = 'string' // If there isn't a type specified, return 'string' to avoid having a null value + } + } + + return variableTypes + } + + // Function to check if a String value is an Integer + public static Boolean isInteger(String input) { + try { + input as Integer + return true + } catch (NumberFormatException e) { + return false + } + } + + // Function to check if a String value is a Float + public static Boolean isFloat(String input) { + try { + input as Float + return true + } catch (NumberFormatException e) { + return false + } + } + + // Function to check if a String value is a Double + public static Boolean isDouble(String input) { + try { + input as Double + return true + } catch (NumberFormatException e) { + return false + } + } + + // Function to get the value from a JSON pointer + public static Object getValueFromJson(String jsonPointer, Object json) { + def JSONPointer schemaPointer = new JSONPointer(jsonPointer) + try { + return schemaPointer.queryFrom(json) ?: "" + } catch (JSONPointerException e) { + return "" + } + } +} \ No newline at end of file diff --git a/plugins/nf-validation/src/test/nextflow/validation/NfValidationTest.groovy b/plugins/nf-validation/src/test/nextflow/validation/NfValidationTest.groovy new file mode 100644 index 0000000..114922c --- /dev/null +++ b/plugins/nf-validation/src/test/nextflow/validation/NfValidationTest.groovy @@ -0,0 +1,76 @@ +package nextflow.validation + +import java.nio.file.Path + +import 
nextflow.plugin.Plugins +import nextflow.plugin.TestPluginDescriptorFinder +import nextflow.plugin.TestPluginManager +import nextflow.plugin.extension.PluginExtensionProvider +import org.junit.Rule +import org.pf4j.PluginDescriptorFinder +import spock.lang.Shared +import test.Dsl2Spec +import test.OutputCapture +/** + * @author : mirpedrol + * @author : nvnieuwk + * @author : jorgeaguileraseqera + */ +class NfValidationTest extends Dsl2Spec{ + + @Rule + OutputCapture capture = new OutputCapture() + + + @Shared String pluginsMode + + def setup() { + // reset previous instances + PluginExtensionProvider.reset() + // this need to be set *before* the plugin manager class is created + pluginsMode = System.getProperty('pf4j.mode') + System.setProperty('pf4j.mode', 'dev') + // the plugin root should + def root = Path.of('.').toAbsolutePath().normalize() + def manager = new TestPluginManager(root){ + @Override + protected PluginDescriptorFinder createPluginDescriptorFinder() { + return new TestPluginDescriptorFinder(){ + @Override + protected Path getManifestPath(Path pluginPath) { + return pluginPath.resolve('build/resources/main/META-INF/MANIFEST.MF') + } + } + } + } + Plugins.init(root, 'dev', manager) + } + + def cleanup() { + Plugins.stop() + PluginExtensionProvider.reset() + pluginsMode ? System.setProperty('pf4j.mode',pluginsMode) : System.clearProperty('pf4j.mode') + } + + // + // Params validation tests + // + + def 'should import functions' () { + given: + def SCRIPT_TEXT = ''' + include { validateParameters } from 'plugin/nf-validation' + ''' + + when: + dsl_eval(SCRIPT_TEXT) + def stdout = capture + .toString() + .readLines() + .findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? it : null } + + then: + noExceptionThrown() + !stdout + } +} diff --git a/plugins/nf-validation/src/test/nextflow/validation/ParamsHelpTest.groovy b/plugins/nf-validation/src/test/nextflow/validation/ParamsHelpTest.groovy new file mode 100644 index 0000000..f9ee44b --- /dev/null +++ b/plugins/nf-validation/src/test/nextflow/validation/ParamsHelpTest.groovy @@ -0,0 +1,178 @@ +package nextflow.validation + +import java.nio.file.Path + +import nextflow.plugin.Plugins +import nextflow.plugin.TestPluginDescriptorFinder +import nextflow.plugin.TestPluginManager +import nextflow.plugin.extension.PluginExtensionProvider +import org.junit.Rule +import org.pf4j.PluginDescriptorFinder +import spock.lang.Shared +import test.Dsl2Spec +import test.OutputCapture + +/** + * @author : mirpedrol + * @author : nvnieuwk + * @author : KevinMenden + */ +class ParamsHelpTest extends Dsl2Spec{ + + @Rule + OutputCapture capture = new OutputCapture() + + + @Shared String pluginsMode + + Path root = Path.of('.').toAbsolutePath().normalize() + Path getRoot() { this.root } + String getRootString() { this.root.toString() } + + def setup() { + // reset previous instances + PluginExtensionProvider.reset() + // this need to be set *before* the plugin manager class is created + pluginsMode = System.getProperty('pf4j.mode') + System.setProperty('pf4j.mode', 'dev') + // the plugin root should + def root = this.getRoot() + def manager = new TestPluginManager(root){ + @Override + protected PluginDescriptorFinder createPluginDescriptorFinder() { + return new TestPluginDescriptorFinder(){ + @Override + protected Path getManifestPath(Path pluginPath) { + return pluginPath.resolve('build/resources/main/META-INF/MANIFEST.MF') + } + } + } + } + Plugins.init(root, 'dev', manager) + } + + def cleanup() { + 
Plugins.stop() + PluginExtensionProvider.reset() + pluginsMode ? System.setProperty('pf4j.mode',pluginsMode) : System.clearProperty('pf4j.mode') + } + + def 'should print a help message' () { + given: + def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() + def SCRIPT_TEXT = """ + include { paramsHelp } from 'plugin/nf-validation' + + def command = "nextflow run <pipeline> --input samplesheet.csv --outdir <OUTDIR> -profile docker" + + def help_msg = paramsHelp(command, parameters_schema: '$schema') + log.info help_msg + """ + + when: + dsl_eval(SCRIPT_TEXT) + def stdout = capture + .toString() + .readLines() + .findResults {it.contains('Typical pipeline command:') || + it.contains('nextflow run') || + it.contains('Input/output options') || + it.contains('--input') || + it.contains('--outdir') || + it.contains('--email') || + it.contains('--multiqc_title') || + it.contains('Reference genome options') || + it.contains('--genome') || + it.contains('--fasta') + ? it : null } + + then: + noExceptionThrown() + stdout.size() == 10 + } + + def 'should print a help message with argument options' () { + given: + def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() + def SCRIPT_TEXT = """ + include { paramsHelp } from 'plugin/nf-validation' + params.validationShowHiddenParams = true + def command = "nextflow run <pipeline> --input samplesheet.csv --outdir <OUTDIR> -profile docker" + + def help_msg = paramsHelp(command, parameters_schema: '$schema') + log.info help_msg + """ + + when: + dsl_eval(SCRIPT_TEXT) + def stdout = capture + .toString() + .readLines() + .findResults {it.contains('publish_dir_mode') && + it.contains('(accepted: symlink, rellink, link, copy, copyNoFollow' + ? it : null } + + then: + noExceptionThrown() + stdout.size() == 1 + } + + def 'should print a help message of one parameter' () { + given: + def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() + def SCRIPT_TEXT = """ + include { paramsHelp } from 'plugin/nf-validation' + params.help = 'publish_dir_mode' + + def command = "nextflow run <pipeline> --input samplesheet.csv --outdir <OUTDIR> -profile docker" + + def help_msg = paramsHelp(command, parameters_schema: '$schema') + log.info help_msg + """ + + when: + dsl_eval(SCRIPT_TEXT) + def stdout = capture + .toString() + .readLines() + .findResults {it.startsWith('--publish_dir_mode') || + it.contains('type :') || + it.contains('default :') || + it.contains('description:') || + it.contains('help_text :') || + it.contains('fa_icon :') || // fa_icon shouldn't be printed + it.contains('enum :') || + it.contains('hidden :') + ? it : null } + + then: + noExceptionThrown() + stdout.size() == 7 + } + + def 'should fail when help param doesnt exist' () { + given: + def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() + def SCRIPT_TEXT = """ + include { paramsHelp } from 'plugin/nf-validation' + params.help = 'no_exist' + + def command = "nextflow run <pipeline> --input samplesheet.csv --outdir <OUTDIR> -profile docker" + + def help_msg = paramsHelp(command, parameters_schema: '$schema') + log.info help_msg + """ + + when: + dsl_eval(SCRIPT_TEXT) + def stdout = capture + .toString() + .readLines() + .findResults {it.startsWith('--no_exist') ? it : null } + + then: + def error = thrown(Exception) + error.message == "Specified param 'no_exist' does not exist in JSON schema."
+ !stdout + } +} \ No newline at end of file diff --git a/plugins/nf-validation/src/test/nextflow/validation/ParamsSummaryLogTest.groovy b/plugins/nf-validation/src/test/nextflow/validation/ParamsSummaryLogTest.groovy new file mode 100644 index 0000000..0f3b244 --- /dev/null +++ b/plugins/nf-validation/src/test/nextflow/validation/ParamsSummaryLogTest.groovy @@ -0,0 +1,94 @@ +package nextflow.validation + +import java.nio.file.Path + +import nextflow.plugin.Plugins +import nextflow.plugin.TestPluginDescriptorFinder +import nextflow.plugin.TestPluginManager +import nextflow.plugin.extension.PluginExtensionProvider +import org.junit.Rule +import org.pf4j.PluginDescriptorFinder +import spock.lang.Shared +import test.Dsl2Spec +import test.OutputCapture + +/** + * @author : mirpedrol + * @author : nvnieuwk + * @author : KevinMenden + */ +class ParamsSummaryLogTest extends Dsl2Spec{ + + @Rule + OutputCapture capture = new OutputCapture() + + + @Shared String pluginsMode + + Path root = Path.of('.').toAbsolutePath().normalize() + Path getRoot() { this.root } + String getRootString() { this.root.toString() } + + def setup() { + // reset previous instances + PluginExtensionProvider.reset() + // this need to be set *before* the plugin manager class is created + pluginsMode = System.getProperty('pf4j.mode') + System.setProperty('pf4j.mode', 'dev') + // the plugin root should + def root = this.getRoot() + def manager = new TestPluginManager(root){ + @Override + protected PluginDescriptorFinder createPluginDescriptorFinder() { + return new TestPluginDescriptorFinder(){ + @Override + protected Path getManifestPath(Path pluginPath) { + return pluginPath.resolve('build/resources/main/META-INF/MANIFEST.MF') + } + } + } + } + Plugins.init(root, 'dev', manager) + } + + def cleanup() { + Plugins.stop() + PluginExtensionProvider.reset() + pluginsMode ? System.setProperty('pf4j.mode',pluginsMode) : System.clearProperty('pf4j.mode') + } + + def 'should print params summary' () { + given: + def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() + def SCRIPT_TEXT = """ + params.outdir = "outDir" + include { paramsSummaryLog } from 'plugin/nf-validation' + + def summary_params = paramsSummaryLog(workflow, parameters_schema: '$schema') + log.info summary_params + """ + + when: + dsl_eval(SCRIPT_TEXT) + def stdout = capture + .toString() + .readLines() + .findResults {it.contains('Only displaying parameters that differ from the pipeline defaults') || + it.contains('Core Nextflow options') || + it.contains('runName') || + it.contains('launchDir') || + it.contains('workDir') || + it.contains('projectDir') || + it.contains('userName') || + it.contains('profile') || + it.contains('configFiles') || + it.contains('Input/output options') || + it.contains('outdir') + ? 
it : null } + + then: + noExceptionThrown() + stdout.size() == 11 + stdout ==~ /.*\[0;34moutdir : .\[0;32moutDir.*/ + } +} \ No newline at end of file diff --git a/plugins/nf-validation/src/test/nextflow/validation/SamplesheetConverterTest.groovy b/plugins/nf-validation/src/test/nextflow/validation/SamplesheetConverterTest.groovy index 3d309cc..5236a1a 100644 --- a/plugins/nf-validation/src/test/nextflow/validation/SamplesheetConverterTest.groovy +++ b/plugins/nf-validation/src/test/nextflow/validation/SamplesheetConverterTest.groovy @@ -13,8 +13,8 @@ import test.Dsl2Spec import test.OutputCapture /** - * @author : Nicolas Vannieuwkerke - * + * @author : mirpedrol + * @author : nvnieuwk */ class SamplesheetConverterTest extends Dsl2Spec{ @@ -77,10 +77,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ then: noExceptionThrown() - stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String) + stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String) stdout.contains("[[string1:value, string2:value, integer1:0, integer2:0, boolean1:true, boolean2:true], string1, 25.08, false, [], [], [], [], [], itDoesExist]") stdout.contains("[[string1:dependentRequired, string2:dependentRequired, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, [], [], [], unique2, 1, itDoesExist]") - stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String) + stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String) } def 'should work fine - quoted CSV' () { @@ -104,10 +104,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ then: noExceptionThrown() - stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String) + stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String) stdout.contains("[[string1:value, string2:value, integer1:0, integer2:0, boolean1:true, boolean2:true], string1, 25.08, false, [], [], [], [], [], itDoesExist]") stdout.contains("[[string1:dependentRequired, string2:dependentRequired, integer1:10, integer2:10, boolean1:true, 
boolean2:true], string1, 25, false, [], [], [], unique2, 1, itDoesExist]") - stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String) + stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String) } def 'should work fine - TSV' () { @@ -131,10 +131,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ then: noExceptionThrown() - stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String) + stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String) stdout.contains("[[string1:value, string2:value, integer1:0, integer2:0, boolean1:true, boolean2:true], string1, 25.08, false, [], [], [], [], [], itDoesExist]") stdout.contains("[[string1:dependentRequired, string2:dependentRequired, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, [], [], [], unique2, 1, itDoesExist]") - stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String) + stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String) } def 'should work fine - YAML' () { @@ -158,10 +158,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ then: noExceptionThrown() - stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String) + stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String) stdout.contains("[[string1:value, string2:value, integer1:0, integer2:0, boolean1:true, boolean2:true], string1, 25.08, false, [], [], [], [], [], itDoesExist]") stdout.contains("[[string1:dependentRequired, string2:dependentRequired, integer1:10, integer2:10, boolean1:true, 
boolean2:true], string1, 25, false, [], [], [], unique2, 1, itDoesExist]") - stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String) + stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String) } def 'should work fine - JSON' () { @@ -185,10 +185,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ then: noExceptionThrown() - stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String) + stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String) stdout.contains("[[string1:value, string2:value, integer1:0, integer2:0, boolean1:true, boolean2:true], string1, 25.08, false, [], [], [], [], [], itDoesExist]") stdout.contains("[[string1:dependentRequired, string2:dependentRequired, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, [], [], [], unique2, 1, itDoesExist]") - stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String) + stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String) } def 'arrays should work fine - YAML' () { @@ -212,9 +212,9 @@ class SamplesheetConverterTest extends Dsl2Spec{ then: noExceptionThrown() - stdout.contains("[[array_meta:null], [${this.getRootString()}/src/testResources/testDir/testFile.txt, ${this.getRootString()}/src/testResources/testDir2/testFile2.txt], [${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/testDir2], [${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/testDir2/testFile2.txt], [string1, string2], [25, 26], [25, 26.5], [false, true], [1, 2, 3], [true], [${this.getRootString()}/src/testResources/testDir/testFile.txt], [[${this.getRootString()}/src/testResources/testDir/testFile.txt]]]" as String) - stdout.contains("[[array_meta:[look, an, array, in, meta]], [], [], [], [string1, string2], [25, 26], [25, 26.5], [], [1, 2, 3], [false, true, false], [${this.getRootString()}/src/testResources/testDir/testFile.txt], [[${this.getRootString()}/src/testResources/testDir/testFile.txt]]]" as String) - 
stdout.contains("[[array_meta:null], [], [], [], [string1, string2], [25, 26], [25, 26.5], [], [1, 2, 3], [false, true, false], [${this.getRootString()}/src/testResources/testDir/testFile.txt], [[${this.getRootString()}/src/testResources/testDir/testFile.txt], [${this.getRootString()}/src/testResources/testDir/testFile.txt, ${this.getRootString()}/src/testResources/testDir2/testFile2.txt]]]" as String) + stdout.contains("[[array_meta:[]], [${getRootString()}/src/testResources/testDir/testFile.txt, ${getRootString()}/src/testResources/testDir2/testFile2.txt], [${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/testDir2], [${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/testDir2/testFile2.txt], [string1, string2], [25, 26], [25, 26.5], [false, true], [1, 2, 3], [true], [${getRootString()}/src/testResources/testDir/testFile.txt], [[${getRootString()}/src/testResources/testDir/testFile.txt]]]" as String) + stdout.contains("[[array_meta:[look, an, array, in, meta]], [], [], [], [string1, string2], [25, 26], [25, 26.5], [], [1, 2, 3], [false, true, false], [${getRootString()}/src/testResources/testDir/testFile.txt], [[${getRootString()}/src/testResources/testDir/testFile.txt]]]" as String) + stdout.contains("[[array_meta:[]], [], [], [], [string1, string2], [25, 26], [25, 26.5], [], [1, 2, 3], [false, true, false], [${getRootString()}/src/testResources/testDir/testFile.txt], [[${getRootString()}/src/testResources/testDir/testFile.txt], [${getRootString()}/src/testResources/testDir/testFile.txt, ${getRootString()}/src/testResources/testDir2/testFile2.txt]]]" as String) } def 'arrays should work fine - JSON' () { @@ -238,102 +238,11 @@ class SamplesheetConverterTest extends Dsl2Spec{ then: noExceptionThrown() - stdout.contains("[[array_meta:null], [${this.getRootString()}/src/testResources/testDir/testFile.txt, ${this.getRootString()}/src/testResources/testDir2/testFile2.txt], [${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/testDir2], [${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/testDir2/testFile2.txt], [string1, string2], [25, 26], [25, 26.5], [false, true], [1, 2, 3], [true], [${this.getRootString()}/src/testResources/testDir/testFile.txt], [[${this.getRootString()}/src/testResources/testDir/testFile.txt]]]" as String) - stdout.contains("[[array_meta:[look, an, array, in, meta]], [], [], [], [string1, string2], [25, 26], [25, 26.5], [], [1, 2, 3], [false, true, false], [${this.getRootString()}/src/testResources/testDir/testFile.txt], [[${this.getRootString()}/src/testResources/testDir/testFile.txt]]]" as String) - stdout.contains("[[array_meta:null], [], [], [], [string1, string2], [25, 26], [25, 26.5], [], [1, 2, 3], [false, true, false], [${this.getRootString()}/src/testResources/testDir/testFile.txt], [[${this.getRootString()}/src/testResources/testDir/testFile.txt], [${this.getRootString()}/src/testResources/testDir/testFile.txt, ${this.getRootString()}/src/testResources/testDir2/testFile2.txt]]]" as String) + stdout.contains("[[array_meta:[]], [${getRootString()}/src/testResources/testDir/testFile.txt, ${getRootString()}/src/testResources/testDir2/testFile2.txt], [${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/testDir2], [${getRootString()}/src/testResources/testDir, ${getRootString()}/src/testResources/testDir2/testFile2.txt], [string1, string2], [25, 26], [25, 26.5], [false, true], [1, 2, 3], 
[true], [${getRootString()}/src/testResources/testDir/testFile.txt], [[${getRootString()}/src/testResources/testDir/testFile.txt]]]" as String) + stdout.contains("[[array_meta:[look, an, array, in, meta]], [], [], [], [string1, string2], [25, 26], [25, 26.5], [], [1, 2, 3], [false, true, false], [${getRootString()}/src/testResources/testDir/testFile.txt], [[${getRootString()}/src/testResources/testDir/testFile.txt]]]" as String) + stdout.contains("[[array_meta:[]], [], [], [], [string1, string2], [25, 26], [25, 26.5], [], [1, 2, 3], [false, true, false], [${getRootString()}/src/testResources/testDir/testFile.txt], [[${getRootString()}/src/testResources/testDir/testFile.txt], [${getRootString()}/src/testResources/testDir/testFile.txt, ${getRootString()}/src/testResources/testDir2/testFile2.txt]]]" as String) } - def 'array errors before channel conversion - YAML' () { - given: - def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-validation' - - params.input = 'src/testResources/error_arrays.yaml' - - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter_arrays.json").view() - } - ''' - - when: - dsl_eval(SCRIPT_TEXT) - def stdout = capture - .toString() - .readLines() - .findResults {it.startsWith('[[') ? it : null } - - then: - def error = thrown(SchemaValidationException) - def errorMessages = error.message.readLines() - errorMessages[0] == "\033[0;31mThe following errors have been detected:" - errorMessages[2] == "* -- Entry 1 - field_3: the file or directory 'src/testResources/testDir3' does not exist." - errorMessages[3] == "* -- Entry 1 - field_3: the file or directory 'src/testResources/testDir2/testFile3.txt' does not exist." - errorMessages[4] == "* -- Entry 1 - field_2: the file or directory 'src/testResources/testDir3' does not exist." - errorMessages[5] == "* -- Entry 1 - field_1: the file or directory 'src/testResources/testDir/testFile.fasta' does not exist." - errorMessages[6] == "* -- Entry 1 - field_1: the file or directory 'src/testResources/testDir2/testFile3.txt' does not exist." - errorMessages[7] == '* -- Entry 1 - field_4: array items are not unique (["string2","string2","string1"])' - errorMessages[8] == '* -- Entry 1 - field_1: string [src/testResources/testDir/testFile.fasta] does not match pattern ^.*\\.txt$ (["src/testResources/testDir/testFile.fasta","src/testResources/testDir2/testFile3.txt"])' - errorMessages[9] == "* -- Entry 1 - field_5: expected maximum item count: 3, found: 4 ([25,25,27,28])" - errorMessages[10] == "* -- Entry 1 - field_6: array items are not unique ([25,25])" - errorMessages[11] == "* -- Entry 2: Missing required value: field_4" - errorMessages[12] == "* -- Entry 2 - field_5: expected minimum item count: 2, found: 1 ([25])" - errorMessages[13] == "* -- Entry 3 - field_4: expected type: JSONArray, found: String (abc)" - !stdout - } - - def 'array errors samplesheet format - CSV' () { - given: - def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-validation' - - params.input = 'src/testResources/correct.csv' - - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter_arrays.json").view() - } - ''' - - when: - dsl_eval(SCRIPT_TEXT) - def stdout = capture - .toString() - .readLines() - .findResults {it.startsWith('[[') ? 
it : null } - - then: - def error = thrown(SchemaValidationException) - def errorMessages = error.message.readLines() - errorMessages[0] == 'Using "type": "array" in schema with a ".csv" samplesheet is not supported' - !stdout - } - - def 'array errors samplesheet format - TSV' () { - given: - def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-validation' - - params.input = 'src/testResources/correct.tsv' - - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter_arrays.json").view() - } - ''' - - when: - dsl_eval(SCRIPT_TEXT) - def stdout = capture - .toString() - .readLines() - .findResults {it.startsWith('[[') ? it : null } - - then: - def error = thrown(SchemaValidationException) - def errorMessages = error.message.readLines() - errorMessages[0] == 'Using "type": "array" in schema with a ".tsv" samplesheet is not supported' - !stdout - } - - def 'no header - CSV' () { given: def SCRIPT_TEXT = ''' @@ -351,12 +260,11 @@ class SamplesheetConverterTest extends Dsl2Spec{ def stdout = capture .toString() .readLines() - .findResults {it.startsWith('[') ? it : null } then: noExceptionThrown() - stdout.contains("[test_1]") - stdout.contains("[test_2]") + stdout.contains("test_1") + stdout.contains("test_2") } def 'no header - YAML' () { @@ -376,12 +284,11 @@ class SamplesheetConverterTest extends Dsl2Spec{ def stdout = capture .toString() .readLines() - .findResults {it.startsWith('[') ? it : null } then: noExceptionThrown() - stdout.contains("[test_1]") - stdout.contains("[test_2]") + stdout.contains("test_1") + stdout.contains("test_2") } def 'no header - JSON' () { @@ -401,12 +308,11 @@ class SamplesheetConverterTest extends Dsl2Spec{ def stdout = capture .toString() .readLines() - .findResults {it.startsWith('[') ? 
it : null } then: noExceptionThrown() - stdout.contains("[test_1]") - stdout.contains("[test_2]") + stdout.contains("test_1") + stdout.contains("test_2") } def 'extra field' () { @@ -426,14 +332,18 @@ class SamplesheetConverterTest extends Dsl2Spec{ def stdout = capture .toString() .readLines() + .collect { + it.split("nextflow.validation.SamplesheetConverter - ")[-1] + } then: noExceptionThrown() - stdout.contains("\tThe samplesheet contains following unchecked field(s): [extraField]") - stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, [], unique1, 1, itDoesExist]" as String) + stdout.contains("Found the following unidentified headers in src/testResources/extraFields.csv:") + stdout.contains("\t- extraField") + stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, [], unique1, 1, itDoesExist]" as String) stdout.contains("[[string1:value, string2:value, integer1:0, integer2:0, boolean1:true, boolean2:true], string1, 25, false, [], [], [], [], [], itDoesExist]") stdout.contains("[[string1:dependentRequired, string2:dependentRequired, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, [], [], [], unique2, 1, itDoesExist]") - stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, [], unique3, 1, itDoesExist]" as String) + stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${getRootString()}/src/testResources/test.txt, ${getRootString()}/src/testResources/testDir, [], unique3, 1, itDoesExist]" as String) } def 'no meta' () { @@ -460,15 +370,15 @@ class SamplesheetConverterTest extends Dsl2Spec{ stdout.contains("[test1, test2]") } - def 'errors' () { + def 'deeply nested samplesheet - YAML' () { given: def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-validation' - params.input = 'src/testResources/errors.csv' + params.input = 'src/testResources/deeply_nested.yaml' workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter.json").view() + Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_deeply_nested_samplesheet.json").view() } ''' @@ -477,63 +387,22 @@ class SamplesheetConverterTest extends Dsl2Spec{ def stdout = capture .toString() .readLines() - .findResults {it.startsWith('[[') ? it : null } - - then: - def error = thrown(SchemaValidationException) - def errorMessages = error.message.readLines() - errorMessages[0] == "Samplesheet errors:" - errorMessages[1] == "\tEntry 1: [field_2, field_3] field(s) should be defined when 'field_1' is specified, but the field(s) [field_2] is/are not defined." - errorMessages[2] == "\tEntry 3: The 'field_10' value needs to be unique. 'non_unique' was found at least twice in the samplesheet." - errorMessages[3] == "\tEntry 3: The combination of 'field_11' with fields [field_10] needs to be unique. [field_11:1, field_10:non_unique] was found at least twice." 
- !stdout - } - - def 'errors before channel conversion' () { - given: - def SCRIPT_TEXT = ''' - include { fromSamplesheet } from 'plugin/nf-validation' - - params.input = 'src/testResources/errorsBeforeConversion.csv' - - workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter.json").view() - } - ''' - - when: - dsl_eval(SCRIPT_TEXT) - def stdout = capture - .toString() - .readLines() - .findResults {it.startsWith('[[') ? it : null } + .findResults {it.startsWith('[') ? it : null } then: - def error = thrown(SchemaValidationException) - def errorMessages = error.message.readLines() - errorMessages[0] == "\033[0;31mThe following errors have been detected:" - errorMessages[2] == "* -- Entry 1 - field_9: the file or directory 'non_existing_path' does not exist." - errorMessages[3] == "* -- Entry 1 - field_7: the file or directory 'non_existing_file.tsv' does not exist." - errorMessages[4] == '* -- Entry 1 - field_7: string [non_existing_file.tsv] does not match pattern ^.*\\.txt$ (non_existing_file.tsv)' - errorMessages[5] == "* -- Entry 1 - field_8: 'src/testResources/test.txt' is not a directory, but a file (src/testResources/test.txt)" - errorMessages[6] == "* -- Entry 1 - field_5: expected type: Number, found: String (string)" - errorMessages[7] == "* -- Entry 1 - field_6: expected type: Boolean, found: String (20)" - errorMessages[8] == "* -- Entry 2: Missing required value: field_4" - errorMessages[9] == "* -- Entry 2: Missing required value: field_6" - errorMessages[10] == "* -- Entry 3 - field_3: expected type: Boolean, found: String (3333)" - errorMessages[11] == "* -- Entry 3 - field_2: expected type: Integer, found: String (false)" - !stdout + noExceptionThrown() + stdout.contains("[[mapMeta:this is in a map, arrayMeta:[metaString45, metaString478], otherArrayMeta:[metaString45, metaString478], meta:metaValue, metaMap:[entry1:entry1String, entry2:12.56]], [[string1, string2], string3, 1, 1, ${getRootString()}/file1.txt], [string4, string5, string6], [[string7, string8], [string9, string10]], test]" as String) } - def 'duplicates' () { + def 'deeply nested samplesheet - JSON' () { given: def SCRIPT_TEXT = ''' include { fromSamplesheet } from 'plugin/nf-validation' - params.input = 'src/testResources/duplicate.csv' + params.input = 'src/testResources/deeply_nested.json' workflow { - Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter.json").view() + Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_deeply_nested_samplesheet.json").view() } ''' @@ -542,13 +411,10 @@ class SamplesheetConverterTest extends Dsl2Spec{ def stdout = capture .toString() .readLines() - .findResults {it.startsWith('[[') ? it : null } + .findResults {it.startsWith('[') ? 
it : null } then: - def error = thrown(SchemaValidationException) - def errorMessages = error.message.readLines() - errorMessages[0] == "Samplesheet errors:" - errorMessages[4] == "\tThe samplesheet contains duplicate rows for entry 2 and entry 3 ([field_4:string1, field_5:25, field_6:false])" - !stdout + noExceptionThrown() + stdout.contains("[[mapMeta:this is in a map, arrayMeta:[metaString45, metaString478], otherArrayMeta:[metaString45, metaString478], meta:metaValue, metaMap:[entry1:entry1String, entry2:12.56]], [[string1, string2], string3, 1, 1, ${getRootString()}/file1.txt], [string4, string5, string6], [[string7, string8], [string9, string10]], test]" as String) } } diff --git a/plugins/nf-validation/src/test/nextflow/validation/PluginExtensionMethodsTest.groovy b/plugins/nf-validation/src/test/nextflow/validation/ValidateParametersTest.groovy similarity index 69% rename from plugins/nf-validation/src/test/nextflow/validation/PluginExtensionMethodsTest.groovy rename to plugins/nf-validation/src/test/nextflow/validation/ValidateParametersTest.groovy index 8669197..eb76f16 100644 --- a/plugins/nf-validation/src/test/nextflow/validation/PluginExtensionMethodsTest.groovy +++ b/plugins/nf-validation/src/test/nextflow/validation/ValidateParametersTest.groovy @@ -11,11 +11,13 @@ import org.pf4j.PluginDescriptorFinder import spock.lang.Shared import test.Dsl2Spec import test.OutputCapture + /** - * @author : jorge - * + * @author : mirpedrol + * @author : nvnieuwk + * @author : jorgeaguileraseqera */ -class PluginExtensionMethodsTest extends Dsl2Spec{ +class ValidateParametersTest extends Dsl2Spec{ @Rule OutputCapture capture = new OutputCapture() @@ -23,6 +25,10 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ @Shared String pluginsMode + Path root = Path.of('.').toAbsolutePath().normalize() + Path getRoot() { this.root } + String getRootString() { this.root.toString() } + def setup() { // reset previous instances PluginExtensionProvider.reset() @@ -30,7 +36,7 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ pluginsMode = System.getProperty('pf4j.mode') System.setProperty('pf4j.mode', 'dev') // the plugin root should - def root = Path.of('.').toAbsolutePath().normalize() + def root = this.getRoot() def manager = new TestPluginManager(root){ @Override protected PluginDescriptorFinder createPluginDescriptorFinder() { @@ -51,28 +57,6 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ pluginsMode ? System.setProperty('pf4j.mode',pluginsMode) : System.clearProperty('pf4j.mode') } - // - // Params validation tests - // - - def 'should import functions' () { - given: - def SCRIPT_TEXT = ''' - include { validateParameters } from 'plugin/nf-validation' - ''' - - when: - dsl_eval(SCRIPT_TEXT) - def stdout = capture - .toString() - .readLines() - .findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? 
it : null }
-
-        then:
-        noExceptionThrown()
-        !stdout
-    }
-
     def 'should validate when no params' () {
         given:
         def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString()
@@ -92,7 +76,11 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
 
         then:
         def error = thrown(SchemaValidationException)
-        error.message == "The following invalid input values have been detected:\n\n* Missing required parameter: --input\n* Missing required parameter: --outdir\n\n"
+        error.message == """The following invalid input values have been detected:
+
+* Missing required parameter(s): input, outdir
+
+"""
         !stdout
     }
 
@@ -238,11 +226,15 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
         then:
         def error = thrown(SchemaValidationException)
         def errorMessages = error.message.readLines()
-        errorMessages[0] == "\033[0;31mThe following errors have been detected:"
-        errorMessages[2] == "* -- Entry 1: Missing required value: sample"
-        errorMessages[3] == "* -- Entry 1 - strandedness: Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded' (weird)"
-        errorMessages[4] == "* -- Entry 1 - fastq_2: FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz' (test1_fastq2.fasta)"
-        errorMessages[5] == "* -- Entry 2 - sample: Sample name must be provided and cannot contain spaces (test 2)"
+        errorMessages[0] == "\033[0;31mThe following invalid input values have been detected:"
+        errorMessages[1] == ""
+        errorMessages[2] == "* --input (src/testResources/wrong.csv): Validation of file failed:"
+        errorMessages[3] == "\t-> Entry 1: Error for field 'strandedness' (weird): Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded'"
+        errorMessages[4] == "\t-> Entry 1: Error for field 'fastq_2' (test1_fastq2.fasta): \"test1_fastq2.fasta\" does not match regular expression [^\\S+\\.f(ast)?q\\.gz\$]"
+        errorMessages[5] == "\t-> Entry 1: Error for field 'fastq_2' (test1_fastq2.fasta): \"test1_fastq2.fasta\" is longer than 0 characters"
+        errorMessages[6] == "\t-> Entry 1: Error for field 'fastq_2' (test1_fastq2.fasta): FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
+        errorMessages[7] == "\t-> Entry 1: Missing required field(s): sample"
+        errorMessages[8] == "\t-> Entry 2: Error for field 'sample' (test 2): Sample name must be provided and cannot contain spaces"
         !stdout
     }
 
@@ -267,11 +259,15 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
         then:
         def error = thrown(SchemaValidationException)
         def errorMessages = error.message.readLines()
-        errorMessages[0] == "\033[0;31mThe following errors have been detected:"
-        errorMessages[2] == "* -- Entry 1: Missing required value: sample"
-        errorMessages[3] == "* -- Entry 1 - strandedness: Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded' (weird)"
-        errorMessages[4] == "* -- Entry 1 - fastq_2: FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz' (test1_fastq2.fasta)"
-        errorMessages[5] == "* -- Entry 2 - sample: Sample name must be provided and cannot contain spaces (test 2)"
+        errorMessages[0] == "\033[0;31mThe following invalid input values have been detected:"
+        errorMessages[1] == ""
+        errorMessages[2] == "* --input (src/testResources/wrong.tsv): Validation of file failed:"
+        errorMessages[3] == "\t-> Entry 1: Error for field 'strandedness' (weird): Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded'"
+        errorMessages[4] == "\t-> Entry 1: Error for field 'fastq_2' (test1_fastq2.fasta): \"test1_fastq2.fasta\" does not match regular expression [^\\S+\\.f(ast)?q\\.gz\$]"
+        errorMessages[5] == "\t-> Entry 1: Error for field 'fastq_2' (test1_fastq2.fasta): \"test1_fastq2.fasta\" is longer than 0 characters"
+        errorMessages[6] == "\t-> Entry 1: Error for field 'fastq_2' (test1_fastq2.fasta): FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
+        errorMessages[7] == "\t-> Entry 1: Missing required field(s): sample"
+        errorMessages[8] == "\t-> Entry 2: Error for field 'sample' (test 2): Sample name must be provided and cannot contain spaces"
         !stdout
     }
 
@@ -296,11 +292,15 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
         then:
         def error = thrown(SchemaValidationException)
         def errorMessages = error.message.readLines()
-        errorMessages[0] == "\033[0;31mThe following errors have been detected:"
-        errorMessages[2] == "* -- Entry 1: Missing required value: sample"
-        errorMessages[3] == "* -- Entry 1 - strandedness: Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded' (weird)"
-        errorMessages[4] == "* -- Entry 1 - fastq_2: FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz' (test1_fastq2.fasta)"
-        errorMessages[5] == "* -- Entry 2 - sample: Sample name must be provided and cannot contain spaces (test 2)"
+        errorMessages[0] == "\033[0;31mThe following invalid input values have been detected:"
+        errorMessages[1] == ""
+        errorMessages[2] == "* --input (src/testResources/wrong.yaml): Validation of file failed:"
+        errorMessages[3] == "\t-> Entry 1: Error for field 'strandedness' (weird): Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded'"
+        
errorMessages[4] == "\t-> Entry 1: Error for field 'fastq_2' (test1_fastq2.fasta): \"test1_fastq2.fasta\" does not match regular expression [^\\S+\\.f(ast)?q\\.gz\$]" + errorMessages[5] == "\t-> Entry 1: Error for field 'fastq_2' (test1_fastq2.fasta): \"test1_fastq2.fasta\" is longer than 0 characters" + errorMessages[6] == "\t-> Entry 1: Error for field 'fastq_2' (test1_fastq2.fasta): FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + errorMessages[7] == "\t-> Entry 1: Missing required field(s): sample" + errorMessages[8] == "\t-> Entry 2: Error for field 'sample' (test 2): Sample name must be provided and cannot contain spaces" !stdout } @@ -431,7 +435,11 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ then: def error = thrown(SchemaValidationException) - error.message == "The following invalid input values have been detected:\n\n* --outdir: expected type: String, found: Integer (10)\n\n" + error.message == """The following invalid input values have been detected: + +* --outdir (10): Value is [integer] but should be [string] + +""" !stdout } @@ -460,33 +468,6 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ !stdout } - def 'should find validation errors for enum' () { - given: - def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() - def SCRIPT_TEXT = """ - params.monochrome_logs = true - params.input = 'src/testResources/correct.csv' - params.outdir = 'src/testResources/testDir' - params.publish_dir_mode = 'incorrect' - params.max_time = '10.day' - include { validateParameters } from 'plugin/nf-validation' - - validateParameters(parameters_schema: '$schema', monochrome_logs: params.monochrome_logs) - """ - - when: - dsl_eval(SCRIPT_TEXT) - def stdout = capture - .toString() - .readLines() - .findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? it : null } - - then: - def error = thrown(SchemaValidationException) - error.message == "The following invalid input values have been detected:\n\n* --publish_dir_mode: 'incorrect' is not a valid choice (Available choices (5 of 6): symlink, rellink, link, copy, copyNoFollow, ... 
)\n\n" - !stdout - } - def 'correct validation of integers' () { given: def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() @@ -558,7 +539,11 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ then: def error = thrown(SchemaValidationException) - error.message == "The following invalid input values have been detected:\n\n* Missing required parameter: --integer\n* Missing required parameter: --number\n\n" + error.message == """The following invalid input values have been detected: + +* Missing required parameter(s): number, integer + +""" !stdout } @@ -611,9 +596,8 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() def SCRIPT_TEXT = """ params.input = 'src/testResources/correct.csv' - params.outdir = 'src/testResources/testDir' + params.outdir = 1 params.validationLenientMode = true - params.max_cpus = '4' include { validateParameters } from 'plugin/nf-validation' validateParameters(parameters_schema: '$schema') @@ -653,21 +637,19 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ then: def error = thrown(SchemaValidationException) - error.message == "The following invalid input values have been detected:\n\n* --max_cpus: expected type: Integer, found: BigDecimal (1.2)\n\n" + error.message == "The following invalid input values have been detected:\n\n* --max_cpus (1.2): Value is [number] but should be [integer]\n\n" !stdout } - def 'should fail because of wrong pattern' () { + + def 'should validate a schema from an input file' () { given: - def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() + def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet.json').toAbsolutePath().toString() def SCRIPT_TEXT = """ - params.monochrome_logs = true - params.input = 'src/testResources/correct.csv' - params.outdir = 'src/testResources/testDir' - params.max_memory = '10' + params.input = 'src/testResources/samplesheet.csv' include { validateParameters } from 'plugin/nf-validation' - validateParameters(parameters_schema: '$schema', monochrome_logs: params.monochrome_logs) + validateParameters(parameters_schema: '$schema') """ when: @@ -677,87 +659,20 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ .readLines() .findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? 
it : null } - then: - def error = thrown(SchemaValidationException) - error.message == '''The following invalid input values have been detected:\n\n* --max_memory: string [10] does not match pattern ^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$ (10)\n\n''' - !stdout - } - - // - // --help argument tests - // - - def 'should print a help message' () { - given: - def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() - def SCRIPT_TEXT = """ - include { paramsHelp } from 'plugin/nf-validation' - - def command = "nextflow run --input samplesheet.csv --outdir -profile docker" - - def help_msg = paramsHelp(command, parameters_schema: '$schema') - log.info help_msg - """ - - when: - dsl_eval(SCRIPT_TEXT) - def stdout = capture - .toString() - .readLines() - .findResults {it.contains('Typical pipeline command:') || - it.contains('nextflow run') || - it.contains('Input/output options') || - it.contains('--input') || - it.contains('--outdir') || - it.contains('--email') || - it.contains('--multiqc_title') || - it.contains('Reference genome options') || - it.contains('--genome') || - it.contains('--fasta') - ? it : null } - - then: - noExceptionThrown() - stdout.size() == 10 - } - - def 'should print a help message with argument options' () { - given: - def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() - def SCRIPT_TEXT = """ - include { paramsHelp } from 'plugin/nf-validation' - params.validationShowHiddenParams = true - def command = "nextflow run --input samplesheet.csv --outdir -profile docker" - - def help_msg = paramsHelp(command, parameters_schema: '$schema') - log.info help_msg - """ - - when: - dsl_eval(SCRIPT_TEXT) - def stdout = capture - .toString() - .readLines() - .findResults {it.contains('publish_dir_mode') && - it.contains('(accepted: symlink, rellink, link, copy, copyNoFollow') - ? it : null } - then: noExceptionThrown() - stdout.size() == 1 + !stdout } - def 'should print a help message of one parameter' () { + def 'should fail because of wrong file pattern' () { given: - def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() + def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet.json').toAbsolutePath().toString() def SCRIPT_TEXT = """ - include { paramsHelp } from 'plugin/nf-validation' - params.help = 'publish_dir_mode' - - def command = "nextflow run --input samplesheet.csv --outdir -profile docker" + params.monochrome_logs = true + params.input = 'src/testResources/samplesheet_wrong_pattern.csv' + include { validateParameters } from 'plugin/nf-validation' - def help_msg = paramsHelp(command, parameters_schema: '$schema') - log.info help_msg + validateParameters(parameters_schema: '$schema', monochrome_logs: params.monochrome_logs) """ when: @@ -765,98 +680,29 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ def stdout = capture .toString() .readLines() - .findResults {it.startsWith('--publish_dir_mode') || - it.contains('type :') || - it.contains('default :') || - it.contains('description:') || - it.contains('help_text :') || - it.contains('fa_icon :') || // fa_icon shouldn't be printed - it.contains('enum :') || - it.contains('hidden :') - ? it : null } + .findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? 
it : null } then: - noExceptionThrown() - stdout.size() == 7 - } + def error = thrown(SchemaValidationException) + error.message == """The following invalid input values have been detected: - def 'should fail when help param doesnt exist' () { - given: - def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() - def SCRIPT_TEXT = """ - include { paramsHelp } from 'plugin/nf-validation' - params.help = 'no_exist' +* --input (src/testResources/samplesheet_wrong_pattern.csv): Validation of file failed: +\t-> Entry 1: Error for field 'fastq_1' (test1_fastq1.txt): FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz' +\t-> Entry 2: Error for field 'fastq_1' (test2_fastq1.txt): FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz' - def command = "nextflow run --input samplesheet.csv --outdir -profile docker" - - def help_msg = paramsHelp(command, parameters_schema: '$schema') - log.info help_msg - """ - - when: - dsl_eval(SCRIPT_TEXT) - def stdout = capture - .toString() - .readLines() - .findResults {it.startsWith('--no_exist') ? it : null } - - then: - def error = thrown(Exception) - error.message == "Specified param 'no_exist' does not exist in JSON schema." +""" !stdout } - // - // Summary of params tests - // - - def 'should print params summary' () { - given: - def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() - def SCRIPT_TEXT = """ - params.outdir = "outDir" - include { paramsSummaryLog } from 'plugin/nf-validation' - - def summary_params = paramsSummaryLog(workflow, parameters_schema: '$schema') - log.info summary_params - """ - - when: - dsl_eval(SCRIPT_TEXT) - def stdout = capture - .toString() - .readLines() - .findResults {it.contains('Only displaying parameters that differ from the pipeline defaults') || - it.contains('Core Nextflow options') || - it.contains('runName') || - it.contains('launchDir') || - it.contains('workDir') || - it.contains('projectDir') || - it.contains('userName') || - it.contains('profile') || - it.contains('configFiles') || - it.contains('Input/output options') || - it.contains('outdir') - ? it : null } - - then: - noExceptionThrown() - stdout.size() == 11 - stdout ==~ /.*\[0;34moutdir : .\[0;32moutDir.*/ - } - - // - // Samplesheet validation tests - // - - def 'should validate a schema from an input file' () { + def 'should fail because of missing required value' () { given: def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet.json').toAbsolutePath().toString() def SCRIPT_TEXT = """ - params.input = 'src/testResources/samplesheet.csv' + params.monochrome_logs = true + params.input = 'src/testResources/samplesheet_no_required.csv' include { validateParameters } from 'plugin/nf-validation' - - validateParameters(parameters_schema: '$schema') + + validateParameters(parameters_schema: '$schema', monochrome_logs: params.monochrome_logs) """ when: @@ -867,18 +713,25 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ .findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? 
it : null } then: - noExceptionThrown() + def error = thrown(SchemaValidationException) + error.message == '''The following invalid input values have been detected: + +* --input (src/testResources/samplesheet_no_required.csv): Validation of file failed: +\t-> Entry 1: Missing required field(s): sample +\t-> Entry 2: Missing required field(s): strandedness, sample +\t-> Entry 3: Missing required field(s): sample + +''' !stdout } - def 'should fail because of wrong file pattern' () { + def 'should fail because of wrong draft' () { given: - def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet.json').toAbsolutePath().toString() + def schema = Path.of('src/testResources/nextflow_schema_draft7.json').toAbsolutePath().toString() def SCRIPT_TEXT = """ params.monochrome_logs = true - params.input = 'src/testResources/samplesheet_wrong_pattern.csv' include { validateParameters } from 'plugin/nf-validation' - + validateParameters(parameters_schema: '$schema', monochrome_logs: params.monochrome_logs) """ @@ -891,18 +744,17 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ then: def error = thrown(SchemaValidationException) - error.message == '''The following errors have been detected:\n\n* -- Entry 1 - fastq_1: FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz' (test1_fastq1.txt)\n* -- Entry 2 - fastq_1: FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz' (test2_fastq1.txt)\n\n''' !stdout } - def 'should fail because of missing required value' () { + def 'should fail because of existing file' () { given: - def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet.json').toAbsolutePath().toString() + def schema = Path.of('src/testResources/nextflow_schema_with_exists_false.json').toAbsolutePath().toString() def SCRIPT_TEXT = """ params.monochrome_logs = true - params.input = 'src/testResources/samplesheet_no_required.csv' + params.outdir = "src/testResources/" include { validateParameters } from 'plugin/nf-validation' - + validateParameters(parameters_schema: '$schema', monochrome_logs: params.monochrome_logs) """ @@ -915,18 +767,22 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ then: def error = thrown(SchemaValidationException) - error.message == '''The following errors have been detected:\n\n* -- Entry 1: Missing required value: sample\n* -- Entry 2: Missing required value: sample\n\n''' + error.message == '''The following invalid input values have been detected: + +* --outdir (src/testResources/): the file or directory 'src/testResources/' should not exist + +''' !stdout } - def 'should fail because of arrays with csv' () { + def 'should fail because of non-unique entries' () { given: - def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet_converter_arrays.json').toAbsolutePath().toString() + def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet_uniqueEntries.json').toAbsolutePath().toString() def SCRIPT_TEXT = """ params.monochrome_logs = true - params.input = 'src/testResources/correct.csv' + params.input = "src/testResources/samplesheet_non_unique.csv" include { validateParameters } from 'plugin/nf-validation' - + validateParameters(parameters_schema: '$schema', monochrome_logs: params.monochrome_logs) """ @@ -939,7 +795,13 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ then: def error = thrown(SchemaValidationException) - error.message == '''Using {"type": "array"} in schema with a ".csv" 
samplesheet is not supported\n''' + error.message == '''The following invalid input values have been detected: + +* --input (src/testResources/samplesheet_non_unique.csv): Validation of file failed: + -> Entry 3: Detected non-unique combination of the following fields: [sample, fastq_1] + +''' !stdout } -} \ No newline at end of file + +} diff --git a/plugins/nf-validation/src/testResources/deeply_nested.json b/plugins/nf-validation/src/testResources/deeply_nested.json new file mode 100644 index 0000000..0b141e4 --- /dev/null +++ b/plugins/nf-validation/src/testResources/deeply_nested.json @@ -0,0 +1,34 @@ +[ + { + "map": { + "arrayTest": [ + "string1", + "string2" + ], + "stringTest": "string3", + "numberTest": 1, + "integerTest": 1, + "fileTest": "file1.txt", + "mapMeta": "this is in a map" + }, + "array": [ + "string4", + "string5", + "string6" + ], + "arrayInArray": [ + [ "string7", "string8" ], + [ "string9", "string10" ] + ], + "arrayMeta": [ + "metaString45", + "metaString478" + ], + "value": "test", + "meta": "metaValue", + "metaMap": { + "entry1": "entry1String", + "entry2": 12.56 + } + } +] \ No newline at end of file diff --git a/plugins/nf-validation/src/testResources/deeply_nested.yaml b/plugins/nf-validation/src/testResources/deeply_nested.yaml new file mode 100644 index 0000000..2d5f873 --- /dev/null +++ b/plugins/nf-validation/src/testResources/deeply_nested.yaml @@ -0,0 +1,25 @@ +- map: + arrayTest: + - string1 + - string2 + stringTest: string3 + numberTest: 1 + integerTest: 1 + fileTest: file1.txt + mapMeta: this is in a map + array: + - string4 + - string5 + - string6 + arrayInArray: [ + [ "string7", "string8" ], + [ "string9", "string10" ] + ] + arrayMeta: + - "metaString45" + - "metaString478" + value: test + meta: metaValue + metaMap: + entry1: entry1String + entry2: 12.56 \ No newline at end of file diff --git a/plugins/nf-validation/src/testResources/nextflow_schema.json b/plugins/nf-validation/src/testResources/nextflow_schema.json index 5c8a10d..5bb7c6e 100644 --- a/plugins/nf-validation/src/testResources/nextflow_schema.json +++ b/plugins/nf-validation/src/testResources/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -250,19 +250,19 @@ }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/defs/input_output_options" }, { - "$ref": "#/definitions/reference_genome_options" + "$ref": "#/defs/reference_genome_options" }, { - "$ref": "#/definitions/institutional_config_options" + "$ref": "#/defs/institutional_config_options" }, { - "$ref": "#/definitions/max_job_request_options" + "$ref": "#/defs/max_job_request_options" }, { - "$ref": "#/definitions/generic_options" + "$ref": "#/defs/generic_options" } ] } diff --git a/plugins/nf-validation/src/testResources/nextflow_schema_draft7.json b/plugins/nf-validation/src/testResources/nextflow_schema_draft7.json new file mode 100644 index 0000000..006e945 --- /dev/null +++ b/plugins/nf-validation/src/testResources/nextflow_schema_draft7.json @@ -0,0 +1,25 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": 
"https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", + "title": "nf-core/testpipeline pipeline parameters", + "description": "this is a test", + "type": "object", + "defs": { + "file_patterns": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "properties": { + "glob": { + "type": "string", + "format": "file-path-pattern" + } + } + } + }, + "allOf": [ + { + "$ref": "#/defs/file_patterns" + } + ] +} diff --git a/plugins/nf-validation/src/testResources/nextflow_schema_file_path_pattern.json b/plugins/nf-validation/src/testResources/nextflow_schema_file_path_pattern.json index e6d6e53..e0fc8a4 100644 --- a/plugins/nf-validation/src/testResources/nextflow_schema_file_path_pattern.json +++ b/plugins/nf-validation/src/testResources/nextflow_schema_file_path_pattern.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "file_patterns": { "title": "Input/output options", "type": "object", @@ -19,7 +19,7 @@ }, "allOf": [ { - "$ref": "#/definitions/file_patterns" + "$ref": "#/defs/file_patterns" } ] } diff --git a/plugins/nf-validation/src/testResources/nextflow_schema_required_numerics.json b/plugins/nf-validation/src/testResources/nextflow_schema_required_numerics.json index 22c98c4..cc27278 100644 --- a/plugins/nf-validation/src/testResources/nextflow_schema_required_numerics.json +++ b/plugins/nf-validation/src/testResources/nextflow_schema_required_numerics.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -53,10 +53,10 @@ }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/defs/input_output_options" }, { - "$ref": "#/definitions/numeric_options" + "$ref": "#/defs/numeric_options" } ] } diff --git a/plugins/nf-validation/src/testResources/nextflow_schema_with_deeply_nested_samplesheet.json b/plugins/nf-validation/src/testResources/nextflow_schema_with_deeply_nested_samplesheet.json new file mode 100644 index 0000000..8377896 --- /dev/null +++ b/plugins/nf-validation/src/testResources/nextflow_schema_with_deeply_nested_samplesheet.json @@ -0,0 +1,28 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", + "title": "nf-core/testpipeline pipeline parameters", + "description": "this is a test", + "type": "object", + "defs": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["input"], + "properties": { + "input": { + "type": "string", + "format": "file-path", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "schema": "src/testResources/samplesheet_schema_deeply_nested.json", + "description": "Path to comma-separated file 
containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).", + "fa_icon": "fas fa-file-csv" + } + } + } + } +} diff --git a/plugins/nf-validation/src/testResources/nextflow_schema_with_exists_false.json b/plugins/nf-validation/src/testResources/nextflow_schema_with_exists_false.json new file mode 100644 index 0000000..4c30cf9 --- /dev/null +++ b/plugins/nf-validation/src/testResources/nextflow_schema_with_exists_false.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", + "title": "nf-core/testpipeline pipeline parameters", + "description": "this is a test", + "type": "object", + "defs": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "properties": { + "outdir": { + "type": "string", + "format": "directory-path", + "exists": false + } + } + } + }, + "allOf": [ + { + "$ref": "#/defs/input_output_options" + } + ] +} diff --git a/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet.json b/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet.json index 2473e18..f88e51a 100644 --- a/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet.json +++ b/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -20,9 +20,15 @@ "schema": "src/testResources/samplesheet_schema.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. 
See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" + "fa_icon": "fas fa-file-csv", + "exists": true } } } - } + }, + "allOf": [ + { + "$ref": "#/defs/input_output_options" + } + ] } diff --git a/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_converter.json b/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_converter.json index af503ce..b411b16 100644 --- a/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_converter.json +++ b/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_converter.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "input_output_options": { "title": "Input/output options", "type": "object", diff --git a/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_no_header.json b/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_no_header.json index 6132f32..c37c0b4 100644 --- a/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_no_header.json +++ b/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_no_header.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "input_output_options": { "title": "Input/output options", "type": "object", diff --git a/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_no_meta.json b/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_no_meta.json index 1f4b660..6c7a022 100644 --- a/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_no_meta.json +++ b/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_no_meta.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", "title": "nf-core/testpipeline pipeline parameters", "description": "this is a test", "type": "object", - "definitions": { + "defs": { "input_output_options": { "title": "Input/output options", "type": "object", diff --git a/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_uniqueEntries.json b/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_uniqueEntries.json new file mode 100644 index 0000000..1c113b8 --- /dev/null +++ b/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_uniqueEntries.json @@ -0,0 +1,34 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json", + "title": "nf-core/testpipeline pipeline parameters", + "description": "this is a test", + "type": "object", + "defs": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + 
"description": "Define where the pipeline should find input data and save output data.", + "required": ["input"], + "properties": { + "input": { + "type": "string", + "format": "file-path", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "schema": "src/testResources/samplesheet_schema_uniqueEntries.json", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).", + "fa_icon": "fas fa-file-csv", + "exists": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/defs/input_output_options" + } + ] +} diff --git a/plugins/nf-validation/src/testResources/no_header_schema.json b/plugins/nf-validation/src/testResources/no_header_schema.json index 89194a4..ccfd576 100644 --- a/plugins/nf-validation/src/testResources/no_header_schema.json +++ b/plugins/nf-validation/src/testResources/no_header_schema.json @@ -1,14 +1,9 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "description": "Schema for the file provided with params.input", "type": "array", "items": { - "type": "object", - "properties": { - "": { - "type": "string" - } - } + "type": "string" } } diff --git a/plugins/nf-validation/src/testResources/no_meta_schema.json b/plugins/nf-validation/src/testResources/no_meta_schema.json index ba22d76..e3a028d 100644 --- a/plugins/nf-validation/src/testResources/no_meta_schema.json +++ b/plugins/nf-validation/src/testResources/no_meta_schema.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "description": "Schema for the file provided with params.input", "type": "array", "items": { diff --git a/plugins/nf-validation/src/testResources/samplesheet_no_required.csv b/plugins/nf-validation/src/testResources/samplesheet_no_required.csv index 900cad9..19254fe 100644 --- a/plugins/nf-validation/src/testResources/samplesheet_no_required.csv +++ b/plugins/nf-validation/src/testResources/samplesheet_no_required.csv @@ -1,3 +1,4 @@ fastq_1,fastq_2,strandedness test1_fastq1.fastq.gz,test1_fastq2.fastq.gz,forward -test2_fastq1.fastq.gz,,forward +test2_fastq1.fastq.gz,, +test3_fastq1.fastq.gz,,forward diff --git a/plugins/nf-validation/src/testResources/samplesheet_non_unique.csv b/plugins/nf-validation/src/testResources/samplesheet_non_unique.csv new file mode 100644 index 0000000..c35ba51 --- /dev/null +++ b/plugins/nf-validation/src/testResources/samplesheet_non_unique.csv @@ -0,0 +1,4 @@ +sample,fastq_1,fastq_2,strandedness +test_1,test1_fastq1.fastq.gz,test1_fastq2.fastq.gz,forward +test_2,test2_fastq1.fastq.gz,,unstranded +test_2,test2_fastq1.fastq.gz,,forward diff --git a/plugins/nf-validation/src/testResources/samplesheet_schema.json b/plugins/nf-validation/src/testResources/samplesheet_schema.json index f66ed98..d727b06 100644 --- a/plugins/nf-validation/src/testResources/samplesheet_schema.json +++ b/plugins/nf-validation/src/testResources/samplesheet_schema.json @@ -1,5 +1,7 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": 
"https://raw.githubusercontent.com/test/test/master/assets/schema_input.json", + "title": "Test schema for samplesheets", "description": "Schema for the file provided with params.input", "type": "array", "items": { diff --git a/plugins/nf-validation/src/testResources/samplesheet_schema_deeply_nested.json b/plugins/nf-validation/src/testResources/samplesheet_schema_deeply_nested.json new file mode 100644 index 0000000..89d2496 --- /dev/null +++ b/plugins/nf-validation/src/testResources/samplesheet_schema_deeply_nested.json @@ -0,0 +1,81 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/test/test/master/assets/schema_input.json", + "title": "Test schema for samplesheets", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "map": { + "type": "object", + "properties": { + "arrayTest": { + "type": "array", + "items": { + "type": "string" + } + }, + "stringTest": { + "type": "string" + }, + "numberTest": { + "type": "number" + }, + "integerTest": { + "type": "integer" + }, + "fileTest": { + "type": "string", + "format": "file-path" + }, + "mapMeta": { + "type": "string", + "meta": "mapMeta" + } + } + }, + "array": { + "type": "array", + "items": { + "type": "string" + } + }, + "arrayInArray": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "arrayMeta": { + "type": "array", + "meta": ["arrayMeta", "otherArrayMeta"], + "items": { + "type": "string" + } + }, + "value": { + "type": "string" + }, + "meta": { + "type": "string", + "meta": "meta" + }, + "metaMap": { + "type": "object", + "meta": "metaMap", + "properties": { + "entry1": { + "type": "string" + }, + "entry2": { + "type": "number" + } + } + } + } + } +} diff --git a/plugins/nf-validation/src/testResources/samplesheet_schema_uniqueEntries.json b/plugins/nf-validation/src/testResources/samplesheet_schema_uniqueEntries.json new file mode 100644 index 0000000..6c4396b --- /dev/null +++ b/plugins/nf-validation/src/testResources/samplesheet_schema_uniqueEntries.json @@ -0,0 +1,41 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/test/test/master/assets/schema_input.json", + "title": "Test schema for samplesheets", + "description": "Schema for the file provided with params.input", + "type": "array", + "uniqueEntries": ["sample", "fastq_1"], + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces" + }, + "fastq_1": { + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "fastq_2": { + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$" + }, + { + "type": "string", + "maxLength": 0 + } + ] + }, + "strandedness": { + "type": "string", + "errorMessage": "Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded'", + "enum": ["forward", "reverse", "unstranded"] + } + } + } +} diff --git a/plugins/nf-validation/src/testResources/schema_input.json b/plugins/nf-validation/src/testResources/schema_input.json index b896f13..bf790e2 100644 --- 
a/plugins/nf-validation/src/testResources/schema_input.json +++ b/plugins/nf-validation/src/testResources/schema_input.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nextflow-io/nf-validation/master/plugins/nf-validation/src/testResources/schema_input.json", "title": "Samplesheet validation schema", "description": "Schema for the samplesheet used in this pipeline", @@ -10,8 +10,7 @@ "field_1": { "type": "string", "meta": ["string1","string2"], - "default": "value", - "dependentRequired": ["field_2", "field_3"] + "default": "value" }, "field_2": { "type": "integer", @@ -49,18 +48,23 @@ "exists": true }, "field_10": { - "type": "string", - "unique": true + "type": "string" }, "field_11": { - "type": "integer", - "unique": ["field_10"] + "type": "integer" }, "field_12": { "type": "string", "default": "itDoesExist" } }, - "required": ["field_4", "field_6"] - } + "required": ["field_4", "field_6"], + "dependentRequired": { + "field_1": ["field_2", "field_3"] + } + }, + "allOf": [ + {"uniqueEntries": ["field_11", "field_10"]}, + {"uniqueEntries": ["field_10"]} + ] } diff --git a/plugins/nf-validation/src/testResources/schema_input_with_arrays.json b/plugins/nf-validation/src/testResources/schema_input_with_arrays.json index 06ebba8..7e4dcfa 100644 --- a/plugins/nf-validation/src/testResources/schema_input_with_arrays.json +++ b/plugins/nf-validation/src/testResources/schema_input_with_arrays.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nextflow-io/nf-validation/master/plugins/nf-validation/src/testResources/schema_input.json", "title": "Samplesheet validation schema", "description": "Schema for the samplesheet used in this pipeline", @@ -89,6 +89,7 @@ "items": { "type": "array", "items": { + "type": "string", "format": "file-path", "pattern": "^.*\\.txt$", "exists": true