diff --git a/.github/workflows/github-actions-demo-yml b/.github/workflows/github-actions-demo-yml deleted file mode 100644 index 8a9c1ffdf..000000000 --- a/.github/workflows/github-actions-demo-yml +++ /dev/null @@ -1,18 +0,0 @@ -name: GitHub Actions Demo -run-name: ${{ github.actor }} is testing out GitHub Actions 🚀 -on: [push] -jobs: - Explore-GitHub-Actions: - runs-on: ubuntu-latest - steps: - - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." - - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!" - - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}." - - name: Check out repository code - uses: actions/checkout@v3 - - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - - run: echo "🖥️ The workflow is now ready to test your code on the runner." - - name: List files in the repository - run: | - ls ${{ github.workspace }} - - run: echo "🍏 This job's status is ${{ job.status }}." 
\ No newline at end of file diff --git a/.github/workflows/mk-docs.yaml b/.github/workflows/mk-docs.yaml new file mode 100644 index 000000000..db681603a --- /dev/null +++ b/.github/workflows/mk-docs.yaml @@ -0,0 +1,19 @@ +name: mk-progenetix-docs +on: + push: + branches: + - website-docs +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.x + - run: pip install mkdocs-material + - run: pip install mkdocs-macros-plugin + - run: pip install pymdown-extensions + - run: pip install mkdocs-mermaid2-plugin + - run: pip install mdx_gh_links + - run: mkdocs gh-deploy --force diff --git a/.gitignore b/.gitignore index ff647f764..bd691366a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,3 @@ site .DS_Store models/.DS_Store /.vs -docs/schemas-md diff --git a/bin/SCHEMAS2MD.md b/bin/SCHEMAS2MD.md index cc7ff5b63..7ae735205 100644 --- a/bin/SCHEMAS2MD.md +++ b/bin/SCHEMAS2MD.md @@ -132,7 +132,7 @@ _NB:_ The script was built to work with the Beacon v2 Model schemas and the auth _NB:_ The decission to take YAMLs (and not JSON) as an input is deliberate and made by the author. -_NB:_ The script only processes the `Terms` nested **up to 3 degrees of hierarchy**. Before Adoption of VRS/PHX that limit was OK. +_NB:_ The script only processes the `Terms` nested **up to 3 degrees of hierarchy**. Before Adoption of VRS/PXF that limit was OK. _NB:_ The script also includes the Beacon v2 Models examples from [beacon-v2 repo](https://github.com/ga4gh-beacon/beacon-v2) in JSON format. 
diff --git a/bin/jsonref2json.py b/bin/_jsonref2json.py similarity index 100% rename from bin/jsonref2json.py rename to bin/_jsonref2json.py diff --git a/bin/beacon_yaml2md.pl b/bin/beacon_yaml2md.pl index 9c8be4f38..96e59c17f 100755 --- a/bin/beacon_yaml2md.pl +++ b/bin/beacon_yaml2md.pl @@ -2,11 +2,11 @@ # # Script to convert Beacon v2 Models schemas to Markdown tables # -# Last Modified: May/05/2022 +# Last Modified: Mar/26/2024 # # Version 2.0.0 # -# Copyright (C) 2021-2022 Manuel Rueda (manuel.rueda@crg.eu) +# Copyright (C) 2021-2024 Manuel Rueda (manuel.rueda@cnag.eu) # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -236,6 +236,10 @@ sub yaml2md_obj { # We parse $yaml to get paths and more... my ( $base, $dir, $ext ) = fileparse( $yaml, '.yaml' ); $ext =~ s/\.yaml/.md/; + + # Ad hoc fix for two files that have same namex except for uc/lc + # AgeRange == ageRange and Value == value on MacOS cwAPFS (Case insensitive) + $base = $base . '_PXF' if ( $base eq 'AgeRange' || $base eq 'Value' ); my $file = catfile( $mo_dir, $base . $ext ); # Note -> $base.$ext write_file( $file, $out_str ); @@ -278,11 +282,11 @@ sub yaml_slicer { # one YAML file for each property and then re-use code from the 'main' schema ########################################## - # **** Note about VRS / PHX adoption *** # + # **** Note about VRS / PXF adoption *** # ########################################## # The adoption of those standards had technical implications. The script expects objects to have - # for the object and then . VRS/PHX follow JSON schemas that include /oneOf allOf anyOf/ + # for the object and then . VRS/PXF follow JSON schemas that include /oneOf allOf anyOf/ # plus other complex intructions such as . 
# This becomes a real challenge with $ref as, for instance, in we can not find the key for # 'MolecularVariation', 'SystemicVariation', 'LegacyVariation' @@ -352,7 +356,7 @@ sub yaml_slicer { sub table_content { my ( $yaml_properties, $ra_properties, $headers, $obj, $link ) = @_; - my @lc_headers = map { lc } @$headers; # Copy array uc to avoid modifying original $ref + my @lc_headers = map { lc } @$headers; # Copy array uc to avoid modifying original $ref my $out_str = ''; #---------------------------------------------------------| @@ -394,10 +398,10 @@ sub table_content { if $header eq 'example'; # Slice differentely if $object->{type} eq 'array' - if ($object->{type} eq 'array' ) { - for ('description', 'properties'){ - $value_header = $object->{items}{$_} if $header eq $_; - } + if ( $object->{type} eq 'array' ) { + for ( 'description', 'properties' ) { + $value_header = $object->{items}{$_} if $header eq $_; + } } # Now convert data structure to string @@ -454,7 +458,7 @@ sub ref2str { # string or undef else { - $out_str = defined $data->[0] ? join ', ', @$data : 'NA'; # Note ', ' to allow HTML column rendering + $out_str = defined $data->[0] ? join ', ', @$data : 'NA'; # Note ', ' to allow HTML column rendering } } elsif ( ref $data eq 'HASH' ) { @@ -480,15 +484,20 @@ sub add_external_links { my ( $tmp_str, $key ) = @_; # Note: This is an ad hoc solution to fix errors with deeply-nested data - my @phx = qw( typedQuantities days weeks Quantity high low); - my @vrs = qw(_id state type CURIE Location); + my @pxf = qw( typedQuantities days weeks Quantity high low); + my @vrs = qw(_id state type CURIE Location); my @framework = ("ontologyTerm"); - return ( any { ( $_ eq $key ) } @phx ) + + return ( any { ( $_ eq $key ) } @pxf ) ? "[$key](https://phenopacket-schema.readthedocs.io/en/latest/building-blocks.html)" : ( any { ( $_ eq $key ) } @vrs ) ? "[$key](https://vrs.ga4gh.org/en/stable/terms_and_model.html#$key)" - : ( any { ( $_ eq $key ) } @framework ) - ? 
"[$key](https://github.com/ga4gh-beacon/beacon-v2/blob/main/framework/src/common/$key.yaml)" + : ( any { ( $_ eq $key ) } @framework ) + ? "[$key](https://github.com/ga4gh-beacon/beacon-v2/blob/main/framework/src/common/$key.yaml)" + + # NB: Ad hoc solution for properties having equal name (lc) + : ( $key eq 'AgeRange' || $key eq 'Value' ) + ? "[$key]($tmp_str/${key}_PXF.md)" : "[$key]($tmp_str/$key.md)"; } @@ -588,7 +597,7 @@ sub create_str_yaml { ## ontologyTerm.yaml is needed due to a bug with jsonref2json.js that overrided "parent" field - my $str_ontologyTerm = < [ 'MolecularVariation', 'SystemicVariation', 'LegacyVariation' ], 'SystemicVariation' => ['CopyNumber'], - 'MolecularVariation' => [ 'Allele', 'Haplotype' ], - 'location' => [ 'CURIE', 'Location' ], + 'MolecularVariation' => [ 'Allele', 'Haplotype' ], + 'location' => [ 'CURIE', 'Location' ], 'state' => [ 'SequenceState', 'SequenceExpression' ], - 'Value' => [ 'Quantity', 'ontologyTerm' ] + 'Value' => [ 'Quantity', 'ontologyTerm' ] }; # We'll be checking @@ -699,14 +708,17 @@ sub parse_json_keywords { # my $const = $pointer->get("/$keyword/$property/$count/properties/type/const"); # $tmp_hash->{properties}{$const} = $elements; #} else{ - my $tmp_term = ( $pointer->contains("/$keyword/$count/title") && $pointer->get("/$keyword/$count/title") ne 'Ontology Term' ) + my $tmp_term = + ( $pointer->contains("/$keyword/$count/title") + && $pointer->get("/$keyword/$count/title") ne + 'Ontology Term' ) ? $pointer->get("/$keyword/$count/title") : @{ $terms->{$property} }[$count]; - $tmp_hash->{properties}{$tmp_term} = $elements if $tmp_term; # Ad-hoc some terms appear duplicated and come empty.... - #} + $tmp_hash->{properties}{$tmp_term} = $elements if $tmp_term; # Ad-hoc some terms appear duplicated and come empty.... 
+ #} $count++; } - $data = $tmp_hash; # Adding new reference + $data = $tmp_hash; # Adding new reference } } return $data; @@ -872,7 +884,7 @@ =head1 HOW TO RUN BEACON_YAML2MD I The decission to take YAMLs (and not JSON) as an input is deliberate and made by the author. -I The script only processes the C nested B. Before Adoption of VRS/PHX that limit was OK. +I The script only processes the C nested B. Before Adoption of VRS/PXF that limit was OK. I The script also includes the Beacon v2 Models examples from L in JSON format. diff --git a/bin/transform_json2md.sh b/bin/transform_json2md.sh index 371471265..ce73254fc 100755 --- a/bin/transform_json2md.sh +++ b/bin/transform_json2md.sh @@ -2,11 +2,11 @@ # # Script to convert Beacon v2 Models to Markdown # -# Last Modified: Jul/20/2022 +# Last Modified: Mar/26/2024 # # Version 2.0.0 # -# Copyright (C) 2021-2022 Manuel Rueda (manuel.rueda@crg.eu) +# Copyright (C) 2021-2024 Manuel Rueda (manuel.rueda@cnag.eu) # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -26,11 +26,12 @@ set -eu mod_dir=../models/json/beacon-v2-default-model fwk_dir=../framework/json -adhoc_url='https://raw.githubusercontent.com/ga4gh-beacon/beacon-v2/main/bin/adhoc' +#adhoc_url='https://raw.githubusercontent.com/ga4gh-beacon/beacon-v2/main/bin/adhoc' +adhoc_url='https://raw.githubusercontent.com/mrueda/beacon-v2/main/bin/adhoc' out_dir=./deref_schemas jsonref='node ./jsonref2json.js' yaml2md=./beacon_yaml2md.pl -yaml2json='perl -MYAML -MJSON -0777 -wnl -e' +yaml2json='perl -MYAML::XS -MJSON::XS -0777 -wnl -e' mkdir -p $out_dir/obj @@ -75,7 +76,7 @@ do rm $out_dir/$schema/defaultSchema.mod.json echo "Transforming $schema JSON to YAML ..." 
- $yaml2json 'print YAML::Dump(decode_json($_))' $out_dir/$schema/defaultSchema.json | perl -pe 's/ \*(\d+)$/ $1/' > $out_dir/$schema/defaultSchema.yaml + $yaml2json 'print YAML::XS::Dump(decode_json($_))' $out_dir/$schema/defaultSchema.json | perl -pe 's/ \*(\d+)$/ $1/' > $out_dir/$schema/defaultSchema.yaml echo "---" done diff --git a/docs/FAQ.md b/docs/FAQ.md index 93f331662..11758f132 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -149,5 +149,16 @@ representations and will be adopted by Beacon v2.n after reaching a release state. +??? faq "Does the Beacon protocol support Boolean expressions? " + + No (...but). Beacon queries as of v2 always assume a logical **AND** between query parameters + and individual filters, _i.e._ all conditions have to be met. There is currently + no support for Boolean expressions. + However, a logical exception is the use of multiple filters for the same parameter which + a Beacon implementation should treat as a logical **OR** since they otherwise + would fail in most instances. E.g. the query using `NCIT:C3493` and `NCIT:C2926` + (mapped against `biosample.histological_diagnosis.id`) would match both + _Lung Non-Small Cell Carcinoma_ (NCIT:C2926) and _Lung Squamous Cell Carcinoma_ + (NCIT:C3493) which are exclusive diagnoses. diff --git a/docs/code-organization.md b/docs/code-organization.md index 4c7769ac6..70e5d4fc2 100644 --- a/docs/code-organization.md +++ b/docs/code-organization.md @@ -22,7 +22,117 @@ Changes to the Markdown files in the `/docs` directory (and its children) will i The `gh-pages` branch is generated from the `/docs` directory through its `mkdocs` workflow and contains the website itself. **Do not edit** -## Topic branches +## Hotfix branches + +These are the branches that are meant to fix some bugs that break the specifications and need an urgent fix. The branches are directly deployed towards the `main` branch. 
+ +### `entry-type-definitions-cleanup` + +This branch aims to redefine the wording of the entry types in a way that makes more clear what are the entry types. As the old definitions could mislead to some confusions, this is a sensible change that is needed to directly affect the current modification. + +Current status:Waiting for PR to be accepted. + +### `hotfix_filteringTermsResults` + +As the beaconFilteringTermsResults.json "type" property of the FilteringTerm is ambiguous, this branch has the objective to fix this and make filtering terms object to be operative and ready to point to the filtering terms type. + +Current status:Waiting for PR to be accepted. + +### `schema-urgent-fixes` + +Some of the instances of the schema were missing attributes and other aspects that are required to make a beacon work. This is mandatory to be urgently fixed and this is the purpose of this branch deployment. + +Current status:Waiting for PR to be accepted. + +## Feature and subfeature branches + +The feature branches are the branches that bring a lot of changes together to change some specific part of the specifications. They can be composed by different subfeature branches that commit to them or just have one single working branch. The feature branches commit to the `develop` branch as they are the changes that will lead beacon to be upgraded to a new version. The subfeature branches commit to their parent feature branch, as they are a microchange of all the aspects that have to change in a new feature that is being developed. The branches are named as the main purpose of them, so it is made very clear what is the working area of them and the subfeature branches add the name of the feature branch they belong as a prefix followed by an underscore. The list of these feature branches with their subfeature branches is the one shown next: + +### `clean-up` + +#### `clean-up_sticky_modified_files` + +Current status:Last commit by @jrambla. 
+ +#### `clean-up_renaming_entity_to_entry_type` + +Current status:Added in framework-refactor-entry-type-definitions branch by @mbaudis and waiting for a PR to be accepted. + +#### `clean-up_decouple-model-framework-refs` + +Current status:Last commit by @mbaudis. + +#### `clean-up_move-CURIE-to-beaconCommonComponents` + +Current status:This branch has not had any commit. + +#### `clean-up_de-snakify-token-names` + +Current status:This branch has not had any commit. + +#### `clean-up_refactor-analysys-pipeline-info` + +Current status:Last commit by @mbaudis. + +### `resultsets` + +#### `resultsets_remove-requirement-results-resultsCount` + +Current status:Last commit by @mbaudis. + +### `requestParameters` + +#### `requestParameters_genomicVariations-fix` + +Current status:Last commit by @mbaudis. + +#### `requestParameters_list-parameters-with-comma` + +Current status:This branch has not had any commit. + +### `phenopackets` + +#### `phenopackets_standards-alignment` + +Current status:Last commit by @mbaudis. + +### `network` + +#### `network_aggregator-and-networks-support` + +Current status:This branch has not had any commit. + +#### `network_meta-add-aggregator-flag` + +Current status:Last commit by @mbaudis. + +### `anyOf` + +#### `anyOf_filteringTerms` + +Current status:This branch has not had any commit. + +### `genomicVariations` + +Current status:Waiting for PR to be accepted. + +### `received-filters` + +Current status:Waiting for PR to be accepted. + +### `response-meta-add-warnings` + +Current status:Waiting for PR to be accepted. + +### `framework-refactor-entry-type-definitions` + +Current status:Waiting for PR to be accepted. + + + + + + ==TBD== diff --git a/docs/filters.md b/docs/filters.md index db8de74b7..ff343c4ba 100644 --- a/docs/filters.md +++ b/docs/filters.md @@ -72,14 +72,19 @@ Alphanumerical value Filter types contain: "type": "alphanumeric", "id": "PATO:0000011", "label": "age" - }, - ... 
+ } ] ``` ## Using Filters in Queries -For all query types, the logical `AND` is implied between Filters. The Filter `id` is required for all query types. +The Filter `id` is required for all query types. + +!!! Note "Boolean Logic Between Filtering Terms" + + Beacon queries as of v2 always imply a logical **AND** between query parameters + and individual filters, _i.e._ all conditions have to be met. There is currently + no support for Boolean expressions. !!! Note "Filters in `GET` Requests" @@ -87,6 +92,12 @@ For all query types, the logical `AND` is implied between Filters. The Filter `i In this case general filter defaults apply (e.g. `{ "includeDescendantTerms": true }`). Generally, use of filters other than CURIE values for filter ids is discouraged. +!!! Attention "List Parameters in GET Requests" + + Since the direct interpretation of list parameters in queries is not supported by + some server environments (e.g. PHP, GO…), list parameters such as `start` and `end` + should be provided as **comma-concatenated** strings when using them in GET requests. + ### CURIE based filters query (type "OntologyFilters") !!! 
note "Hierarchical term expansion" diff --git a/docs/formats-standards.md b/docs/formats-standards.md index d2da97509..d5f42384d 100644 --- a/docs/formats-standards.md +++ b/docs/formats-standards.md @@ -376,5 +376,5 @@ Beacon directly uses the (IMO preferable) [representation through an ontology te ##### LINK: [Phenopackets Documentation](https://phenopacket-schema.readthedocs.io/en/latest/index.html) -[^1]: Source: [@andrewyatz](https://github.com/@andrewyatz/) at [SchemaBlocks {S}[B]](https://schemablocks.org/standards/genome-coordinates.html) +[^1]: Source: [@andrewyatz](https://github.com/@andrewyatz/) at [GenomeStandards](https://genomestandards.org/standards/genome-coordinates/) diff --git a/docs/framework.md b/docs/framework.md index a9d620cd2..d21521619 100644 --- a/docs/framework.md +++ b/docs/framework.md @@ -67,12 +67,12 @@ Contains the Json schema files that describe the Beacon configuration, its conte Contains the following Json schemas: -* **beaconRequestBody.json:** Schema for the whole Beacon request. It is named `RequestBody` to keep the same nomenclature used by OpenAPI v3, but it actually contains the definition of the whole HTTP POST request payload. -* **beaconRequestMeta.json:** Meta section of the Beacon request. It includes request context details relevant for the Beacon server when processing the request, like the Beacon API version used to format the request or the schemas expected for the entry types in the response. -* **filteringTerms.json:** defines the schema for the filters included in the request. -* **requestParameters.json** defines the, very free, schema of the parameters included in the request. -* **examples-fullDocuments folder:** includes examples of "actual" requests. The example labelled with `MIN` in the name shows the minimal required attributes for the request to be compliant. The example labelled with `MAX` in the name includes a richer case with all the sections filled in. 
-* **examples-sections folder:** includes examples of "actual" sections of the requests. It is included to allow specification designers and Beacon implementers to check the compliance with a single section instead of having to implement a whole request. Such way, We aim to facilitate an "incremental" implementation of an instance. +* `beaconRequestBody.json:` Schema for the whole Beacon request. It is named `RequestBody` to keep the same nomenclature used by OpenAPI v3, but it actually contains the definition of the whole HTTP POST request payload. +* `beaconRequestMeta.json:` Meta section of the Beacon request. It includes request context details relevant for the Beacon server when processing the request, like the Beacon API version used to format the request or the schemas expected for the entry types in the response. +* `filteringTerms.json:` defines the schema for the filters included in the request. +* `requestParameters.json` defines the, very free, schema of the parameters included in the request. +* `examples-fullDocuments` folder: includes examples of "actual" requests. The example labelled with `MIN` in the name shows the minimal required attributes for the request to be compliant. The example labelled with `MAX` in the name includes a richer case with all the sections filled in. +* `examples-sections` folder: includes examples of "actual" sections of the requests. It is included to allow specification designers and Beacon implementers to check the compliance with a single section instead of having to implement a whole request. Such way, We aim to facilitate an "incremental" implementation of an instance. #### Differences between FilteringTerms and RequestParameters Both, the filters (*filteringTerms*) and the parameters (*requestParameters*), are used to refine the query. 
The availability of two mechanisms to refine the queries could sound initially confusing, but that separation is taylored to facilitate the interpretation of the request by the Beacon server. @@ -86,34 +86,63 @@ An unrestricted query like `/datasets` should return the list of all datasets in * anything else would probably be a request parameter. ### The Responses + The Beacon concept includes several types of responses: some informative or informational and some with actual data payloads, and the error one. -#### The Informational responses -A Beacon is able to return information, details, about itself. Many of the schema responses included in the `responses` folder have a 1-to-1 relationship with the corresponding configuration documents and their equivalent root endpoints, e.g. the `beaconEntryTypeResponse.json` is the schema of a response that wraps the `beaconConfiguration.json` document, and is then used as the payload of the `/entry_types` root endpoint. Schematically: +#### Informational responses + +A Beacon is able to return information, details, about itself. Many of the schema +responses included in the `responses` folder have a 1-to-1 relationship with the +corresponding configuration documents and their equivalent root endpoints, e.g. +the `beaconEntryTypeResponse.json` is the schema of a response that wraps the +`beaconConfiguration.json` document, and is then used as the payload of the +`/entry_types` root endpoint. Schematically: + * *configuration/an_schema.json*: describes the schema of the configuration file itself. * *responses/an_schema_response.json*: describes the format of the response that returns these configuration information. * *root/endpoints.json*: describes the API endpoints to be called and parameters to be used to retrieve such responses. The following schemas refer to informational responses: *beaconConfigurationResponse*, *beaconEntryTypeResponse*, *beaconFilteringTermsResponse*, Γ’nd *beaconMapResponse*. 
-#### The results responses +#### Data Responses + A Beacon could return responses at different granularity levels: -* **boolean response:** only returns `exists: true` ('Yes') or `exists: false` ('No') to a given query. -* **count response:** returns `Yes`/`No` and the number of matching results. -* **resultset response:** returns `Yes`/`No`, the number of matching results and details of them per every collection (e.g. every dataset or cohort) and, if granted, details on every record that matches the query. +* **boolean**: only returns `exists: true` ('Yes') or `exists: false` ('No') to a given query. +* **count**: returns `Yes`/`No` and the number of matching results. +* **record**: returns `Yes`/`No`, the number of matching results and all documents + corresponding to the requested entities. Documents are wrapped in "result set" + objects for every collection (e.g. every dataset or cohort). Even for _record_ + level responses each beacon can control the details of data exposed in record + besides the minimal requirements of the entry type's schema. Each of these granularity levels has an equivalent response schema: * **boolean**: `beaconBooleanResponse` * **count**: `beaconCountResponse` -* **resultset** (with or w/o record details): `beaconResultSetsResponse` +* **record**: `beaconResultSetsResponse` An additional schema, *beaconCollectionsResponse*, describes such responses that returns details about the collections in a Beacon, but not the collection content themselves. Otherwise said, the response describes a dataset, but not returns the contents of any dataset. -### The common components +### Common Components + +Some elements are transversal to the Framework and to any model, e.g. the schema +for describing an ontology term or the reference to an external schema (like the +reference to GA4GH Phenopackets or GA4GH Service Info schemas). 
+ +#### Pagination - `skip` and `limit` + +Record level responses potentially may return **many** (_i.e._ thousands and beyond) +documents which usually would be "paginated", _i.e._ split into many chunks ("pages"). +Beacon handles _pagination_ through the `skip` and `limit` parameters as part of the +request: + +* `limit` in the request tells the server the maximum number of records that should + be returned in a single response (_i.e._ the "page size") +* `skip` indicates how many of those pages should be skipped over when delivering + the results -Some elements are transerval to the Framework and to any model, e.g. the schema for describing an ontology term or the reference to an external schema (like the reference to GA4GH Phenopackets or GA4GH Service Info schemas). +Therefore, `skip: 2` and `limit: 8` will return records 17-24 (if those exist). ### Testing the compliance of an implementation with *testMode* @@ -136,12 +165,11 @@ Except when testing, most of the Beacon queries are expected to be answered by ' * **defaultGranularity:** Default granularity of the responses. Some responses could return higher detail, but this would be the granularity by default. * **securityLevels:** All access levels supported by the Beacon. Any combination is valid, as every option would apply to different parts of the Beacon. Available options are: -Granularity|Description +Granularity | Description -----------|----------- -`boolean`|returns 'true/false' responses. -`count`|adds the total number of positive results found. -`aggregated`|returns summary, aggregated or distribution like responses per collection. -`record`|returns details for every row. 
For those cases where a Beacon prefers to return records with less, not all, attributes, different strategies have been considered, e.g.: keep non-mandatory attributes empty, or Beacon to provide a minimal record definition, but these strategies still need to be tested in real world cases and hence no design decision has been taken yet. diff --git a/docs/img/BeaconGeneQuery-graphics.png b/docs/img/BeaconGeneQuery-graphics.png new file mode 100644 index 000000000..7f75655d4 Binary files /dev/null and b/docs/img/BeaconGeneQuery-graphics.png differ diff --git a/docs/index.md b/docs/index.md index 5f5682d43..6d03a7655 100644 --- a/docs/index.md +++ b/docs/index.md @@ -5,7 +5,7 @@ discovery of genomic (and phenoclinic) data in biomedical research and clinical Beacon facilitates the discovery of genomic variants and biomedical data in single or distributed resources with the goal to empower _federated_ data models - _i.e._ the discovery (and potential retrieval) of data from different -orgnisational and geographic locations. +organisational and geographic locations.
![Beacon v2 Cartoon](img/Beacon-Networks-v2-graphics/Beacon-Networks-v2-graphics-Michael.003.png){: style="width: 600px; margin-top: -30px; margin-bottom: -30px;" } diff --git a/docs/schemas-md/beacon_terms.md b/docs/schemas-md/beacon_terms.md index 79b4edc88..cefc4a524 100644 --- a/docs/schemas-md/beacon_terms.md +++ b/docs/schemas-md/beacon_terms.md @@ -5,7 +5,7 @@ * [ageAtProcedure](./obj/ageAtProcedure.md) * [ageOfOnset](./obj/ageOfOnset.md) * [ageRange](./obj/ageRange.md) -* [AgeRange](./obj/AgeRange.md) +* [AgeRange_PXF](./obj/AgeRange_PXF.md) * [aligner](./obj/aligner.md) * [Allele](./obj/Allele.md) * [alleleFrequency](./obj/alleleFrequency.md) @@ -167,8 +167,8 @@ * [tumorProgression](./obj/tumorProgression.md) * [unit](./obj/unit.md) * [updateDateTime](./obj/updateDateTime.md) -* [Value](./obj/Value.md) * [value](./obj/value.md) +* [Value_PXF](./obj/Value_PXF.md) * [variantAlternativeIds](./obj/variantAlternativeIds.md) * [variantCaller](./obj/variantCaller.md) * [variantInternalId](./obj/variantInternalId.md) diff --git a/docs/schemas-md/obj/AgeRange.md b/docs/schemas-md/obj/AgeRange_PXF.md similarity index 100% rename from docs/schemas-md/obj/AgeRange.md rename to docs/schemas-md/obj/AgeRange_PXF.md diff --git a/docs/schemas-md/obj/Complex Value.md b/docs/schemas-md/obj/Complex Value.md index c2ef54bca..fcfd3be4a 100644 --- a/docs/schemas-md/obj/Complex Value.md +++ b/docs/schemas-md/obj/Complex Value.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| Complex Value | Definition of a complex value class. Provenance: GA4GH Phenopackets v2 `TypedQuantity` | object | [typedQuantities](https://phenopacket-schema.readthedocs.io/en/latest/building-blocks.html) | NA | NA| +| Complex Value | Definition of a complex value class. 
Provenance: GA4GH Phenopackets v2 `TypedQuantity` | object | [required](./required.md), [typedQuantities](https://phenopacket-schema.readthedocs.io/en/latest/building-blocks.html) | NA | NA| diff --git a/docs/schemas-md/obj/Value.md b/docs/schemas-md/obj/Value_PXF.md similarity index 100% rename from docs/schemas-md/obj/Value.md rename to docs/schemas-md/obj/Value_PXF.md diff --git a/docs/schemas-md/obj/ageAtProcedure.md b/docs/schemas-md/obj/ageAtProcedure.md index 30a208cf2..159874807 100644 --- a/docs/schemas-md/obj/ageAtProcedure.md +++ b/docs/schemas-md/obj/ageAtProcedure.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| ageAtProcedure | NA | oneOf | [Age](./Age.md), [AgeRange](./AgeRange.md), [GestationalAge](./GestationalAge.md), [TimeInterval](./TimeInterval.md) | NA | NA| +| ageAtProcedure | NA | oneOf | [Age](./Age.md), [AgeRange](./AgeRange_PXF.md), [GestationalAge](./GestationalAge.md), [TimeInterval](./TimeInterval.md) | NA | NA| diff --git a/docs/schemas-md/obj/ageOfOnset.md b/docs/schemas-md/obj/ageOfOnset.md index db5e513d8..8d7a32bdd 100644 --- a/docs/schemas-md/obj/ageOfOnset.md +++ b/docs/schemas-md/obj/ageOfOnset.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| ageOfOnset | NA | oneOf | [Age](./Age.md), [AgeRange](./AgeRange.md), [GestationalAge](./GestationalAge.md), [TimeInterval](./TimeInterval.md) | NA | NA| +| ageOfOnset | NA | oneOf | [Age](./Age.md), [AgeRange](./AgeRange_PXF.md), [GestationalAge](./GestationalAge.md), [TimeInterval](./TimeInterval.md) | NA | NA| diff --git a/docs/schemas-md/obj/alternateBases.md b/docs/schemas-md/obj/alternateBases.md index f564a83b5..1b51c83e5 100644 --- a/docs/schemas-md/obj/alternateBases.md +++ b/docs/schemas-md/obj/alternateBases.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| alternateBases | Alternate bases 
for this variant (starting from `start`). * Accepted values: IUPAC codes for nucleotides (e.g. `https://www.bioinformatics.org/sms/iupac.html`). * N is a wildcard, that denotes the position of any base, and can beused as a standalone base of any type or within a partially knownsequence. As example, a query of `ANNT` the Ns can take take any form of[ACGT] and will match `ANNT`, `ACNT`, `ACCT`, `ACGT` ... and so forth.* an *empty value* is used in the case of deletions with the maximally trimmed, deleted sequence being indicated in `ReferenceBases`* Categorical variant queries, e.g. such *not* being represented through sequence & position, make use of the `variantType` parameter.* Either `alternateBases` or `variantType` is required.' | string | NA | T, G, N, AG, | NA| +| alternateBases | Alternate bases for this variant (starting from `start`). * Accepted values: IUPAC codes for nucleotides (e.g. `https://www.bioinformatics.org/sms/iupac.html`). * N is a wildcard, that denotes the position of any base, and can be used as a standalone base of any type or within a partially known sequence.* an *empty value* is used in the case of deletions with the maximally trimmed, deleted sequence being indicated in `ReferenceBases` | string | NA | T, G, N, AG, | NA| diff --git a/docs/schemas-md/obj/date.md b/docs/schemas-md/obj/date.md index 265b422f2..0a06f4730 100644 --- a/docs/schemas-md/obj/date.md +++ b/docs/schemas-md/obj/date.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| date | Date of the exposure in ISO8601 format. | string | NA | NA | NA| +| date | Date of measurement. Addition compared to Phenopackets model. 
| string | NA | NA | NA| diff --git a/docs/schemas-md/obj/geneIds.md b/docs/schemas-md/obj/geneIds.md index e6bfc8ee0..f49486aa2 100644 --- a/docs/schemas-md/obj/geneIds.md +++ b/docs/schemas-md/obj/geneIds.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| geneIds | NA | array | NA | `["ACE2"]`,
`["BRCA1"]` | NA| +| geneIds | NA | array | NA | `["ACE2"]`,
`["BRCA1", "ENSG00000012048"]` | NA| diff --git a/docs/schemas-md/obj/id.md b/docs/schemas-md/obj/id.md index 75ad8e9c2..9b1a75368 100644 --- a/docs/schemas-md/obj/id.md +++ b/docs/schemas-md/obj/id.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| id | Run ID. | string | NA | SRR10903401 | NA| +| id | A CURIE identifier, e.g. as `id` for an ontology term. | string | NA | ga4gh:GA.01234abcde, DUO:0000004, orcid:0000-0003-3463-0775, PMID:15254584 | NA| diff --git a/docs/schemas-md/obj/measurementValue.md b/docs/schemas-md/obj/measurementValue.md index 51ef7d07c..7a7d07e97 100644 --- a/docs/schemas-md/obj/measurementValue.md +++ b/docs/schemas-md/obj/measurementValue.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| measurementValue | NA | oneOf | [Complex Value](./Complex Value.md), [Value](./Value.md) | NA | NA| +| measurementValue | NA | oneOf | [Complex Value](./Complex Value.md), [Value](./Value_PXF.md) | NA | NA| diff --git a/docs/schemas-md/obj/notes.md b/docs/schemas-md/obj/notes.md index 34da3f8f7..8c32f7e29 100644 --- a/docs/schemas-md/obj/notes.md +++ b/docs/schemas-md/obj/notes.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| notes | Unstructured text to describe additional properties of this disease instance. | string | NA | Some free text | NA| +| notes | Unstructured text to describe this measurement. Addition compared to Phenopackets model. 
| string | NA | Some free text | NA| diff --git a/docs/schemas-md/obj/observationMoment.md b/docs/schemas-md/obj/observationMoment.md index fc75d56bb..4e0e8bdf1 100644 --- a/docs/schemas-md/obj/observationMoment.md +++ b/docs/schemas-md/obj/observationMoment.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| observationMoment | NA | oneOf | [Age](./Age.md), [AgeRange](./AgeRange.md), [GestationalAge](./GestationalAge.md), [TimeInterval](./TimeInterval.md) | NA | NA| +| observationMoment | NA | oneOf | [Age](./Age.md), [AgeRange](./AgeRange_PXF.md), [GestationalAge](./GestationalAge.md), [TimeInterval](./TimeInterval.md) | NA | NA| diff --git a/docs/schemas-md/obj/onset.md b/docs/schemas-md/obj/onset.md index fc975e055..254bb2206 100644 --- a/docs/schemas-md/obj/onset.md +++ b/docs/schemas-md/obj/onset.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| onset | NA | oneOf | [Age](./Age.md), [AgeRange](./AgeRange.md), [GestationalAge](./GestationalAge.md), [TimeInterval](./TimeInterval.md) | NA | NA| +| onset | NA | oneOf | [Age](./Age.md), [AgeRange](./AgeRange_PXF.md), [GestationalAge](./GestationalAge.md), [TimeInterval](./TimeInterval.md) | NA | NA| diff --git a/docs/schemas-md/obj/population.md b/docs/schemas-md/obj/population.md index cbe5a0719..f68bc1929 100644 --- a/docs/schemas-md/obj/population.md +++ b/docs/schemas-md/obj/population.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| population | A name for the population. A population could an ethnic, geographical one or just the `members`of a study. | string | NA | East Asian, ICGC Chronic Lymphocytic Leukemia-ES, Men, Children | NA| +| population | A name for the population. A population could be an ethnic, geographical one or just the members of a study. 
| string | NA | East Asian, ICGC Chronic Lymphocytic Leukemia-ES, Men, Children | NA| diff --git a/docs/schemas-md/obj/referenceBases.md b/docs/schemas-md/obj/referenceBases.md index 52d7f35d9..9065277f6 100644 --- a/docs/schemas-md/obj/referenceBases.md +++ b/docs/schemas-md/obj/referenceBases.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| referenceBases | Reference bases for this variant (starting from `start`). * Accepted values: IUPAC codes for nucleotides (e.g. `https://www.bioinformatics.org/sms/iupac.html`). * N is a wildcard, that denotes the position of any base, and can be used as a standalone base of any type or within a partially known sequence. As example, a query of `ANNT` the Ns can take take any form of `[ACGT]` and will match `ANNT`, `ACNT`, `ACCT`, `ACGT` ... and so forth.* an *empty value* is used in the case of insertions with the maximally trimmed, inserted sequence being indicated in `AlternateBases`.NOTE: Beacon instances may not support UIPAC codes and it is not mandatory for them to do so. In such cases the use of [ACGTN] is mandated. | string | NA | A, T, N, , ACG | NA| +| referenceBases | Reference bases for this variant (starting from `start`). * Accepted values: IUPAC codes for nucleotides (e.g. `https://www.bioinformatics.org/sms/iupac.html`). * N is a wildcard, that denotes the position of any base, and can be used as a standalone base of any type or within a partially known sequence.* an *empty value* is used in the case of insertions with the maximally trimmed, inserted sequence being indicated in `AlternateBases`. 
| string | NA | A, T, N, , ACG | NA| diff --git a/docs/schemas-md/obj/resolution.md b/docs/schemas-md/obj/resolution.md index c435266b9..302e9b009 100644 --- a/docs/schemas-md/obj/resolution.md +++ b/docs/schemas-md/obj/resolution.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| resolution | NA | oneOf | [Age](./Age.md), [AgeRange](./AgeRange.md), [GestationalAge](./GestationalAge.md), [TimeInterval](./TimeInterval.md) | NA | NA| +| resolution | NA | oneOf | [Age](./Age.md), [AgeRange](./AgeRange_PXF.md), [GestationalAge](./GestationalAge.md), [TimeInterval](./TimeInterval.md) | NA | NA| diff --git a/docs/schemas-md/obj/unit.md b/docs/schemas-md/obj/unit.md index b7f641e54..be78d0896 100644 --- a/docs/schemas-md/obj/unit.md +++ b/docs/schemas-md/obj/unit.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| unit | The kind of unit. Recommended from NCIT Unit of Category ontology term (NCIT:C42568) descendants | object | [id](./id.md), [label](./label.md) | `[{"id": "NCIT:C70575", "label": "Roentgen"}, {"id": "NCIT:C28252", "label": "Kilogram"}, {"id": "NCIT:C28253", "label": "Milligram"}]` | NA| +| unit | Unit of the exposure. Recommended from NCIT Unit of Category ontology term (NCIT:C42568) descendants. | object | [id](./id.md), [label](./label.md) | `[{"id": "NCIT:C70575", "label": "Roentgen"}, {"id": "NCIT:C28252", "label": "Kilogram"}, {"id": "NCIT:C28253", "label": "Milligram"}]` | NA| diff --git a/docs/schemas-md/obj/value.md b/docs/schemas-md/obj/value.md index 9f7ec9ebd..efbfedc5c 100644 --- a/docs/schemas-md/obj/value.md +++ b/docs/schemas-md/obj/value.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| value | The value of the quantity in the units | number | NA | NA | NA| +| value | Quantification of the exposure. 
| number | NA | NA | NA| diff --git a/docs/schemas-md/obj/variantType.md b/docs/schemas-md/obj/variantType.md index e2f950ad2..6b3ab66be 100644 --- a/docs/schemas-md/obj/variantType.md +++ b/docs/schemas-md/obj/variantType.md @@ -1,3 +1,3 @@ |Term | Description | Type | Properties | Example | Enum| | ---| ---| ---| ---| ---| --- | -| variantType | The `variantType` declares the nature of the variation in relation to a reference. In a response, it is used to describe the variation. In a request, it is used to declare the type of event the Beacon client is looking for. If in queries variants can not be defined through a sequence of one or more bases (`precise` variants) it can be used standalone (i.e. without `alternateBases`) together with positional parameters. Examples here are e.g. queries for structural variants such as `DUP` (increased allelic count of material from the genomic region between `start` and `end` positions without assumption about the placement of the additional sequence) or `DEL` (deletion of sequence following `start`). Either `alternateBases` or `variantType` is required, with the exception of range queries (single `start` and `end` parameters). | string | NA | SNP, DEL, DUP, BND | NA| +| variantType | The `variantType` declares the nature of the variation in relation to a reference. In a response, it is used to describe the variation. Examples here are e.g. structural variants such as `DUP` (increased allelic count of material from the genomic region between `start` and `end` positions without assumption about the placement of the additional sequence) or `DEL` (deletion of sequence following `start`). Either `alternateBases` or `variantType` is required in representing a `LegacyVariation`. | string | NA | SNP, DEL, DUP, BND | NA| diff --git a/docs/variant-queries.md b/docs/variant-queries.md index b8827aa68..698ccc016 100644 --- a/docs/variant-queries.md +++ b/docs/variant-queries.md @@ -170,7 +170,10 @@ parameters. 
_GeneId Queries_ are in essence a variation of _Range Queries_ in which the coordinates are replaced by the [HGNC](https://www.genenames.org) gene symbol. It is left to the implementation if the matching is done on variants annotated for the gene symbol or if -a positional translation is being applied. +a positional translation is being applied. + +![Beacon Gene Query Schema](img/BeaconGeneQuery-graphics.png) + #### Parameters @@ -186,6 +189,36 @@ a positional translation is being applied. ?geneId=EIF4A1&variantMaxLength=1000000&variantType=DEL ``` +=== "Beacon v2 POST for `geneId` (deletion CNV)" + + ``` + { + "$schema":"https://raw.githubusercontent.com/ga4gh-beacon/beacon-v2/main/framework/json/requests/beaconRequestBody.json", + "meta": { + "apiVersion": "2.0", + "requestedSchemas": [ + { + "entityType": "genomicVariation", + "schema": "https://raw.githubusercontent.com/ga4gh-beacon/beacon-v2/main/models/json/beacon-v2-default-model/genomicVariations/defaultSchema.json" + } + ] + }, + "query": { + "requestParameters": { + "g_variant": { + "geneId": "EIF4A1", + "variantType": "EFO:0030067" + } + } + }, + "requestedGranularity": "record", + "pagination": { + "skip": 0, + "limit": 5 + } + } + ``` + ## Beacon _Bracket Queries_ @@ -208,6 +241,12 @@ differing in their exact base extents. Bracket queries require the use of **two** `start` and `end` parameters, in contrast to _Range Queries_. +!!! Attention "List Parameters in GET Requests" + + Since the direct interpretation of list parameters in queries is not supported by + some server environments (e.g. PHP, GO…), list parameters such as `start` and `end` + should be provided as **comma-concatenated** strings when using them in GET requests. + #### Example: CNV Query - _TP53_ Deletion Query by Coordinates @@ -230,12 +269,6 @@ larger than approx. 5Mb (operational definitions of focality vary between 1 and * `datasetIds=__some-dataset-ids__` * `filters` ... - !!! 
Attention "List Parameters in GET Requests" - - Since the direct interpretation of list parameters in queries is not supported by - some server environments (e.g. PHP, GO…), list parameters such as `start` and `end` - should be provided as **comma-concatenated** strings when using them in GET requests. - === "Beacon v2 POST" @@ -292,27 +325,27 @@ larger than approx. 5Mb (operational definitions of focality vary between 1 and ## Genomic Allele Query (Short Form) -==TBD== +When available, variants can be identified through their genomic HGVS short form. === "Beacon v2 GET" ``` - ?allele=NM_004006.2:c.4375C>T + ?genomicAlleleShortForm=NM_004006.2:c.4375C>T ``` - ==to be completed== ## Aminoacid Change Query -==TBD== +Annotated variants can potentially be queried using the single amino acid replacement +format. The `aminoacidChange` parameter may be combined with e.g. a `geneId` to increase +specificity. === "Beacon v2 GET" ``` - ?aminoacidChange=V600E + ?aminoacidChange=V600E&geneId=BRAF ``` - ==to be completed== ## `variantType` Parameter Interpretation diff --git a/mkdocs.yaml b/mkdocs.yaml index bd8f9501d..d2803ef51 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -22,9 +22,8 @@ nav: - Changes: changes-todo - Beacon News ↗: https://beacon-project.io/news/ - Using Beacons: - - Data Discovery: - - Genomic Queries: variant-queries - - Phenotypes and More...: /filters/#using-filters-in-queries + - Genomic Queries: variant-queries + - Phenotypes and More...: /filters/#using-filters-in-queries - Data Delivery: - Biosamples, Variants...: records - Data Handovers: handovers @@ -64,7 +63,7 @@ plugins: # author: Beacon API Development Team # output_path: pdf/beacon.pdf - macros - # - mermaid2 + - mermaid2 markdown_extensions: - toc: @@ -116,13 +115,11 @@ theme: - navigation.instant - navigation.tracking - navigation.sections - - toc.integrate + # - toc.integrate # Options extra: social: - - icon: fontawesome/brands/twitter - link: https://twitter.com/genomebeacons - icon: 
fontawesome/brands/github link: https://github.com/ga4gh-beacon