diff --git a/.build/build-rat.xml b/.build/build-rat.xml index d8268e4b8948..da9c13d9ab9b 100644 --- a/.build/build-rat.xml +++ b/.build/build-rat.xml @@ -53,6 +53,7 @@ + @@ -67,6 +68,8 @@ + + diff --git a/.gitignore b/.gitignore index 584ace1a0a93..9d9d4dc50d47 100644 --- a/.gitignore +++ b/.gitignore @@ -68,7 +68,9 @@ Thumbs.db .ant_targets # Generated files from the documentation -doc/source/configuration/cassandra_config_file.rst +doc/modules/cassandra/pages/configuration/cass_yaml_file.adoc +doc/modules/cassandra/pages/tools/nodetool/ +doc/modules/cassandra/examples/TEXT/NODETOOL/ # Python virtual environment venv/ diff --git a/build.xml b/build.xml index 23fe5b09430c..9b4b0868a2df 100644 --- a/build.xml +++ b/build.xml @@ -254,13 +254,14 @@ - + - + - - - + + + + diff --git a/doc/Makefile b/doc/Makefile index c6632a573c5c..43acc1ee7c5d 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,268 +1,26 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = build - -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source -# the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source - -YAML_DOC_INPUT=../conf/cassandra.yaml -YAML_DOC_OUTPUT=source/configuration/cassandra_config_file.rst - -MAKE_CASSANDRA_YAML = python convert_yaml_to_rst.py $(YAML_DOC_INPUT) $(YAML_DOC_OUTPUT) - -WEB_SITE_PRESENCE_FILE='source/.build_for_website' - -.PHONY: help -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " website to make HTML files for the Cassandra website" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " singlehtml to make a single large HTML file" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " applehelp to make an Apple Help Book" - @echo " devhelp to make HTML files and a Devhelp project" - @echo " epub to make an epub" - @echo " epub3 to make an epub3" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " latexpdf to make LaTeX files and run them through pdflatex" - @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" - @echo " text to make text files" - @echo " man to make manual pages" - @echo " texinfo to make Texinfo files" - @echo " info to make Texinfo files and run them through makeinfo" - @echo " gettext to make PO message catalogs" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " xml to make Docutils-native XML files" - @echo " pseudoxml to make pseudoxml-XML files for display purposes" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - @echo " coverage to run coverage check of the documentation (if enabled)" - @echo " dummy to check syntax errors of document sources" - -.PHONY: clean -clean: - rm -rf $(BUILDDIR)/* - rm -f $(YAML_DOC_OUTPUT) +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +GENERATE_NODETOOL_DOCS = ./scripts/gen-nodetool-docs.py +MAKE_CASSANDRA_YAML = ./scripts/convert_yaml_to_adoc.py ../conf/cassandra.yaml ./modules/cassandra/pages/configuration/cass_yaml_file.adoc .PHONY: html html: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -.PHONY: website -website: clean - @touch $(WEB_SITE_PRESENCE_FILE) - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @rm $(WEB_SITE_PRESENCE_FILE) - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -.PHONY: dirhtml -dirhtml: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -.PHONY: singlehtml -singlehtml: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml - @echo - @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." - -.PHONY: pickle -pickle: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -.PHONY: json -json: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -.PHONY: htmlhelp -htmlhelp: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." - -.PHONY: qthelp -qthelp: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/ApacheCassandraDocumentation.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/ApacheCassandraDocumentation.qhc" - -.PHONY: applehelp -applehelp: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp - @echo - @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." - @echo "N.B. You won't be able to view it unless you put it in" \ - "~/Library/Documentation/Help or install it in your application" \ - "bundle." - -.PHONY: devhelp -devhelp: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp - @echo - @echo "Build finished." 
- @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/ApacheCassandraDocumentation" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/ApacheCassandraDocumentation" - @echo "# devhelp" - -.PHONY: epub -epub: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub - @echo - @echo "Build finished. The epub file is in $(BUILDDIR)/epub." - -.PHONY: epub3 -epub3: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 - @echo - @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." - -.PHONY: latex -latex: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make' in that directory to run these through (pdf)latex" \ - "(use \`make latexpdf' here to do that automatically)." - -.PHONY: latexpdf -latexpdf: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through pdflatex..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -.PHONY: latexpdfja -latexpdfja: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through platex and dvipdfmx..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -.PHONY: text -text: - $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text - @echo - @echo "Build finished. The text files are in $(BUILDDIR)/text." - -.PHONY: man -man: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man - @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man." - -.PHONY: texinfo -texinfo: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo - @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." - @echo "Run \`make' in that directory to run these through makeinfo" \ - "(use \`make info' here to do that automatically)." - -.PHONY: info -info: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo "Running Texinfo files through makeinfo..." - make -C $(BUILDDIR)/texinfo info - @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." - -.PHONY: gettext -gettext: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale - @echo - @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." - -.PHONY: changes -changes: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -.PHONY: linkcheck -linkcheck: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -.PHONY: doctest -doctest: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." - -.PHONY: coverage -coverage: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage - @echo "Testing of coverage in the sources finished, look at the " \ - "results in $(BUILDDIR)/coverage/python.txt." 
- -.PHONY: xml -xml: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml - @echo - @echo "Build finished. The XML files are in $(BUILDDIR)/xml." - -.PHONY: pseudoxml -pseudoxml: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml - @echo - @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." - -.PHONY: dummy -dummy: - $(MAKE_CASSANDRA_YAML) - $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy - @echo - @echo "Build finished. Dummy builder generates no files." + @# hack until a local basic antora build is put in + +.PHONY: gen-asciidoc +gen-asciidoc: + @mkdir -p modules/cassandra/pages/tools/nodetool + @mkdir -p modules/cassandra/examples/TEXT/NODETOOL + python3 $(GENERATE_NODETOOL_DOCS) + python3 $(MAKE_CASSANDRA_YAML) diff --git a/doc/README.md b/doc/README.md index 931db7d8b6d7..608d236cb75b 100644 --- a/doc/README.md +++ b/doc/README.md @@ -23,29 +23,39 @@ Apache Cassandra documentation directory This directory contains the documentation maintained in-tree for Apache Cassandra. This directory contains the following documents: -- The source of the official Cassandra documentation, in the `source/` +- The source of the official Cassandra documentation, in the `source/modules` subdirectory. See below for more details on how to edit/build that documentation. - The specification(s) for the supported versions of native transport protocol. -- Additional documentation on the SASI implementation (`SASI.md`). TODO: we - should probably move the first half of that documentation to the general - documentation, and the implementation explanation parts into the wiki. Official documentation ---------------------- The source for the official documentation for Apache Cassandra can be found in -the `source` subdirectory. The documentation uses [sphinx](http://www.sphinx-doc.org/) -and is thus written in [reStructuredText](http://docutils.sourceforge.net/rst.html). +the `modules/cassandra/pages` subdirectory. The documentation uses [antora](http://www.antora.org/) +and is thus written in [asciidoc](http://asciidoc.org). -To build the HTML documentation, you will need to first install sphinx and the -[sphinx ReadTheDocs theme](the https://pypi.python.org/pypi/sphinx_rtd_theme), which -on unix you can do with: +To generate the asciidoc files for cassandra.yaml and the nodetool commands, run (from project root): +```bash +ant gen-asciidoc ``` -pip install sphinx sphinx_rtd_theme +or (from this directory): + +```bash +make gen-asciidoc +``` + + +(The following has not yet been implemented, for now see the build instructions in the [cassandra-website](https://github.com/apache/cassandra-website) repo.) +To build the documentation, run (from project root): + +```bash +ant gen-doc +``` +or (from this directory): + +```bash +make html ``` -The documentation can then be built from this directory by calling `make html` -(or `make.bat html` on windows). Alternatively, the top-level `ant gen-doc` -target can be used. diff --git a/doc/SASI.md b/doc/SASI.md deleted file mode 100644 index a2fa7176286a..000000000000 --- a/doc/SASI.md +++ /dev/null @@ -1,798 +0,0 @@ - - -# SASIIndex - -[`SASIIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/SASIIndex.java), -or "SASI" for short, is an implementation of Cassandra's -`Index` interface that can be used as an alternative to the -existing implementations. 
SASI's indexing and querying improves on -existing implementations by tailoring it specifically to Cassandra's -needs. SASI has superior performance in cases where queries would -previously require filtering. In achieving this performance, SASI aims -to be significantly less resource intensive than existing -implementations, in memory, disk, and CPU usage. In addition, SASI -supports prefix and contains queries on strings (similar to SQL's -`LIKE = "foo*"` or `LIKE = "*foo*"'`). - -The following goes on describe how to get up and running with SASI, -demonstrates usage with examples, and provides some details on its -implementation. - -## Using SASI - -The examples below walk through creating a table and indexes on its -columns, and performing queries on some inserted data. The patchset in -this repository includes support for the Thrift and CQL3 interfaces. - -The examples below assume the `demo` keyspace has been created and is -in use. - -``` -cqlsh> CREATE KEYSPACE demo WITH replication = { - ... 'class': 'SimpleStrategy', - ... 'replication_factor': '1' - ... }; -cqlsh> USE demo; -``` - -All examples are performed on the `sasi` table: - -``` -cqlsh:demo> CREATE TABLE sasi (id uuid, first_name text, last_name text, - ... age int, height int, created_at bigint, primary key (id)); -``` - -#### Creating Indexes - -To create SASI indexes use CQLs `CREATE CUSTOM INDEX` statement: - -``` -cqlsh:demo> CREATE CUSTOM INDEX ON sasi (first_name) USING 'org.apache.cassandra.index.sasi.SASIIndex' - ... WITH OPTIONS = { - ... 'analyzer_class': - ... 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer', - ... 'case_sensitive': 'false' - ... }; - -cqlsh:demo> CREATE CUSTOM INDEX ON sasi (last_name) USING 'org.apache.cassandra.index.sasi.SASIIndex' - ... WITH OPTIONS = {'mode': 'CONTAINS'}; - -cqlsh:demo> CREATE CUSTOM INDEX ON sasi (age) USING 'org.apache.cassandra.index.sasi.SASIIndex'; - -cqlsh:demo> CREATE CUSTOM INDEX ON sasi (created_at) USING 'org.apache.cassandra.index.sasi.SASIIndex' - ... WITH OPTIONS = {'mode': 'SPARSE'}; -``` - -The indexes created have some options specified that customize their -behaviour and potentially performance. The index on `first_name` is -case-insensitive. The analyzers are discussed more in a subsequent -example. The `NonTokenizingAnalyzer` performs no analysis on the -text. Each index has a mode: `PREFIX`, `CONTAINS`, or `SPARSE`, the -first being the default. The `last_name` index is created with the -mode `CONTAINS` which matches terms on suffixes instead of prefix -only. Examples of this are available below and more detail can be -found in the section on -[OnDiskIndex](#ondiskindexbuilder).The -`created_at` column is created with its mode set to `SPARSE`, which is -meant to improve performance of querying large, dense number ranges -like timestamps for data inserted every millisecond. Details of the -`SPARSE` implementation can also be found in the section on the -[OnDiskIndex](#ondiskindexbuilder). The `age` -index is created with the default `PREFIX` mode and no -case-sensitivity or text analysis options are specified since the -field is numeric. - -After inserting the following data and performing a `nodetool flush`, -SASI performing index flushes to disk can be seen in Cassandra's logs --- although the direct call to flush is not required (see -[IndexMemtable](#indexmemtable) for more details). - -``` -cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) - ... 
VALUES (556ebd54-cbe5-4b75-9aae-bf2a31a24500, 'Pavel', 'Yaskevich', 27, 181, 1442959315018); - -cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) - ... VALUES (5770382a-c56f-4f3f-b755-450e24d55217, 'Jordan', 'West', 26, 173, 1442959315019); - -cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) - ... VALUES (96053844-45c3-4f15-b1b7-b02c441d3ee1, 'Mikhail', 'Stepura', 36, 173, 1442959315020); - -cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) - ... VALUES (f5dfcabe-de96-4148-9b80-a1c41ed276b4, 'Michael', 'Kjellman', 26, 180, 1442959315021); - -cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) - ... VALUES (2970da43-e070-41a8-8bcb-35df7a0e608a, 'Johnny', 'Zhang', 32, 175, 1442959315022); - -cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) - ... VALUES (6b757016-631d-4fdb-ac62-40b127ccfbc7, 'Jason', 'Brown', 40, 182, 1442959315023); - -cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) - ... VALUES (8f909e8a-008e-49dd-8d43-1b0df348ed44, 'Vijay', 'Parthasarathy', 34, 183, 1442959315024); - -cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi; - - first_name | last_name | age | height | created_at -------------+---------------+-----+--------+--------------- - Michael | Kjellman | 26 | 180 | 1442959315021 - Mikhail | Stepura | 36 | 173 | 1442959315020 - Jason | Brown | 40 | 182 | 1442959315023 - Pavel | Yaskevich | 27 | 181 | 1442959315018 - Vijay | Parthasarathy | 34 | 183 | 1442959315024 - Jordan | West | 26 | 173 | 1442959315019 - Johnny | Zhang | 32 | 175 | 1442959315022 - -(7 rows) -``` - -#### Equality & Prefix Queries - -SASI supports all queries already supported by CQL, including LIKE statement -for PREFIX, CONTAINS and SUFFIX searches. - -``` -cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi - ... WHERE first_name = 'Pavel'; - - first_name | last_name | age | height | created_at --------------+-----------+-----+--------+--------------- - Pavel | Yaskevich | 27 | 181 | 1442959315018 - -(1 rows) -``` - -``` -cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi - ... WHERE first_name = 'pavel'; - - first_name | last_name | age | height | created_at --------------+-----------+-----+--------+--------------- - Pavel | Yaskevich | 27 | 181 | 1442959315018 - -(1 rows) -``` - -``` -cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi - ... WHERE first_name LIKE 'M%'; - - first_name | last_name | age | height | created_at -------------+-----------+-----+--------+--------------- - Michael | Kjellman | 26 | 180 | 1442959315021 - Mikhail | Stepura | 36 | 173 | 1442959315020 - -(2 rows) -``` - -Of course, the case of the query does not matter for the `first_name` -column because of the options provided at index creation time. - -``` -cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi - ... 
WHERE first_name LIKE 'm%'; - - first_name | last_name | age | height | created_at -------------+-----------+-----+--------+--------------- - Michael | Kjellman | 26 | 180 | 1442959315021 - Mikhail | Stepura | 36 | 173 | 1442959315020 - -(2 rows) -``` - -#### Compound Queries - -SASI supports queries with multiple predicates, however, due to the -nature of the default indexing implementation, CQL requires the user -to specify `ALLOW FILTERING` to opt-in to the potential performance -pitfalls of such a query. With SASI, while the requirement to include -`ALLOW FILTERING` remains, to reduce modifications to the grammar, the -performance pitfalls do not exist because filtering is not -performed. Details on how SASI joins data from multiple predicates is -available below in the -[Implementation Details](#implementation-details) -section. - -``` -cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi - ... WHERE first_name LIKE 'M%' and age < 30 ALLOW FILTERING; - - first_name | last_name | age | height | created_at -------------+-----------+-----+--------+--------------- - Michael | Kjellman | 26 | 180 | 1442959315021 - -(1 rows) -``` - -#### Suffix Queries - -The next example demonstrates `CONTAINS` mode on the `last_name` -column. By using this mode predicates can search for any strings -containing the search string as a sub-string. In this case the strings -containing "a" or "an". - -``` -cqlsh:demo> SELECT * FROM sasi WHERE last_name LIKE '%a%'; - - id | age | created_at | first_name | height | last_name ---------------------------------------+-----+---------------+------------+--------+--------------- - f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | 1442959315021 | Michael | 180 | Kjellman - 96053844-45c3-4f15-b1b7-b02c441d3ee1 | 36 | 1442959315020 | Mikhail | 173 | Stepura - 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | 1442959315018 | Pavel | 181 | Yaskevich - 8f909e8a-008e-49dd-8d43-1b0df348ed44 | 34 | 1442959315024 | Vijay | 183 | Parthasarathy - 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | 1442959315022 | Johnny | 175 | Zhang - -(5 rows) - -cqlsh:demo> SELECT * FROM sasi WHERE last_name LIKE '%an%'; - - id | age | created_at | first_name | height | last_name ---------------------------------------+-----+---------------+------------+--------+----------- - f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | 1442959315021 | Michael | 180 | Kjellman - 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | 1442959315022 | Johnny | 175 | Zhang - -(2 rows) -``` - -#### Expressions on Non-Indexed Columns - -SASI also supports filtering on non-indexed columns like `height`. The -expression can only narrow down an existing query using `AND`. - -``` -cqlsh:demo> SELECT * FROM sasi WHERE last_name LIKE '%a%' AND height >= 175 ALLOW FILTERING; - - id | age | created_at | first_name | height | last_name ---------------------------------------+-----+---------------+------------+--------+--------------- - f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | 1442959315021 | Michael | 180 | Kjellman - 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | 1442959315018 | Pavel | 181 | Yaskevich - 8f909e8a-008e-49dd-8d43-1b0df348ed44 | 34 | 1442959315024 | Vijay | 183 | Parthasarathy - 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | 1442959315022 | Johnny | 175 | Zhang - -(4 rows) -``` - -#### Text Analysis (Tokenization and Stemming) - -Lastly, to demonstrate text analysis an additional column is needed on -the table. Its definition, index, and statements to update rows are shown below. 
- -``` -cqlsh:demo> ALTER TABLE sasi ADD bio text; -cqlsh:demo> CREATE CUSTOM INDEX ON sasi (bio) USING 'org.apache.cassandra.index.sasi.SASIIndex' - ... WITH OPTIONS = { - ... 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.StandardAnalyzer', - ... 'tokenization_enable_stemming': 'true', - ... 'analyzed': 'true', - ... 'tokenization_normalize_lowercase': 'true', - ... 'tokenization_locale': 'en' - ... }; -cqlsh:demo> UPDATE sasi SET bio = 'Software Engineer, who likes distributed systems, doesnt like to argue.' WHERE id = 5770382a-c56f-4f3f-b755-450e24d55217; -cqlsh:demo> UPDATE sasi SET bio = 'Software Engineer, works on the freight distribution at nights and likes arguing' WHERE id = 556ebd54-cbe5-4b75-9aae-bf2a31a24500; -cqlsh:demo> SELECT * FROM sasi; - - id | age | bio | created_at | first_name | height | last_name ---------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+--------------- - f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | null | 1442959315021 | Michael | 180 | Kjellman - 96053844-45c3-4f15-b1b7-b02c441d3ee1 | 36 | null | 1442959315020 | Mikhail | 173 | Stepura - 6b757016-631d-4fdb-ac62-40b127ccfbc7 | 40 | null | 1442959315023 | Jason | 182 | Brown - 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich - 8f909e8a-008e-49dd-8d43-1b0df348ed44 | 34 | null | 1442959315024 | Vijay | 183 | Parthasarathy - 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West - 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | null | 1442959315022 | Johnny | 175 | Zhang - -(7 rows) -``` - -Index terms and query search strings are stemmed for the `bio` column -because it was configured to use the -[`StandardAnalyzer`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java) -and `analyzed` is set to `true`. The -`tokenization_normalize_lowercase` is similar to the `case_sensitive` -property but for the -[`StandardAnalyzer`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java). These -query demonstrates the stemming applied by [`StandardAnalyzer`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java). - -``` -cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'distributing'; - - id | age | bio | created_at | first_name | height | last_name ---------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- - 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich - 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. 
| 1442959315019 | Jordan | 173 | West - -(2 rows) - -cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'they argued'; - - id | age | bio | created_at | first_name | height | last_name ---------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- - 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich - 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West - -(2 rows) - -cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'working at the company'; - - id | age | bio | created_at | first_name | height | last_name ---------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- - 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich - -(1 rows) - -cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'soft eng'; - - id | age | bio | created_at | first_name | height | last_name ---------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- - 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich - 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West - -(2 rows) -``` - -## Implementation Details - -While SASI, at the surface, is simply an implementation of the -`Index` interface, at its core there are several data -structures and algorithms used to satisfy it. These are described -here. Additionally, the changes internal to Cassandra to support SASIs -integration are described. - -The `Index` interface divides responsibility of the -implementer into two parts: Indexing and Querying. Further, Cassandra -makes it possible to divide those responsibilities into the memory and -disk components. SASI takes advantage of Cassandra's write-once, -immutable, ordered data model to build indexes along with the flushing -of the memtable to disk -- this is the origin of the name "SSTable -Attached Secondary Index". - -The SASI index data structures are built in memory as the SSTable is -being written and they are flushed to disk before the writing of the -SSTable completes. The writing of each index file only requires -sequential writes to disk. In some cases, partial flushes are -performed, and later stitched back together, to reduce memory -usage. These data structures are optimized for this use case. - -Taking advantage of Cassandra's ordered data model, at query time, -candidate indexes are narrowed down for searching minimize the amount -of work done. Searching is then performed using an efficient method -that streams data off disk as needed. - -### Indexing - -Per SSTable, SASI writes an index file for each indexed column. The -data for these files is built in memory using the -[`OnDiskIndexBuilder`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java). 
Once -flushed to disk, the data is read using the -[`OnDiskIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java) -class. These are composed of bytes representing indexed terms, -organized for efficient writing or searching respectively. The keys -and values they hold represent tokens and positions in an SSTable and -these are stored per-indexed term in -[`TokenTreeBuilder`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTreeBuilder.java)s -for writing, and -[`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java)s -for querying. These index files are memory mapped after being written -to disk, for quicker access. For indexing data in the memtable SASI -uses its -[`IndexMemtable`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java) -class. - -#### OnDiskIndex(Builder) - -Each -[`OnDiskIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java) -is an instance of a modified -[Suffix Array](https://en.wikipedia.org/wiki/Suffix_array) data -structure. The -[`OnDiskIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java) -is comprised of page-size blocks of sorted terms and pointers to the -terms' associated data, as well as the data itself, stored also in one -or more page-sized blocks. The -[`OnDiskIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java) -is structured as a tree of arrays, where each level describes the -terms in the level below, the final level being the terms -themselves. The `PointerLevel`s and their `PointerBlock`s contain -terms and pointers to other blocks that *end* with those terms. The -`DataLevel`, the final level, and its `DataBlock`s contain terms and -point to the data itself, contained in [`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java)s. - -The terms written to the -[`OnDiskIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java) -vary depending on its "mode": either `PREFIX`, `CONTAINS`, or -`SPARSE`. In the `PREFIX` and `SPARSE` cases terms exact values are -written exactly once per `OnDiskIndex`. For example, a `PREFIX` index -with terms `Jason`, `Jordan`, `Pavel`, all three will be included in -the index. A `CONTAINS` index writes additional terms for each suffix of -each term recursively. Continuing with the example, a `CONTAINS` index -storing the previous terms would also store `ason`, `ordan`, `avel`, -`son`, `rdan`, `vel`, etc. This allows for queries on the suffix of -strings. The `SPARSE` mode differs from `PREFIX` in that for every 64 -blocks of terms a -[`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java) -is built merging all the -[`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java)s -for each term into a single one. This copy of the data is used for -efficient iteration of large ranges of e.g. timestamps. The index -"mode" is configurable per column at index creation time. 
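As a rough sketch of the difference between the modes (plain Python for illustration, not SASI code), a `PREFIX` or `SPARSE` index conceptually stores each exact term once, while a `CONTAINS` index additionally stores every suffix of every term:

```python
# Illustrative sketch only -- not SASI source code.
def prefix_terms(terms):
    # PREFIX (and SPARSE) mode: each exact term is written once per index.
    return set(terms)

def contains_terms(terms):
    # CONTAINS mode: every suffix of every term is also written, which is
    # what enables suffix/substring matching such as LIKE '%an%'.
    expanded = set()
    for term in terms:
        for i in range(len(term)):
            expanded.add(term[i:])
    return expanded

print(sorted(prefix_terms(["Jason", "Jordan", "Pavel"])))
# ['Jason', 'Jordan', 'Pavel']
print(sorted(contains_terms(["Jason", "Jordan", "Pavel"])))
# also includes 'ason', 'ordan', 'avel', 'son', 'rdan', 'vel', ...
```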
- -#### TokenTree(Builder) - -The -[`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java) -is an implementation of the well-known -[B+-tree](https://en.wikipedia.org/wiki/B%2B_tree) that has been -modified to optimize for its use-case. In particular, it has been -optimized to associate tokens, longs, with a set of positions in an -SSTable, also longs. Allowing the set of long values accommodates -the possibility of a hash collision in the token, but the data -structure is optimized for the unlikely possibility of such a -collision. - -To optimize for its write-once environment the -[`TokenTreeBuilder`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTreeBuilder.java) -completely loads its interior nodes as the tree is built and it uses -the well-known algorithm optimized for bulk-loading the data -structure. - -[`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java)s provide the means to iterate a tokens, and file -positions, that match a given term, and to skip forward in that -iteration, an operation used heavily at query time. - -#### IndexMemtable - -The -[`IndexMemtable`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java) -handles indexing the in-memory data held in the memtable. The -[`IndexMemtable`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java) -in turn manages either a -[`TrieMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java) -or a -[`SkipListMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java) -per-column. The choice of which index type is used is data -dependent. The -[`TrieMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java) -is used for literal types. `AsciiType` and `UTF8Type` are literal -types by defualt but any column can be configured as a literal type -using the `is_literal` option at index creation time. For non-literal -types the -[`SkipListMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java) -is used. The -[`TrieMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java) -is an implementation that can efficiently support prefix queries on -character-like data. The -[`SkipListMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java), -conversely, is better suited for Cassandra other data types like -numbers. - -The -[`TrieMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java) -is built using either the `ConcurrentRadixTree` or -`ConcurrentSuffixTree` from the `com.goooglecode.concurrenttrees` -package. The choice between the two is made based on the indexing -mode, `PREFIX` or other modes, and `CONTAINS` mode, respectively. - -The -[`SkipListMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java) -is built on top of `java.util.concurrent.ConcurrentSkipListSet`. 
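As a rough model of that per-column choice (a simplified illustration, not the actual `IndexMemtable` code), literal text-like columns get a trie-style structure so prefix lookups stay cheap, while other types get an ordered structure so range predicates stay cheap:

```python
import bisect

# Simplified illustration only -- not the IndexMemtable implementation.
LITERAL_TYPES = {"AsciiType", "UTF8Type"}  # literal by default, per the text above

def choose_mem_index(validator_type, is_literal=None):
    """Trie-style index for literal (text-like) columns, ordered
    (skip-list-like) index for everything else."""
    literal = is_literal if is_literal is not None else validator_type in LITERAL_TYPES
    return "TrieMemIndex" if literal else "SkipListMemIndex"

print(choose_mem_index("UTF8Type"))   # TrieMemIndex
print(choose_mem_index("Int32Type"))  # SkipListMemIndex

# An ordered structure keeps range predicates such as age > 21 cheap:
ages = sorted([26, 26, 27, 32, 34, 36, 40])
print(ages[bisect.bisect_right(ages, 21):])  # every indexed age > 21
```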
- -### Querying - -Responsible for converting the internal `IndexExpression` -representation into SASI's -[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) -and -[`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java) -tree, optimizing the tree to reduce the amount of work done, and -driving the query itself the -[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) -is the work horse of SASI's querying implementation. To efficiently -perform union and intersection operations SASI provides several -iterators similar to Cassandra's `MergeIterator` but tailored -specifically for SASIs use, and with more features. The -[`RangeUnionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java), -like its name suggests, performs set union over sets of tokens/keys -matching the query, only reading as much data as it needs from each -set to satisfy the query. The -[`RangeIntersectionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java), -similar to its counterpart, performs set intersection over its data. - -#### QueryPlan - -The -[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) -instantiated per search query is at the core of SASIs querying -implementation. Its work can be divided in two stages: analysis and -execution. - -During the analysis phase, -[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) -converts from Cassandra's internal representation of -`IndexExpression`s, which has also been modified to support encoding -queries that contain ORs and groupings of expressions using -parentheses (see the -[Cassandra Internal Changes](#cassandra-internal-changes) -section below for more details). This process produces a tree of -[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)s, which in turn may contain [`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java)s, all of which -provide an alternative, more efficient, representation of the query. - -During execution the -[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) -uses the `DecoratedKey`-generating iterator created from the -[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) tree. These keys are read from disk and a final check to -ensure they satisfy the query is made, once again using the -[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) tree. At the point the desired amount of matching data has -been found, or there is no more matching data, the result set is -returned to the coordinator through the existing internal components. - -The number of queries (total/failed/timed-out), and their latencies, -are maintined per-table/column family. - -SASI also supports concurrently iterating terms for the same index -accross SSTables. 
The concurrency factor is controlled by the -`cassandra.search_concurrency_factor` system property. The default is -`1`. - -##### QueryController - -Each -[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) -references a -[`QueryController`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java) -used throughout the execution phase. The -[`QueryController`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java) -has two responsibilities: to manage and ensure the proper cleanup of -resources (indexes), and to strictly enforce the time bound for query, -specified by the user via the range slice timeout. All indexes are -accessed via the -[`QueryController`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java) -so that they can be safely released by it later. The -[`QueryController`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java)'s -`checkpoint` function is called in specific places in the execution -path to ensure the time-bound is enforced. - -##### QueryPlan Optimizations - -While in the analysis phase, the -[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) -performs several potential optimizations to the query. The goal of -these optimizations is to reduce the amount of work performed during -the execution phase. - -The simplest optimization performed is compacting multiple expressions -joined by logical intersection (`AND`) into a single [`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) with -three or more [`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java)s. For example, the query `WHERE age < 100 AND -fname = 'p*' AND first_name != 'pa*' AND age > 21` would, -without modification, have the following tree: - - ┌───────┐ - ┌────────│ AND │──────┐ - │ └───────┘ │ - ▼ ▼ - ┌───────┐ ┌──────────┐ - ┌─────│ AND │─────┐ │age < 100 │ - │ └───────┘ │ └──────────┘ - ▼ ▼ - ┌──────────┐ ┌───────┐ - │ fname=p* │ ┌─│ AND │───┐ - └──────────┘ │ └───────┘ │ - ▼ ▼ - ┌──────────┐ ┌──────────┐ - │fname!=pa*│ │ age > 21 │ - └──────────┘ └──────────┘ - -[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) -will remove the redundant right branch whose root is the final `AND` -and has leaves `fname != pa*` and `age > 21`. These [`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java)s will -be compacted into the parent `AND`, a safe operation due to `AND` -being associative and commutative. 
The resulting tree looks like the -following: - - ┌───────┐ - ┌────────│ AND │──────┐ - │ └───────┘ │ - ▼ ▼ - ┌───────┐ ┌──────────┐ - ┌───────────│ AND │────────┐ │age < 100 │ - │ └───────┘ │ └──────────┘ - ▼ │ ▼ - ┌──────────┐ │ ┌──────────┐ - │ fname=p* │ ▼ │ age > 21 │ - └──────────┘ ┌──────────┐ └──────────┘ - │fname!=pa*│ - └──────────┘ - -When excluding results from the result set, using `!=`, the -[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) -determines the best method for handling it. For range queries, for -example, it may be optimal to divide the range into multiple parts -with a hole for the exclusion. For string queries, such as this one, -it is more optimal, however, to simply note which data to skip, or -exclude, while scanning the index. Following this optimization the -tree looks like this: - - ┌───────┐ - ┌────────│ AND │──────┐ - │ └───────┘ │ - ▼ ▼ - ┌───────┐ ┌──────────┐ - ┌───────│ AND │────────┐ │age < 100 │ - │ └───────┘ │ └──────────┘ - ▼ ▼ - ┌──────────────────┐ ┌──────────┐ - │ fname=p* │ │ age > 21 │ - │ exclusions=[pa*] │ └──────────┘ - └──────────────────┘ - -The last type of optimization applied, for this query, is to merge -range expressions across branches of the tree -- without modifying the -meaning of the query, of course. In this case, because the query -contains all `AND`s the `age` expressions can be collapsed. Along with -this optimization, the initial collapsing of unneeded `AND`s can also -be applied once more to result in this final tree using to execute the -query: - - ┌───────┐ - ┌──────│ AND │───────┐ - │ └───────┘ │ - ▼ ▼ - ┌──────────────────┐ ┌────────────────┐ - │ fname=p* │ │ 21 < age < 100 │ - │ exclusions=[pa*] │ └────────────────┘ - └──────────────────┘ - -#### Operations and Expressions - -As discussed, the -[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) -optimizes a tree represented by -[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)s -as interior nodes, and -[`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java)s -as leaves. The -[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) -class, more specifically, can have zero, one, or two -[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)s -as children and an unlimited number of expressions. The iterators used -to perform the queries, discussed below in the -"Range(Union|Intersection)Iterator" section, implement the necessary -logic to merge results transparently regardless of the -[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)s -children. - -Besides participating in the optimizations performed by the -[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java), -[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) -is also responsible for taking a row that has been returned by the -query and making a final validation that it in fact does match. 
This -`satisfiesBy` operation is performed recursively from the root of the -[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) -tree for a given query. These checks are performed directly on the -data in a given row. For more details on how `satisfiesBy` works see -the documentation -[in the code](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java#L87-L123). - -#### Range(Union|Intersection)Iterator - -The abstract `RangeIterator` class provides a unified interface over -the two main operations performed by SASI at various layers in the -execution path: set intersection and union. These operations are -performed in a iterated, or "streaming", fashion to prevent unneeded -reads of elements from either set. In both the intersection and union -cases the algorithms take advantage of the data being pre-sorted using -the same sort order, e.g. term or token order. - -The -[`RangeUnionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java) -performs the "Merge-Join" portion of the -[Sort-Merge-Join](https://en.wikipedia.org/wiki/Sort-merge_join) -algorithm, with the properties of an outer-join, or union. It is -implemented with several optimizations to improve its performance over -a large number of iterators -- sets to union. Specifically, the -iterator exploits the likely case of the data having many sub-groups -of overlapping ranges and the unlikely case that all ranges will -overlap each other. For more details see the -[javadoc](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java#L9-L21). - -The -[`RangeIntersectionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java) -itself is not a subclass of `RangeIterator`. It is a container for -several classes, one of which, `AbstractIntersectionIterator`, -sub-classes `RangeIterator`. SASI supports two methods of performing -the intersection operation, and the ability to be adaptive in choosing -between them based on some properties of the data. - -`BounceIntersectionIterator`, and the `BOUNCE` strategy, works like -the -[`RangeUnionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java) -in that it performs a "Merge-Join", however, its nature is similar to -a inner-join, where like values are merged by a data-specific merge -function (e.g. merging two tokens in a list to lookup in a SSTable -later). See the -[javadoc](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java#L88-L101) -for more details on its implementation. - -`LookupIntersectionIterator`, and the `LOOKUP` strategy, performs a -different operation, more similar to a lookup in an associative data -structure, or "hash lookup" in database terminology. Once again, -details on the implementation can be found in the -[javadoc](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java#L199-L208). - -The choice between the two iterators, or the `ADAPTIVE` strategy, is -based upon the ratio of data set sizes of the minimum and maximum -range of the sets being intersected. 
If the number of the elements in -minimum range divided by the number of elements is the maximum range -is less than or equal to `0.01`, then the `ADAPTIVE` strategy chooses -the `LookupIntersectionIterator`, otherwise the -`BounceIntersectionIterator` is chosen. - -### The SASIIndex Class - -The above components are glued together by the -[`SASIIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/SASIIndex.java) -class which implements `Index`, and is instantiated -per-table containing SASI indexes. It manages all indexes for a table -via the -[`sasi.conf.DataTracker`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/conf/DataTracker.java) -and -[`sasi.conf.view.View`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/conf/view/View.java) -components, controls writing of all indexes for an SSTable via its -[`PerSSTableIndexWriter`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/PerSSTableIndexWriter.java), and initiates searches with -`Searcher`. These classes glue the previously -mentioned indexing components together with Cassandra's SSTable -life-cycle ensuring indexes are not only written when Memtable's flush -but also as SSTable's are compacted. For querying, the -`Searcher` does little but defer to -[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) -and update e.g. latency metrics exposed by SASI. - -### Cassandra Internal Changes - -To support the above changes and integrate them into Cassandra a few -minor internal changes were made to Cassandra itself. These are -described here. - -#### SSTable Write Life-cycle Notifications - -The `SSTableFlushObserver` is an observer pattern-like interface, -whose sub-classes can register to be notified about events in the -life-cycle of writing out a SSTable. Sub-classes can be notified when a -flush begins and ends, as well as when each next row is about to be -written, and each next column. SASI's `PerSSTableIndexWriter`, -discussed above, is the only current subclass. - -### Limitations and Caveats - -The following are items that can be addressed in future updates but are not -available in this repository or are not currently implemented. - -* The cluster must be configured to use a partitioner that produces - `LongToken`s, e.g. `Murmur3Partitioner`. Other existing partitioners which - don't produce LongToken e.g. `ByteOrderedPartitioner` and `RandomPartitioner` - will not work with SASI. -* Not Equals and OR support have been removed in this release while - changes are made to Cassandra itself to support them. 
- -### Contributors - -* [Pavel Yaskevich](https://github.com/xedin) -* [Jordan West](https://github.com/jrwest) -* [Michael Kjellman](https://github.com/mkjellman) -* [Jason Brown](https://github.com/jasobrown) -* [Mikhail Stepura](https://github.com/mishail) diff --git a/doc/antora.yml b/doc/antora.yml new file mode 100644 index 000000000000..4ce17d5f6fd8 --- /dev/null +++ b/doc/antora.yml @@ -0,0 +1,18 @@ +name: Cassandra +title: Cassandra +version: '3.11' +display_version: '3.11' +asciidoc: + attributes: + sectanchors: '' + sectlinks: '' + cass_url: 'http://cassandra.apache.org/' + cass-docker-tag-3x: latest + cass-tag-3x: '3.11' + 311_version: '3.11.10' + 30_version: '3.0.24' + 22_version: '2.2.19' + 21_version: '2.1.22' +nav: +- modules/ROOT/nav.adoc +- modules/cassandra/nav.adoc diff --git a/doc/make.bat b/doc/make.bat deleted file mode 100644 index cbd1d1dbbced..000000000000 --- a/doc/make.bat +++ /dev/null @@ -1,299 +0,0 @@ -@ECHO OFF - -REM -REM Licensed to the Apache Software Foundation (ASF) under one -REM or more contributor license agreements. See the NOTICE file -REM distributed with this work for additional information -REM regarding copyright ownership. The ASF licenses this file -REM to you under the Apache License, Version 2.0 (the -REM "License"); you may not use this file except in compliance -REM with the License. You may obtain a copy of the License at -REM -REM http://www.apache.org/licenses/LICENSE-2.0 -REM -REM Unless required by applicable law or agreed to in writing, software -REM distributed under the License is distributed on an "AS IS" BASIS, -REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -REM See the License for the specific language governing permissions and -REM limitations under the License. -REM - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set BUILDDIR=build -set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . -set I18NSPHINXOPTS=%SPHINXOPTS% . -if NOT "%PAPER%" == "" ( - set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% - set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% -) - -if "%1" == "" goto help - -if "%1" == "help" ( - :help - echo.Please use `make ^` where ^ is one of - echo. html to make standalone HTML files - echo. dirhtml to make HTML files named index.html in directories - echo. singlehtml to make a single large HTML file - echo. pickle to make pickle files - echo. json to make JSON files - echo. htmlhelp to make HTML files and a HTML help project - echo. qthelp to make HTML files and a qthelp project - echo. devhelp to make HTML files and a Devhelp project - echo. epub to make an epub - echo. epub3 to make an epub3 - echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter - echo. text to make text files - echo. man to make manual pages - echo. texinfo to make Texinfo files - echo. gettext to make PO message catalogs - echo. changes to make an overview over all changed/added/deprecated items - echo. xml to make Docutils-native XML files - echo. pseudoxml to make pseudoxml-XML files for display purposes - echo. linkcheck to check all external links for integrity - echo. doctest to run all doctests embedded in the documentation if enabled - echo. coverage to run coverage check of the documentation if enabled - echo. 
dummy to check syntax errors of document sources - goto end -) - -if "%1" == "clean" ( - for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i - del /q /s %BUILDDIR%\* - goto end -) - - -REM Check if sphinx-build is available and fallback to Python version if any -%SPHINXBUILD% 1>NUL 2>NUL -if errorlevel 9009 goto sphinx_python -goto sphinx_ok - -:sphinx_python - -set SPHINXBUILD=python -m sphinx.__init__ -%SPHINXBUILD% 2> nul -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -:sphinx_ok - - -if "%1" == "html" ( - %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/html. - goto end -) - -if "%1" == "dirhtml" ( - %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. - goto end -) - -if "%1" == "singlehtml" ( - %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. - goto end -) - -if "%1" == "pickle" ( - %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can process the pickle files. - goto end -) - -if "%1" == "json" ( - %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can process the JSON files. - goto end -) - -if "%1" == "htmlhelp" ( - %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can run HTML Help Workshop with the ^ -.hhp project file in %BUILDDIR%/htmlhelp. - goto end -) - -if "%1" == "qthelp" ( - %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can run "qcollectiongenerator" with the ^ -.qhcp project file in %BUILDDIR%/qthelp, like this: - echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Foo.qhcp - echo.To view the help file: - echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Foo.ghc - goto end -) - -if "%1" == "devhelp" ( - %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. - goto end -) - -if "%1" == "epub" ( - %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The epub file is in %BUILDDIR%/epub. - goto end -) - -if "%1" == "epub3" ( - %SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3 - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The epub3 file is in %BUILDDIR%/epub3. - goto end -) - -if "%1" == "latex" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "latexpdf" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - cd %BUILDDIR%/latex - make all-pdf - cd %~dp0 - echo. - echo.Build finished; the PDF files are in %BUILDDIR%/latex. 
- goto end -) - -if "%1" == "latexpdfja" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - cd %BUILDDIR%/latex - make all-pdf-ja - cd %~dp0 - echo. - echo.Build finished; the PDF files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "text" ( - %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The text files are in %BUILDDIR%/text. - goto end -) - -if "%1" == "man" ( - %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The manual pages are in %BUILDDIR%/man. - goto end -) - -if "%1" == "texinfo" ( - %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. - goto end -) - -if "%1" == "gettext" ( - %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The message catalogs are in %BUILDDIR%/locale. - goto end -) - -if "%1" == "changes" ( - %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes - if errorlevel 1 exit /b 1 - echo. - echo.The overview file is in %BUILDDIR%/changes. - goto end -) - -if "%1" == "linkcheck" ( - %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck - if errorlevel 1 exit /b 1 - echo. - echo.Link check complete; look for any errors in the above output ^ -or in %BUILDDIR%/linkcheck/output.txt. - goto end -) - -if "%1" == "doctest" ( - %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest - if errorlevel 1 exit /b 1 - echo. - echo.Testing of doctests in the sources finished, look at the ^ -results in %BUILDDIR%/doctest/output.txt. - goto end -) - -if "%1" == "coverage" ( - %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage - if errorlevel 1 exit /b 1 - echo. - echo.Testing of coverage in the sources finished, look at the ^ -results in %BUILDDIR%/coverage/python.txt. - goto end -) - -if "%1" == "xml" ( - %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The XML files are in %BUILDDIR%/xml. - goto end -) - -if "%1" == "pseudoxml" ( - %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. - goto end -) - -if "%1" == "dummy" ( - %SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. Dummy builder generates no files. - goto end -) - -:end diff --git a/doc/modules/ROOT/nav.adoc b/doc/modules/ROOT/nav.adoc new file mode 100644 index 000000000000..4c80ecafc8f3 --- /dev/null +++ b/doc/modules/ROOT/nav.adoc @@ -0,0 +1,4 @@ +* xref:index.adoc[Main] +** xref:master@_:ROOT:glossary.adoc[Glossary] +** xref:master@_:ROOT:bugs.adoc[How to report bugs] +** xref:master@_:ROOT:contactus.adoc[Contact us] \ No newline at end of file diff --git a/doc/modules/ROOT/pages/index.adoc b/doc/modules/ROOT/pages/index.adoc new file mode 100644 index 000000000000..183bf9e339b0 --- /dev/null +++ b/doc/modules/ROOT/pages/index.adoc @@ -0,0 +1,48 @@ += Welcome to Apache Cassandra's documentation! + +:description: Starting page for Apache Cassandra documentation. +:keywords: Apache, Cassandra, NoSQL, database +:cass-url: http://cassandra.apache.org +:cass-contrib-url: https://wiki.apache.org/cassandra/HowToContribute + +This is the official documentation for {cass-url}[Apache Cassandra]. 
+If you would like to contribute to this documentation, you are welcome +to do so by submitting your contribution like any other patch following +{cass-contrib-url}[these instructions]. + +== Main documentation + +[cols="a,a"] +|=== + +| xref:cassandra:getting_started/index.adoc[Getting started] | Newbie starting point + +| xref:cassandra:architecture/index.adoc[Architecture] | Cassandra's big picture + +| xref:cassandra:data_modeling/index.adoc[Data modeling] | Hint: it's not relational + +| xref:cassandra:cql/index.adoc[Cassandra Query Language (CQL)] | CQL reference documentation + +| xref:cassandra:configuration/index.adoc[Configuration] | Cassandra's handles and knobs + +| xref:cassandra:operating/index.adoc[Operation] | The operator's corner + +| xref:cassandra:tools/index.adoc[Tools] | cqlsh, nodetool, and others + +| xref:cassandra:troubleshooting/index.adoc[Troubleshooting] | What to look for when you have a problem + +| xref:cassandra:faq/index.adoc[FAQ] | Frequently asked questions + +| xref:cassandra:plugins/index.adoc[Plug-ins] | Third-party plug-ins + +| xref:master@_:ROOT:native_protocol.adoc[Native Protocols] | Native Cassandra protocol specifications + +|=== + +== Meta information +* xref:master@_:ROOT:bugs.adoc[Reporting bugs] +* xref:master@_:ROOT:contactus.adoc[Contact us] +* xref:master@_:ROOT:development/index.adoc[Contributing code] +* xref:master@_:ROOT:docdev/index.adoc[Contributing to the docs] +* xref:master@_:ROOT:community.adoc[Community] +* xref:master@_:ROOT:download.adoc[Download] diff --git a/doc/modules/cassandra/assets/images/Figure_1_backups.jpg b/doc/modules/cassandra/assets/images/Figure_1_backups.jpg new file mode 100644 index 000000000000..160013d76fe2 Binary files /dev/null and b/doc/modules/cassandra/assets/images/Figure_1_backups.jpg differ diff --git a/doc/modules/cassandra/assets/images/Figure_1_data_model.jpg b/doc/modules/cassandra/assets/images/Figure_1_data_model.jpg new file mode 100644 index 000000000000..a3b330e7a391 Binary files /dev/null and b/doc/modules/cassandra/assets/images/Figure_1_data_model.jpg differ diff --git a/doc/modules/cassandra/assets/images/Figure_1_guarantees.jpg b/doc/modules/cassandra/assets/images/Figure_1_guarantees.jpg new file mode 100644 index 000000000000..859342da5e65 Binary files /dev/null and b/doc/modules/cassandra/assets/images/Figure_1_guarantees.jpg differ diff --git a/doc/modules/cassandra/assets/images/Figure_1_read_repair.jpg b/doc/modules/cassandra/assets/images/Figure_1_read_repair.jpg new file mode 100644 index 000000000000..d771550a4f0f Binary files /dev/null and b/doc/modules/cassandra/assets/images/Figure_1_read_repair.jpg differ diff --git a/doc/modules/cassandra/assets/images/Figure_2_data_model.jpg b/doc/modules/cassandra/assets/images/Figure_2_data_model.jpg new file mode 100644 index 000000000000..7acdeac02abc Binary files /dev/null and b/doc/modules/cassandra/assets/images/Figure_2_data_model.jpg differ diff --git a/doc/modules/cassandra/assets/images/Figure_2_read_repair.jpg b/doc/modules/cassandra/assets/images/Figure_2_read_repair.jpg new file mode 100644 index 000000000000..29a912b49693 Binary files /dev/null and b/doc/modules/cassandra/assets/images/Figure_2_read_repair.jpg differ diff --git a/doc/modules/cassandra/assets/images/Figure_3_read_repair.jpg b/doc/modules/cassandra/assets/images/Figure_3_read_repair.jpg new file mode 100644 index 000000000000..f5cc1897e3e3 Binary files /dev/null and b/doc/modules/cassandra/assets/images/Figure_3_read_repair.jpg differ diff --git 
a/doc/modules/cassandra/assets/images/Figure_4_read_repair.jpg b/doc/modules/cassandra/assets/images/Figure_4_read_repair.jpg new file mode 100644 index 000000000000..25bdb347da53 Binary files /dev/null and b/doc/modules/cassandra/assets/images/Figure_4_read_repair.jpg differ diff --git a/doc/modules/cassandra/assets/images/Figure_5_read_repair.jpg b/doc/modules/cassandra/assets/images/Figure_5_read_repair.jpg new file mode 100644 index 000000000000..d9c04857f76b Binary files /dev/null and b/doc/modules/cassandra/assets/images/Figure_5_read_repair.jpg differ diff --git a/doc/modules/cassandra/assets/images/Figure_6_read_repair.jpg b/doc/modules/cassandra/assets/images/Figure_6_read_repair.jpg new file mode 100644 index 000000000000..6bb4d1e32403 Binary files /dev/null and b/doc/modules/cassandra/assets/images/Figure_6_read_repair.jpg differ diff --git a/doc/modules/cassandra/assets/images/data_modeling_chebotko_logical.png b/doc/modules/cassandra/assets/images/data_modeling_chebotko_logical.png new file mode 100755 index 000000000000..e54b5f2740af Binary files /dev/null and b/doc/modules/cassandra/assets/images/data_modeling_chebotko_logical.png differ diff --git a/doc/modules/cassandra/assets/images/data_modeling_chebotko_physical.png b/doc/modules/cassandra/assets/images/data_modeling_chebotko_physical.png new file mode 100644 index 000000000000..bfdaec552726 Binary files /dev/null and b/doc/modules/cassandra/assets/images/data_modeling_chebotko_physical.png differ diff --git a/doc/modules/cassandra/assets/images/data_modeling_hotel_bucketing.png b/doc/modules/cassandra/assets/images/data_modeling_hotel_bucketing.png new file mode 100644 index 000000000000..8b53e38f90ea Binary files /dev/null and b/doc/modules/cassandra/assets/images/data_modeling_hotel_bucketing.png differ diff --git a/doc/modules/cassandra/assets/images/data_modeling_hotel_erd.png b/doc/modules/cassandra/assets/images/data_modeling_hotel_erd.png new file mode 100755 index 000000000000..e86fe68f34f9 Binary files /dev/null and b/doc/modules/cassandra/assets/images/data_modeling_hotel_erd.png differ diff --git a/doc/modules/cassandra/assets/images/data_modeling_hotel_logical.png b/doc/modules/cassandra/assets/images/data_modeling_hotel_logical.png new file mode 100755 index 000000000000..e920f12486d1 Binary files /dev/null and b/doc/modules/cassandra/assets/images/data_modeling_hotel_logical.png differ diff --git a/doc/modules/cassandra/assets/images/data_modeling_hotel_physical.png b/doc/modules/cassandra/assets/images/data_modeling_hotel_physical.png new file mode 100644 index 000000000000..2d20a6ddbb9f Binary files /dev/null and b/doc/modules/cassandra/assets/images/data_modeling_hotel_physical.png differ diff --git a/doc/modules/cassandra/assets/images/data_modeling_hotel_queries.png b/doc/modules/cassandra/assets/images/data_modeling_hotel_queries.png new file mode 100755 index 000000000000..2434db39d4ff Binary files /dev/null and b/doc/modules/cassandra/assets/images/data_modeling_hotel_queries.png differ diff --git a/doc/modules/cassandra/assets/images/data_modeling_hotel_relational.png b/doc/modules/cassandra/assets/images/data_modeling_hotel_relational.png new file mode 100755 index 000000000000..43e784eea74d Binary files /dev/null and b/doc/modules/cassandra/assets/images/data_modeling_hotel_relational.png differ diff --git a/doc/modules/cassandra/assets/images/data_modeling_reservation_logical.png b/doc/modules/cassandra/assets/images/data_modeling_reservation_logical.png new file mode 100755 index 
000000000000..0460633b68fa Binary files /dev/null and b/doc/modules/cassandra/assets/images/data_modeling_reservation_logical.png differ diff --git a/doc/modules/cassandra/assets/images/data_modeling_reservation_physical.png b/doc/modules/cassandra/assets/images/data_modeling_reservation_physical.png new file mode 100755 index 000000000000..1e6e76c16c59 Binary files /dev/null and b/doc/modules/cassandra/assets/images/data_modeling_reservation_physical.png differ diff --git a/doc/modules/cassandra/assets/images/docs_commit.png b/doc/modules/cassandra/assets/images/docs_commit.png new file mode 100644 index 000000000000..d90d96a88ba5 Binary files /dev/null and b/doc/modules/cassandra/assets/images/docs_commit.png differ diff --git a/doc/modules/cassandra/assets/images/docs_create_branch.png b/doc/modules/cassandra/assets/images/docs_create_branch.png new file mode 100644 index 000000000000..a04cb54f34d6 Binary files /dev/null and b/doc/modules/cassandra/assets/images/docs_create_branch.png differ diff --git a/doc/modules/cassandra/assets/images/docs_create_file.png b/doc/modules/cassandra/assets/images/docs_create_file.png new file mode 100644 index 000000000000..b51e37035325 Binary files /dev/null and b/doc/modules/cassandra/assets/images/docs_create_file.png differ diff --git a/doc/modules/cassandra/assets/images/docs_editor.png b/doc/modules/cassandra/assets/images/docs_editor.png new file mode 100644 index 000000000000..5b9997bcc406 Binary files /dev/null and b/doc/modules/cassandra/assets/images/docs_editor.png differ diff --git a/doc/modules/cassandra/assets/images/docs_fork.png b/doc/modules/cassandra/assets/images/docs_fork.png new file mode 100644 index 000000000000..20a592a98e6c Binary files /dev/null and b/doc/modules/cassandra/assets/images/docs_fork.png differ diff --git a/doc/modules/cassandra/assets/images/docs_pr.png b/doc/modules/cassandra/assets/images/docs_pr.png new file mode 100644 index 000000000000..211eb25ef1a6 Binary files /dev/null and b/doc/modules/cassandra/assets/images/docs_pr.png differ diff --git a/doc/modules/cassandra/assets/images/docs_preview.png b/doc/modules/cassandra/assets/images/docs_preview.png new file mode 100644 index 000000000000..207f0ac438b3 Binary files /dev/null and b/doc/modules/cassandra/assets/images/docs_preview.png differ diff --git a/doc/source/development/images/eclipse_debug0.png b/doc/modules/cassandra/assets/images/eclipse_debug0.png similarity index 100% rename from doc/source/development/images/eclipse_debug0.png rename to doc/modules/cassandra/assets/images/eclipse_debug0.png diff --git a/doc/source/development/images/eclipse_debug1.png b/doc/modules/cassandra/assets/images/eclipse_debug1.png similarity index 100% rename from doc/source/development/images/eclipse_debug1.png rename to doc/modules/cassandra/assets/images/eclipse_debug1.png diff --git a/doc/source/development/images/eclipse_debug2.png b/doc/modules/cassandra/assets/images/eclipse_debug2.png similarity index 100% rename from doc/source/development/images/eclipse_debug2.png rename to doc/modules/cassandra/assets/images/eclipse_debug2.png diff --git a/doc/source/development/images/eclipse_debug3.png b/doc/modules/cassandra/assets/images/eclipse_debug3.png similarity index 100% rename from doc/source/development/images/eclipse_debug3.png rename to doc/modules/cassandra/assets/images/eclipse_debug3.png diff --git a/doc/source/development/images/eclipse_debug4.png b/doc/modules/cassandra/assets/images/eclipse_debug4.png similarity index 100% rename from 
doc/source/development/images/eclipse_debug4.png rename to doc/modules/cassandra/assets/images/eclipse_debug4.png diff --git a/doc/source/development/images/eclipse_debug5.png b/doc/modules/cassandra/assets/images/eclipse_debug5.png similarity index 100% rename from doc/source/development/images/eclipse_debug5.png rename to doc/modules/cassandra/assets/images/eclipse_debug5.png diff --git a/doc/source/development/images/eclipse_debug6.png b/doc/modules/cassandra/assets/images/eclipse_debug6.png similarity index 100% rename from doc/source/development/images/eclipse_debug6.png rename to doc/modules/cassandra/assets/images/eclipse_debug6.png diff --git a/doc/modules/cassandra/assets/images/example-stress-graph.png b/doc/modules/cassandra/assets/images/example-stress-graph.png new file mode 100644 index 000000000000..a65b08b16aac Binary files /dev/null and b/doc/modules/cassandra/assets/images/example-stress-graph.png differ diff --git a/doc/modules/cassandra/assets/images/hints.svg b/doc/modules/cassandra/assets/images/hints.svg new file mode 100644 index 000000000000..5e952e796260 --- /dev/null +++ b/doc/modules/cassandra/assets/images/hints.svg @@ -0,0 +1,9 @@ + + + + diff --git a/doc/modules/cassandra/assets/images/ring.svg b/doc/modules/cassandra/assets/images/ring.svg new file mode 100644 index 000000000000..d0db8c579e3e --- /dev/null +++ b/doc/modules/cassandra/assets/images/ring.svg @@ -0,0 +1,11 @@ + + + + + ... + diff --git a/doc/modules/cassandra/assets/images/vnodes.svg b/doc/modules/cassandra/assets/images/vnodes.svg new file mode 100644 index 000000000000..71b4fa2d8b90 --- /dev/null +++ b/doc/modules/cassandra/assets/images/vnodes.svg @@ -0,0 +1,11 @@ + + + + + + diff --git a/doc/modules/cassandra/examples/BASH/add_repo_keys.sh b/doc/modules/cassandra/examples/BASH/add_repo_keys.sh new file mode 100644 index 000000000000..cdb5881e5638 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/add_repo_keys.sh @@ -0,0 +1 @@ +$ curl https://www.apache.org/dist/cassandra/KEYS | sudo apt-key add - diff --git a/doc/modules/cassandra/examples/BASH/apt-get_cass.sh b/doc/modules/cassandra/examples/BASH/apt-get_cass.sh new file mode 100644 index 000000000000..9614b29ab2b3 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/apt-get_cass.sh @@ -0,0 +1 @@ +$ sudo apt-get install cassandra diff --git a/doc/modules/cassandra/examples/BASH/apt-get_update.sh b/doc/modules/cassandra/examples/BASH/apt-get_update.sh new file mode 100644 index 000000000000..b50b7ac768e3 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/apt-get_update.sh @@ -0,0 +1 @@ +$ sudo apt-get update diff --git a/doc/modules/cassandra/examples/BASH/check_backups.sh b/doc/modules/cassandra/examples/BASH/check_backups.sh new file mode 100644 index 000000000000..212c3d2c7d47 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/check_backups.sh @@ -0,0 +1 @@ +$ cd ./cassandra/data/data/cqlkeyspace/t-d132e240c21711e9bbee19821dcea330/backups && ls -l diff --git a/doc/modules/cassandra/examples/BASH/cqlsh_localhost.sh b/doc/modules/cassandra/examples/BASH/cqlsh_localhost.sh new file mode 100644 index 000000000000..7bc1c39525d3 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/cqlsh_localhost.sh @@ -0,0 +1 @@ +$ bin/cqlsh localhost diff --git a/doc/modules/cassandra/examples/BASH/curl_install.sh b/doc/modules/cassandra/examples/BASH/curl_install.sh new file mode 100644 index 000000000000..23e7c0164860 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/curl_install.sh @@ -0,0 +1 @@ +$ curl -OL 
http://apache.mirror.digitalpacific.com.au/cassandra/{cass-tag-3x}/apache-cassandra-{cass-tag-3x}-bin.tar.gz diff --git a/doc/modules/cassandra/examples/BASH/curl_verify_sha.sh b/doc/modules/cassandra/examples/BASH/curl_verify_sha.sh new file mode 100644 index 000000000000..bde80caf6488 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/curl_verify_sha.sh @@ -0,0 +1 @@ +$ curl -L https://downloads.apache.org/cassandra/{cass-tag-3x}/apache-cassandra-{cass-tag-3x}-bin.tar.gz.sha256 diff --git a/doc/modules/cassandra/examples/BASH/docker_cqlsh.sh b/doc/modules/cassandra/examples/BASH/docker_cqlsh.sh new file mode 100644 index 000000000000..92a4a8f356d2 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/docker_cqlsh.sh @@ -0,0 +1 @@ +docker exec -it cass_cluster cqlsh diff --git a/doc/modules/cassandra/examples/BASH/docker_pull.sh b/doc/modules/cassandra/examples/BASH/docker_pull.sh new file mode 100644 index 000000000000..67e5e22680a6 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/docker_pull.sh @@ -0,0 +1 @@ +docker pull cassandra:{cass-docker-tag-3x} diff --git a/doc/modules/cassandra/examples/BASH/docker_remove.sh b/doc/modules/cassandra/examples/BASH/docker_remove.sh new file mode 100644 index 000000000000..bf9563075ff7 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/docker_remove.sh @@ -0,0 +1 @@ +docker rm cassandra diff --git a/doc/modules/cassandra/examples/BASH/docker_run.sh b/doc/modules/cassandra/examples/BASH/docker_run.sh new file mode 100644 index 000000000000..bb4ecdb93e56 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/docker_run.sh @@ -0,0 +1 @@ +docker run --name cass_cluster cassandra:{cass-docker-tag-3x} diff --git a/doc/modules/cassandra/examples/BASH/docker_run_qs.sh b/doc/modules/cassandra/examples/BASH/docker_run_qs.sh new file mode 100644 index 000000000000..7416f5dca1fc --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/docker_run_qs.sh @@ -0,0 +1,3 @@ +docker run --rm -it -v //scripts:/scripts \ +-v / --table -- diff --git a/doc/modules/cassandra/examples/BASH/snapshot_one_table2.sh b/doc/modules/cassandra/examples/BASH/snapshot_one_table2.sh new file mode 100644 index 000000000000..738771027642 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/snapshot_one_table2.sh @@ -0,0 +1 @@ +$ nodetool snapshot --tag magazine --table magazine catalogkeyspace diff --git a/doc/modules/cassandra/examples/BASH/start_tarball.sh b/doc/modules/cassandra/examples/BASH/start_tarball.sh new file mode 100644 index 000000000000..63312704acc6 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/start_tarball.sh @@ -0,0 +1 @@ +$ cd apache-cassandra-{cass-tag-3x}/ && bin/cassandra diff --git a/doc/modules/cassandra/examples/BASH/tail_syslog.sh b/doc/modules/cassandra/examples/BASH/tail_syslog.sh new file mode 100644 index 000000000000..b47575035db5 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/tail_syslog.sh @@ -0,0 +1 @@ +$ tail -f logs/system.log diff --git a/doc/modules/cassandra/examples/BASH/tail_syslog_package.sh b/doc/modules/cassandra/examples/BASH/tail_syslog_package.sh new file mode 100644 index 000000000000..c9f00ede0579 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/tail_syslog_package.sh @@ -0,0 +1 @@ +$ tail -f /var/log/cassandra/system.log diff --git a/doc/modules/cassandra/examples/BASH/tarball.sh b/doc/modules/cassandra/examples/BASH/tarball.sh new file mode 100644 index 000000000000..0ef448a11973 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/tarball.sh @@ -0,0 +1 @@ +$ tar xzvf 
apache-cassandra-{cass-tag-3x}-bin.tar.gz diff --git a/doc/modules/cassandra/examples/BASH/verify_gpg.sh b/doc/modules/cassandra/examples/BASH/verify_gpg.sh new file mode 100644 index 000000000000..9a503da06521 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/verify_gpg.sh @@ -0,0 +1 @@ +$ gpg --print-md SHA256 apache-cassandra-{cass-tag-3x}-bin.tar.gz diff --git a/doc/modules/cassandra/examples/BASH/yum_cass.sh b/doc/modules/cassandra/examples/BASH/yum_cass.sh new file mode 100644 index 000000000000..cd8217b112ee --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/yum_cass.sh @@ -0,0 +1 @@ +$ sudo yum install cassandra diff --git a/doc/modules/cassandra/examples/BASH/yum_start.sh b/doc/modules/cassandra/examples/BASH/yum_start.sh new file mode 100644 index 000000000000..4930d1ab11fc --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/yum_start.sh @@ -0,0 +1 @@ +$ sudo service cassandra start diff --git a/doc/modules/cassandra/examples/BASH/yum_update.sh b/doc/modules/cassandra/examples/BASH/yum_update.sh new file mode 100644 index 000000000000..2e815b2a0651 --- /dev/null +++ b/doc/modules/cassandra/examples/BASH/yum_update.sh @@ -0,0 +1 @@ +$ sudo yum update diff --git a/doc/modules/cassandra/examples/BNF/aggregate_name.bnf b/doc/modules/cassandra/examples/BNF/aggregate_name.bnf new file mode 100644 index 000000000000..a7ccdc3e1d3a --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/aggregate_name.bnf @@ -0,0 +1 @@ +aggregate_name::= [keyspace_name '.' ] name \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/alter_ks.bnf b/doc/modules/cassandra/examples/BNF/alter_ks.bnf new file mode 100644 index 000000000000..5f82d34e43a3 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/alter_ks.bnf @@ -0,0 +1,2 @@ +alter_keyspace_statement::= ALTER KEYSPACE keyspace_name + WITH options diff --git a/doc/modules/cassandra/examples/BNF/alter_mv_statement.bnf b/doc/modules/cassandra/examples/BNF/alter_mv_statement.bnf new file mode 100644 index 000000000000..ff97edb96176 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/alter_mv_statement.bnf @@ -0,0 +1 @@ +alter_materialized_view_statement::= ALTER MATERIALIZED VIEW view_name WITH table_options diff --git a/doc/modules/cassandra/examples/BNF/alter_role_statement.bnf b/doc/modules/cassandra/examples/BNF/alter_role_statement.bnf new file mode 100644 index 000000000000..36958d7fa901 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/alter_role_statement.bnf @@ -0,0 +1 @@ +alter_role_statement ::= ALTER ROLE role_name WITH role_options diff --git a/doc/modules/cassandra/examples/BNF/alter_table.bnf b/doc/modules/cassandra/examples/BNF/alter_table.bnf new file mode 100644 index 000000000000..bf1b4b7ab531 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/alter_table.bnf @@ -0,0 +1,4 @@ +alter_table_statement::= ALTER TABLE table_name alter_table_instruction +alter_table_instruction::= ADD column_name cql_type ( ',' column_name cql_type )* + | DROP column_name ( column_name )* + | WITH options diff --git a/doc/modules/cassandra/examples/BNF/alter_udt_statement.bnf b/doc/modules/cassandra/examples/BNF/alter_udt_statement.bnf new file mode 100644 index 000000000000..4f409e609e5e --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/alter_udt_statement.bnf @@ -0,0 +1,3 @@ +alter_type_statement::= ALTER TYPE udt_name alter_type_modification +alter_type_modification::= ADD field_definition + | RENAME identifier TO identifier( identifier TO identifier )* diff --git 
a/doc/modules/cassandra/examples/BNF/alter_user_statement.bnf b/doc/modules/cassandra/examples/BNF/alter_user_statement.bnf new file mode 100644 index 000000000000..129607c1bca1 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/alter_user_statement.bnf @@ -0,0 +1 @@ +alter_user_statement ::= ALTER USER role_name [ WITH PASSWORD string] [ user_option] diff --git a/doc/modules/cassandra/examples/BNF/batch_statement.bnf b/doc/modules/cassandra/examples/BNF/batch_statement.bnf new file mode 100644 index 000000000000..2cc2559bfe40 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/batch_statement.bnf @@ -0,0 +1,5 @@ +batch_statement ::= BEGIN [ UNLOGGED | COUNTER ] BATCH + [ USING update_parameter( AND update_parameter)* ] + modification_statement ( ';' modification_statement )* + APPLY BATCH +modification_statement ::= insert_statement | update_statement | delete_statement diff --git a/doc/modules/cassandra/examples/BNF/collection_literal.bnf b/doc/modules/cassandra/examples/BNF/collection_literal.bnf new file mode 100644 index 000000000000..83a46a240523 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/collection_literal.bnf @@ -0,0 +1,4 @@ +collection_literal::= map_literal | set_literal | list_literal +map_literal::= '\{' [ term ':' term (',' term : term)* ] '}' +set_literal::= '\{' [ term (',' term)* ] '}' +list_literal::= '[' [ term (',' term)* ] ']' diff --git a/doc/modules/cassandra/examples/BNF/collection_type.bnf b/doc/modules/cassandra/examples/BNF/collection_type.bnf new file mode 100644 index 000000000000..37e6cd1de894 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/collection_type.bnf @@ -0,0 +1,3 @@ +collection_type::= MAP '<' cql_type',' cql_type'>' + | SET '<' cql_type '>' + | LIST '<' cql_type'>' diff --git a/doc/modules/cassandra/examples/BNF/column.bnf b/doc/modules/cassandra/examples/BNF/column.bnf new file mode 100644 index 000000000000..136a45c7e5d2 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/column.bnf @@ -0,0 +1 @@ +column_name::= identifier diff --git a/doc/modules/cassandra/examples/BNF/constant.bnf b/doc/modules/cassandra/examples/BNF/constant.bnf new file mode 100644 index 000000000000..4a2953aa21fa --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/constant.bnf @@ -0,0 +1,8 @@ +constant::= string | integer | float | boolean | uuid | blob | NULL +string::= ''' (any character where ' can appear if doubled)+ ''' : '$$' (any character other than '$$') '$$' +integer::= re('-?[0-9]+') +float::= re('-?[0-9]+(.[0-9]*)?([eE][+-]?[0-9+])?') | NAN | INFINITY +boolean::= TRUE | FALSE +uuid::= hex\{8}-hex\{4}-hex\{4}-hex\{4}-hex\{12} +hex::= re("[0-9a-fA-F]") +blob::= '0' ('x' | 'X') hex+ diff --git a/doc/modules/cassandra/examples/BNF/cql_statement.bnf b/doc/modules/cassandra/examples/BNF/cql_statement.bnf new file mode 100644 index 000000000000..8d4ae2148302 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/cql_statement.bnf @@ -0,0 +1,48 @@ +cql_statement::= statement [ ';' ] +statement:=: ddl_statement : + | dml_statement + | secondary_index_statement + | materialized_view_statement + | role_or_permission_statement + | udf_statement + | udt_statement + | trigger_statement +ddl_statement::= use_statement + | create_keyspace_statement + | alter_keyspace_statement + | drop_keyspace_statement + | create_table_statement + | alter_table_statement + | drop_table_statement + | truncate_statement +dml_statement::= select_statement + | insert_statement + | update_statement + | delete_statement + | batch_statement +secondary_index_statement::= 
create_index_statement + | drop_index_statement +materialized_view_statement::= create_materialized_view_statement + | drop_materialized_view_statement +role_or_permission_statement::= create_role_statement + | alter_role_statement + | drop_role_statement + | grant_role_statement + | revoke_role_statement + | list_roles_statement + | grant_permission_statement + | revoke_permission_statement + | list_permissions_statement + | create_user_statement + | alter_user_statement + | drop_user_statement + | list_users_statement +udf_statement::= create_function_statement + | drop_function_statement + | create_aggregate_statement + | drop_aggregate_statement +udt_statement::= create_type_statement + | alter_type_statement + | drop_type_statement +trigger_statement::= create_trigger_statement + | drop_trigger_statement diff --git a/doc/modules/cassandra/examples/BNF/cql_type.bnf b/doc/modules/cassandra/examples/BNF/cql_type.bnf new file mode 100644 index 000000000000..4e2e5d1765dd --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/cql_type.bnf @@ -0,0 +1 @@ +cql_type::= native_type| collection_type| user_defined_type | tuple_type | custom_type diff --git a/doc/modules/cassandra/examples/BNF/create_aggregate_statement.bnf b/doc/modules/cassandra/examples/BNF/create_aggregate_statement.bnf new file mode 100644 index 000000000000..c0126a23ffd8 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/create_aggregate_statement.bnf @@ -0,0 +1,6 @@ +create_aggregate_statement ::= CREATE [ OR REPLACE ] AGGREGATE [ IF NOT EXISTS ] + function_name '(' arguments_signature')' + SFUNC function_name + STYPE cql_type: + [ FINALFUNC function_name] + [ INITCOND term ] diff --git a/doc/modules/cassandra/examples/BNF/create_function_statement.bnf b/doc/modules/cassandra/examples/BNF/create_function_statement.bnf new file mode 100644 index 000000000000..0da769a11fb0 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/create_function_statement.bnf @@ -0,0 +1,6 @@ +create_function_statement::= CREATE [ OR REPLACE ] FUNCTION [ IF NOT EXISTS] + function_name '(' arguments_declaration ')' + [ CALLED | RETURNS NULL ] ON NULL INPUT + RETURNS cql_type + LANGUAGE identifier + AS string arguments_declaration: identifier cql_type ( ',' identifier cql_type )* diff --git a/doc/modules/cassandra/examples/BNF/create_index_statement.bnf b/doc/modules/cassandra/examples/BNF/create_index_statement.bnf new file mode 100644 index 000000000000..6e7694724342 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/create_index_statement.bnf @@ -0,0 +1,5 @@ +create_index_statement::= CREATE [ CUSTOM ] INDEX [ IF NOT EXISTS ] [ index_name ] + ON table_name '(' index_identifier ')' + [ USING string [ WITH OPTIONS = map_literal ] ] +index_identifier::= column_name + | ( KEYS | VALUES | ENTRIES | FULL ) '(' column_name ')' diff --git a/doc/modules/cassandra/examples/BNF/create_ks.bnf b/doc/modules/cassandra/examples/BNF/create_ks.bnf new file mode 100644 index 000000000000..ba3e240e0fae --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/create_ks.bnf @@ -0,0 +1,2 @@ +create_keyspace_statement::= CREATE KEYSPACE [ IF NOT EXISTS ] keyspace_name + WITH options diff --git a/doc/modules/cassandra/examples/BNF/create_mv_statement.bnf b/doc/modules/cassandra/examples/BNF/create_mv_statement.bnf new file mode 100644 index 000000000000..9bdb60dc5b15 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/create_mv_statement.bnf @@ -0,0 +1,4 @@ +create_materialized_view_statement::= CREATE MATERIALIZED VIEW [ IF NOT EXISTS ] view_name + AS 
select_statement + PRIMARY KEY '(' primary_key')' + WITH table_options diff --git a/doc/modules/cassandra/examples/BNF/create_role_statement.bnf b/doc/modules/cassandra/examples/BNF/create_role_statement.bnf new file mode 100644 index 000000000000..bc93fbca3bc4 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/create_role_statement.bnf @@ -0,0 +1,9 @@ +create_role_statement ::= CREATE ROLE [ IF NOT EXISTS ] role_name + [ WITH role_options# ] +role_options ::= role_option ( AND role_option)* +role_option ::= PASSWORD '=' string + | LOGIN '=' boolean + | SUPERUSER '=' boolean + | OPTIONS '=' map_literal + | ACCESS TO DATACENTERS set_literal + | ACCESS TO ALL DATACENTERS diff --git a/doc/modules/cassandra/examples/BNF/create_table.bnf b/doc/modules/cassandra/examples/BNF/create_table.bnf new file mode 100644 index 000000000000..840573c7b08a --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/create_table.bnf @@ -0,0 +1,12 @@ +create_table_statement::= CREATE TABLE [ IF NOT EXISTS ] table_name '(' + column_definition ( ',' column_definition )* + [ ',' PRIMARY KEY '(' primary_key ')' ] + ')' [ WITH table_options ] +column_definition::= column_name cql_type [ STATIC ] [ PRIMARY KEY] +primary_key::= partition_key [ ',' clustering_columns ] +partition_key::= column_name | '(' column_name ( ',' column_name )* ')' +clustering_columns::= column_name ( ',' column_name )* +table_options:=: COMPACT STORAGE [ AND table_options ] + | CLUSTERING ORDER BY '(' clustering_order ')' + [ AND table_options ] | options +clustering_order::= column_name (ASC | DESC) ( ',' column_name (ASC | DESC) )* diff --git a/doc/modules/cassandra/examples/BNF/create_trigger_statement.bnf b/doc/modules/cassandra/examples/BNF/create_trigger_statement.bnf new file mode 100644 index 000000000000..f7442da15d74 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/create_trigger_statement.bnf @@ -0,0 +1,3 @@ +create_trigger_statement ::= CREATE TRIGGER [ IF NOT EXISTS ] trigger_name + ON table_name + USING string diff --git a/doc/modules/cassandra/examples/BNF/create_type.bnf b/doc/modules/cassandra/examples/BNF/create_type.bnf new file mode 100644 index 000000000000..aebe9ebfbac6 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/create_type.bnf @@ -0,0 +1,3 @@ +create_type_statement::= CREATE TYPE [ IF NOT EXISTS ] udt_name + '(' field_definition ( ',' field_definition)* ')' +field_definition::= identifier cql_type diff --git a/doc/modules/cassandra/examples/BNF/create_user_statement.bnf b/doc/modules/cassandra/examples/BNF/create_user_statement.bnf new file mode 100644 index 000000000000..19f9903921ee --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/create_user_statement.bnf @@ -0,0 +1,4 @@ +create_user_statement ::= CREATE USER [ IF NOT EXISTS ] role_name + [ WITH PASSWORD string ] + [ user_option ] +user_option: SUPERUSER | NOSUPERUSER diff --git a/doc/modules/cassandra/examples/BNF/custom_type.bnf b/doc/modules/cassandra/examples/BNF/custom_type.bnf new file mode 100644 index 000000000000..ce4890f61762 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/custom_type.bnf @@ -0,0 +1 @@ +custom_type::= string diff --git a/doc/modules/cassandra/examples/BNF/delete_statement.bnf b/doc/modules/cassandra/examples/BNF/delete_statement.bnf new file mode 100644 index 000000000000..5f456ba2ded8 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/delete_statement.bnf @@ -0,0 +1,5 @@ +delete_statement::= DELETE [ simple_selection ( ',' simple_selection ) ] + FROM table_name + [ USING update_parameter ( AND 
update_parameter# )* ] + WHERE where_clause + [ IF ( EXISTS | condition ( AND condition)*) ] diff --git a/doc/modules/cassandra/examples/BNF/describe_aggregate_statement.bnf b/doc/modules/cassandra/examples/BNF/describe_aggregate_statement.bnf new file mode 100644 index 000000000000..b94526bc5a2e --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/describe_aggregate_statement.bnf @@ -0,0 +1 @@ +describe_aggregate_statement::= DESCRIBE AGGREGATE aggregate_name; \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/describe_aggregates_statement.bnf b/doc/modules/cassandra/examples/BNF/describe_aggregates_statement.bnf new file mode 100644 index 000000000000..049afef24b27 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/describe_aggregates_statement.bnf @@ -0,0 +1 @@ +describe_aggregates_statement::= DESCRIBE AGGREGATES; \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/describe_cluster_statement.bnf b/doc/modules/cassandra/examples/BNF/describe_cluster_statement.bnf new file mode 100644 index 000000000000..8f58ac84c253 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/describe_cluster_statement.bnf @@ -0,0 +1 @@ +describe_cluster_statement::= DESCRIBE CLUSTER; \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/describe_function_statement.bnf b/doc/modules/cassandra/examples/BNF/describe_function_statement.bnf new file mode 100644 index 000000000000..9145e928b0f2 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/describe_function_statement.bnf @@ -0,0 +1 @@ +describe_function_statement::= DESCRIBE FUNCTION function_name; \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/describe_functions_statement.bnf b/doc/modules/cassandra/examples/BNF/describe_functions_statement.bnf new file mode 100644 index 000000000000..4e3b82299533 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/describe_functions_statement.bnf @@ -0,0 +1 @@ +describe_functions_statement::= DESCRIBE FUNCTIONS; \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/describe_index_statement.bnf b/doc/modules/cassandra/examples/BNF/describe_index_statement.bnf new file mode 100644 index 000000000000..907c175b30b3 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/describe_index_statement.bnf @@ -0,0 +1 @@ +describe_index_statement::= DESCRIBE INDEX index; \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/describe_keyspace_statement.bnf b/doc/modules/cassandra/examples/BNF/describe_keyspace_statement.bnf new file mode 100644 index 000000000000..771e755d55a4 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/describe_keyspace_statement.bnf @@ -0,0 +1 @@ +describe_keyspace_statement::= DESCRIBE [ONLY] KEYSPACE [keyspace_name] [WITH INTERNALS]; \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/describe_keyspaces_statement.bnf b/doc/modules/cassandra/examples/BNF/describe_keyspaces_statement.bnf new file mode 100644 index 000000000000..51b3c26c149b --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/describe_keyspaces_statement.bnf @@ -0,0 +1 @@ +describe_keyspaces_statement::= DESCRIBE KEYSPACES; \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/describe_materialized_view_statement.bnf b/doc/modules/cassandra/examples/BNF/describe_materialized_view_statement.bnf new file mode 100644 index 000000000000..3297c0ebcf90 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/describe_materialized_view_statement.bnf 
@@ -0,0 +1 @@ +describe_materialized_view_statement::= DESCRIBE MATERIALIZED VIEW materialized_view; \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/describe_object_statement.bnf b/doc/modules/cassandra/examples/BNF/describe_object_statement.bnf new file mode 100644 index 000000000000..d8addae4a27e --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/describe_object_statement.bnf @@ -0,0 +1 @@ +describe_object_statement::= DESCRIBE object_name [WITH INTERNALS]; \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/describe_schema_statement.bnf b/doc/modules/cassandra/examples/BNF/describe_schema_statement.bnf new file mode 100644 index 000000000000..73440819243c --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/describe_schema_statement.bnf @@ -0,0 +1 @@ +describe_schema_statement::= DESCRIBE [FULL] SCHEMA [WITH INTERNALS]; \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/describe_table_statement.bnf b/doc/modules/cassandra/examples/BNF/describe_table_statement.bnf new file mode 100644 index 000000000000..1a0cd736c129 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/describe_table_statement.bnf @@ -0,0 +1 @@ +describe_table_statement::= DESCRIBE TABLE table_name [WITH INTERNALS]; \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/describe_tables_statement.bnf b/doc/modules/cassandra/examples/BNF/describe_tables_statement.bnf new file mode 100644 index 000000000000..061452c0fcd9 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/describe_tables_statement.bnf @@ -0,0 +1 @@ +describe_tables_statement::= DESCRIBE TABLES; \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/describe_type_statement.bnf b/doc/modules/cassandra/examples/BNF/describe_type_statement.bnf new file mode 100644 index 000000000000..f592af41d054 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/describe_type_statement.bnf @@ -0,0 +1 @@ +describe_type_statement::= DESCRIBE TYPE udt_name; \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/describe_types_statement.bnf b/doc/modules/cassandra/examples/BNF/describe_types_statement.bnf new file mode 100644 index 000000000000..73f282795513 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/describe_types_statement.bnf @@ -0,0 +1 @@ +describe_types_statement::= DESCRIBE TYPES; \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/drop_aggregate_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_aggregate_statement.bnf new file mode 100644 index 000000000000..28e8a4fcb934 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/drop_aggregate_statement.bnf @@ -0,0 +1,2 @@ +drop_aggregate_statement::= DROP AGGREGATE [ IF EXISTS ] function_name[ '(' arguments_signature ')' +] diff --git a/doc/modules/cassandra/examples/BNF/drop_function_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_function_statement.bnf new file mode 100644 index 000000000000..2639bd0d66fb --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/drop_function_statement.bnf @@ -0,0 +1,2 @@ +drop_function_statement::= DROP FUNCTION [ IF EXISTS ] function_name [ '(' arguments_signature ')' ] +arguments_signature::= cql_type ( ',' cql_type )* diff --git a/doc/modules/cassandra/examples/BNF/drop_index_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_index_statement.bnf new file mode 100644 index 000000000000..49f36d1eb32c --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/drop_index_statement.bnf @@ -0,0 
+1 @@ +drop_index_statement::= DROP INDEX [ IF EXISTS ] index_name diff --git a/doc/modules/cassandra/examples/BNF/drop_ks.bnf b/doc/modules/cassandra/examples/BNF/drop_ks.bnf new file mode 100644 index 000000000000..4e21b7bbce35 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/drop_ks.bnf @@ -0,0 +1 @@ +drop_keyspace_statement::= DROP KEYSPACE [ IF EXISTS ] keyspace_name diff --git a/doc/modules/cassandra/examples/BNF/drop_mv_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_mv_statement.bnf new file mode 100644 index 000000000000..1a9d8dc980e3 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/drop_mv_statement.bnf @@ -0,0 +1 @@ +drop_materialized_view_statement::= DROP MATERIALIZED VIEW [ IF EXISTS ] view_name; diff --git a/doc/modules/cassandra/examples/BNF/drop_role_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_role_statement.bnf new file mode 100644 index 000000000000..15e1791d72c9 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/drop_role_statement.bnf @@ -0,0 +1 @@ +drop_role_statement ::= DROP ROLE [ IF EXISTS ] role_name diff --git a/doc/modules/cassandra/examples/BNF/drop_table.bnf b/doc/modules/cassandra/examples/BNF/drop_table.bnf new file mode 100644 index 000000000000..cabd17a42cf6 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/drop_table.bnf @@ -0,0 +1 @@ +drop_table_statement::= DROP TABLE [ IF EXISTS ] table_name diff --git a/doc/modules/cassandra/examples/BNF/drop_trigger_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_trigger_statement.bnf new file mode 100644 index 000000000000..c1d3e594230b --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/drop_trigger_statement.bnf @@ -0,0 +1 @@ +drop_trigger_statement ::= DROP TRIGGER [ IF EXISTS ] trigger_nameON table_name diff --git a/doc/modules/cassandra/examples/BNF/drop_udt_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_udt_statement.bnf new file mode 100644 index 000000000000..276b57c60b8d --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/drop_udt_statement.bnf @@ -0,0 +1 @@ +drop_type_statement::= DROP TYPE [ IF EXISTS ] udt_name diff --git a/doc/modules/cassandra/examples/BNF/drop_user_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_user_statement.bnf new file mode 100644 index 000000000000..9b226083d1a1 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/drop_user_statement.bnf @@ -0,0 +1 @@ +drop_user_statement ::= DROP USER [ IF EXISTS ] role_name diff --git a/doc/modules/cassandra/examples/BNF/function.bnf b/doc/modules/cassandra/examples/BNF/function.bnf new file mode 100644 index 000000000000..7e054306ed76 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/function.bnf @@ -0,0 +1 @@ +function_name ::= [ keyspace_name'.' 
] name diff --git a/doc/modules/cassandra/examples/BNF/grant_permission_statement.bnf b/doc/modules/cassandra/examples/BNF/grant_permission_statement.bnf new file mode 100644 index 000000000000..40f1df327034 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/grant_permission_statement.bnf @@ -0,0 +1,12 @@ +grant_permission_statement ::= GRANT permissions ON resource TO role_name +permissions ::= ALL [ PERMISSIONS ] | permission [ PERMISSION ] +permission ::= CREATE | ALTER | DROP | SELECT | MODIFY | AUTHORIZE | DESCRIBE | EXECUTE +resource ::= ALL KEYSPACES + | KEYSPACE keyspace_name + | [ TABLE ] table_name + | ALL ROLES + | ROLE role_name + | ALL FUNCTIONS [ IN KEYSPACE keyspace_name ] + | FUNCTION function_name '(' [ cql_type( ',' cql_type )* ] ')' + | ALL MBEANS + | ( MBEAN | MBEANS ) string diff --git a/doc/modules/cassandra/examples/BNF/grant_role_statement.bnf b/doc/modules/cassandra/examples/BNF/grant_role_statement.bnf new file mode 100644 index 000000000000..d965cc2658bd --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/grant_role_statement.bnf @@ -0,0 +1 @@ +grant_role_statement ::= GRANT role_name TO role_name diff --git a/doc/modules/cassandra/examples/BNF/identifier.bnf b/doc/modules/cassandra/examples/BNF/identifier.bnf new file mode 100644 index 000000000000..7bc34314f933 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/identifier.bnf @@ -0,0 +1,3 @@ +identifier::= unquoted_identifier | quoted_identifier +unquoted_identifier::= re('[a-zA-Z][link:[a-zA-Z0-9]]*') +quoted_identifier::= '"' (any character where " can appear if doubled)+ '"' diff --git a/doc/modules/cassandra/examples/BNF/index.bnf b/doc/modules/cassandra/examples/BNF/index.bnf new file mode 100644 index 000000000000..70835016590e --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/index.bnf @@ -0,0 +1 @@ +index::= [keyspace_name '.' ] index_name \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/index_name.bnf b/doc/modules/cassandra/examples/BNF/index_name.bnf new file mode 100644 index 000000000000..c322755839a0 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/index_name.bnf @@ -0,0 +1 @@ +index_name::= re('[a-zA-Z_0-9]+') diff --git a/doc/modules/cassandra/examples/BNF/insert_statement.bnf b/doc/modules/cassandra/examples/BNF/insert_statement.bnf new file mode 100644 index 000000000000..ed80c3ed05b7 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/insert_statement.bnf @@ -0,0 +1,6 @@ +insert_statement::= INSERT INTO table_name ( names_values | json_clause ) + [ IF NOT EXISTS ] + [ USING update_parameter ( AND update_parameter )* ] +names_values::= names VALUES tuple_literal +json_clause::= JSON string [ DEFAULT ( NULL | UNSET ) ] +names::= '(' column_name ( ',' column_name )* ')' diff --git a/doc/modules/cassandra/examples/BNF/ks_table.bnf b/doc/modules/cassandra/examples/BNF/ks_table.bnf new file mode 100644 index 000000000000..20ee6dababcb --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/ks_table.bnf @@ -0,0 +1,5 @@ +keyspace_name::= name +table_name::= [keyspace_name '.' 
] name +name::= unquoted_name | quoted_name +unquoted_name::= re('[a-zA-Z_0-9]\{1, 48}') +quoted_name::= '"' unquoted_name '"' diff --git a/doc/modules/cassandra/examples/BNF/list_permissions_statement.bnf b/doc/modules/cassandra/examples/BNF/list_permissions_statement.bnf new file mode 100644 index 000000000000..a11e2cc01d74 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/list_permissions_statement.bnf @@ -0,0 +1 @@ +list_permissions_statement ::= LIST permissions [ ON resource] [ OF role_name[ NORECURSIVE ] ] diff --git a/doc/modules/cassandra/examples/BNF/list_roles_statement.bnf b/doc/modules/cassandra/examples/BNF/list_roles_statement.bnf new file mode 100644 index 000000000000..bbe3d9b500ea --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/list_roles_statement.bnf @@ -0,0 +1 @@ +list_roles_statement ::= LIST ROLES [ OF role_name] [ NORECURSIVE ] diff --git a/doc/modules/cassandra/examples/BNF/list_users_statement.bnf b/doc/modules/cassandra/examples/BNF/list_users_statement.bnf new file mode 100644 index 000000000000..5750de6c0ca9 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/list_users_statement.bnf @@ -0,0 +1 @@ +list_users_statement::= LIST USERS diff --git a/doc/modules/cassandra/examples/BNF/materialized_view.bnf b/doc/modules/cassandra/examples/BNF/materialized_view.bnf new file mode 100644 index 000000000000..48543a31561c --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/materialized_view.bnf @@ -0,0 +1 @@ +materialized_view::= [keyspace_name '.' ] view_name \ No newline at end of file diff --git a/doc/modules/cassandra/examples/BNF/native_type.bnf b/doc/modules/cassandra/examples/BNF/native_type.bnf new file mode 100644 index 000000000000..c4e9c268db3e --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/native_type.bnf @@ -0,0 +1,4 @@ +native_type::= ASCII | BIGINT | BLOB | BOOLEAN | COUNTER | DATE +| DECIMAL | DOUBLE | DURATION | FLOAT | INET | INT | +SMALLINT | TEXT | TIME | TIMESTAMP | TIMEUUID | TINYINT | +UUID | VARCHAR | VARINT diff --git a/doc/modules/cassandra/examples/BNF/options.bnf b/doc/modules/cassandra/examples/BNF/options.bnf new file mode 100644 index 000000000000..9887165a263a --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/options.bnf @@ -0,0 +1,4 @@ +options::= option ( AND option )* +option::= identifier '=' ( identifier + | constant + | map_literal ) diff --git a/doc/modules/cassandra/examples/BNF/revoke_permission_statement.bnf b/doc/modules/cassandra/examples/BNF/revoke_permission_statement.bnf new file mode 100644 index 000000000000..fd061f9394d7 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/revoke_permission_statement.bnf @@ -0,0 +1 @@ +revoke_permission_statement ::= REVOKE permissions ON resource FROM role_name diff --git a/doc/modules/cassandra/examples/BNF/revoke_role_statement.bnf b/doc/modules/cassandra/examples/BNF/revoke_role_statement.bnf new file mode 100644 index 000000000000..c344eb006f20 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/revoke_role_statement.bnf @@ -0,0 +1 @@ +revoke_role_statement ::= REVOKE role_name FROM role_name diff --git a/doc/modules/cassandra/examples/BNF/role_name.bnf b/doc/modules/cassandra/examples/BNF/role_name.bnf new file mode 100644 index 000000000000..103f84bd26fe --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/role_name.bnf @@ -0,0 +1 @@ +role_name ::= identifier | string diff --git a/doc/modules/cassandra/examples/BNF/select_statement.bnf b/doc/modules/cassandra/examples/BNF/select_statement.bnf new file mode 100644 index 
000000000000..f53da41da57c --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/select_statement.bnf @@ -0,0 +1,21 @@ +select_statement::= SELECT [ JSON | DISTINCT ] ( select_clause | '*' ) + FROM table_name + [ WHERE where_clause ] + [ GROUP BY group_by_clause ] + [ ORDER BY ordering_clause ] + [ PER PARTITION LIMIT (integer | bind_marker) ] + [ LIMIT (integer | bind_marker) ] + [ ALLOW FILTERING ] +select_clause::= selector [ AS identifier ] ( ',' selector [ AS identifier ] )* +selector::= column_name + | term + | CAST '(' selector AS cql_type ')' + | function_name '(' [ selector ( ',' selector )* ] ')' + | COUNT '(' '*' ')' +where_clause::= relation ( AND relation )* +relation::= column_name operator term + | '(' column_name ( ',' column_name )* ')' operator tuple_literal + | TOKEN '(' column_name ( ',' column_name )* ')' operator term +operator::= '=' | '<' | '>' | '<=' | '>=' | '!=' | IN | CONTAINS | CONTAINS KEY +group_by_clause::= column_name ( ',' column_name )* +ordering_clause::= column_name [ ASC | DESC ] ( ',' column_name [ ASC | DESC ] )* diff --git a/doc/modules/cassandra/examples/BNF/term.bnf b/doc/modules/cassandra/examples/BNF/term.bnf new file mode 100644 index 000000000000..504c4c40d8e1 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/term.bnf @@ -0,0 +1,6 @@ +term::= constant | literal | function_call | arithmetic_operation | type_hint | bind_marker +literal::= collection_literal | udt_literal | tuple_literal +function_call::= identifier '(' [ term (',' term)* ] ')' +arithmetic_operation::= '-' term | term ('+' | '-' | '*' | '/' | '%') term +type_hint::= '(' cql_type ')' term +bind_marker::= '?' | ':' identifier diff --git a/doc/modules/cassandra/examples/BNF/trigger_name.bnf b/doc/modules/cassandra/examples/BNF/trigger_name.bnf new file mode 100644 index 000000000000..18a4a7e22238 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/trigger_name.bnf @@ -0,0 +1 @@ +trigger_name ::= identifier diff --git a/doc/modules/cassandra/examples/BNF/truncate_table.bnf b/doc/modules/cassandra/examples/BNF/truncate_table.bnf new file mode 100644 index 000000000000..9c7d3012a2ab --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/truncate_table.bnf @@ -0,0 +1 @@ +truncate_statement::= TRUNCATE [ TABLE ] table_name diff --git a/doc/modules/cassandra/examples/BNF/tuple.bnf b/doc/modules/cassandra/examples/BNF/tuple.bnf new file mode 100644 index 000000000000..f339d5758460 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/tuple.bnf @@ -0,0 +1,2 @@ +tuple_type::= TUPLE '<' cql_type( ',' cql_type)* '>' +tuple_literal::= '(' term( ',' term )* ')' diff --git a/doc/modules/cassandra/examples/BNF/udt.bnf b/doc/modules/cassandra/examples/BNF/udt.bnf new file mode 100644 index 000000000000..c06a5f638b79 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/udt.bnf @@ -0,0 +1,2 @@ +user_defined_type::= udt_name +udt_name::= [ keyspace_name '.'
] identifier diff --git a/doc/modules/cassandra/examples/BNF/udt_literal.bnf b/doc/modules/cassandra/examples/BNF/udt_literal.bnf new file mode 100644 index 000000000000..8c996e5ed143 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/udt_literal.bnf @@ -0,0 +1 @@ +udt_literal::= '{' identifier ':' term ( ',' identifier ':' term)* '}' diff --git a/doc/modules/cassandra/examples/BNF/update_statement.bnf b/doc/modules/cassandra/examples/BNF/update_statement.bnf new file mode 100644 index 000000000000..1a9bdb48544c --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/update_statement.bnf @@ -0,0 +1,13 @@ +update_statement ::= UPDATE table_name + [ USING update_parameter ( AND update_parameter )* ] + SET assignment( ',' assignment )* + WHERE where_clause + [ IF ( EXISTS | condition ( AND condition)*) ] +update_parameter ::= ( TIMESTAMP | TTL ) ( integer | bind_marker ) +assignment ::= simple_selection'=' term + | column_name'=' column_name ( '+' | '-' ) term + | column_name'=' list_literal'+' column_name +simple_selection ::= column_name + | column_name '[' term']' + | column_name'.' field_name +condition ::= simple_selection operator term diff --git a/doc/modules/cassandra/examples/BNF/use_ks.bnf b/doc/modules/cassandra/examples/BNF/use_ks.bnf new file mode 100644 index 000000000000..0347e52d5a2a --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/use_ks.bnf @@ -0,0 +1 @@ +use_statement::= USE keyspace_name diff --git a/doc/modules/cassandra/examples/BNF/view_name.bnf b/doc/modules/cassandra/examples/BNF/view_name.bnf new file mode 100644 index 000000000000..69253677dd84 --- /dev/null +++ b/doc/modules/cassandra/examples/BNF/view_name.bnf @@ -0,0 +1 @@ +view_name::= re('[a-zA-Z_0-9]+') diff --git a/doc/modules/cassandra/examples/CQL/allow_filtering.cql b/doc/modules/cassandra/examples/CQL/allow_filtering.cql new file mode 100644 index 000000000000..c3bf3c69e16b --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/allow_filtering.cql @@ -0,0 +1,9 @@ +CREATE TABLE users ( + username text PRIMARY KEY, + firstname text, + lastname text, + birth_year int, + country text +); + +CREATE INDEX ON users(birth_year); diff --git a/doc/modules/cassandra/examples/CQL/alter_ks.cql b/doc/modules/cassandra/examples/CQL/alter_ks.cql new file mode 100644 index 000000000000..319ed241b1d8 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/alter_ks.cql @@ -0,0 +1,2 @@ +ALTER KEYSPACE excelsior + WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 4}; diff --git a/doc/modules/cassandra/examples/CQL/alter_role.cql b/doc/modules/cassandra/examples/CQL/alter_role.cql new file mode 100644 index 000000000000..c5f7d3d3991a --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/alter_role.cql @@ -0,0 +1 @@ +ALTER ROLE bob WITH PASSWORD = 'PASSWORD_B' AND SUPERUSER = false; diff --git a/doc/modules/cassandra/examples/CQL/alter_table_add_column.cql b/doc/modules/cassandra/examples/CQL/alter_table_add_column.cql new file mode 100644 index 000000000000..e7703ed6ec57 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/alter_table_add_column.cql @@ -0,0 +1 @@ +ALTER TABLE addamsFamily ADD gravesite varchar; diff --git a/doc/modules/cassandra/examples/CQL/alter_table_spec_retry.cql b/doc/modules/cassandra/examples/CQL/alter_table_spec_retry.cql new file mode 100644 index 000000000000..bb9aa618402a --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/alter_table_spec_retry.cql @@ -0,0 +1 @@ +ALTER TABLE users WITH speculative_retry = '10ms'; diff --git
a/doc/modules/cassandra/examples/CQL/alter_table_spec_retry_percent.cql b/doc/modules/cassandra/examples/CQL/alter_table_spec_retry_percent.cql new file mode 100644 index 000000000000..a5351c68feb6 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/alter_table_spec_retry_percent.cql @@ -0,0 +1 @@ +ALTER TABLE users WITH speculative_retry = '99PERCENTILE'; diff --git a/doc/modules/cassandra/examples/CQL/alter_table_with_comment.cql b/doc/modules/cassandra/examples/CQL/alter_table_with_comment.cql new file mode 100644 index 000000000000..9b82d7243f29 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/alter_table_with_comment.cql @@ -0,0 +1,2 @@ +ALTER TABLE addamsFamily + WITH comment = 'A most excellent and useful table'; diff --git a/doc/modules/cassandra/examples/CQL/alter_user.cql b/doc/modules/cassandra/examples/CQL/alter_user.cql new file mode 100644 index 000000000000..97de7ba1dd79 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/alter_user.cql @@ -0,0 +1,2 @@ +ALTER USER alice WITH PASSWORD 'PASSWORD_A'; +ALTER USER bob SUPERUSER; diff --git a/doc/modules/cassandra/examples/CQL/as.cql b/doc/modules/cassandra/examples/CQL/as.cql new file mode 100644 index 000000000000..a8b9f035689e --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/as.cql @@ -0,0 +1,13 @@ +// Without alias +SELECT intAsBlob(4) FROM t; + +// intAsBlob(4) +// -------------- +// 0x00000004 + +// With alias +SELECT intAsBlob(4) AS four FROM t; + +// four +// ------------ +// 0x00000004 diff --git a/doc/modules/cassandra/examples/CQL/autoexpand_exclude_dc.cql b/doc/modules/cassandra/examples/CQL/autoexpand_exclude_dc.cql new file mode 100644 index 000000000000..c320c52fc158 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/autoexpand_exclude_dc.cql @@ -0,0 +1,4 @@ +CREATE KEYSPACE excalibur + WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor' : 3, 'DC2': 0}; + +DESCRIBE KEYSPACE excalibur; diff --git a/doc/modules/cassandra/examples/CQL/autoexpand_ks.cql b/doc/modules/cassandra/examples/CQL/autoexpand_ks.cql new file mode 100644 index 000000000000..d5bef55acad0 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/autoexpand_ks.cql @@ -0,0 +1,4 @@ +CREATE KEYSPACE excalibur + WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor' : 3}; + +DESCRIBE KEYSPACE excalibur; diff --git a/doc/modules/cassandra/examples/CQL/autoexpand_ks_override.cql b/doc/modules/cassandra/examples/CQL/autoexpand_ks_override.cql new file mode 100644 index 000000000000..d6800fbe0512 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/autoexpand_ks_override.cql @@ -0,0 +1,4 @@ +CREATE KEYSPACE excalibur + WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor' : 3, 'DC2': 2}; + +DESCRIBE KEYSPACE excalibur; diff --git a/doc/modules/cassandra/examples/CQL/avg.cql b/doc/modules/cassandra/examples/CQL/avg.cql new file mode 100644 index 000000000000..2882327520e9 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/avg.cql @@ -0,0 +1 @@ +SELECT AVG (players) FROM plays; diff --git a/doc/modules/cassandra/examples/CQL/batch_statement.cql b/doc/modules/cassandra/examples/CQL/batch_statement.cql new file mode 100644 index 000000000000..e9148e82410e --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/batch_statement.cql @@ -0,0 +1,6 @@ +BEGIN BATCH + INSERT INTO users (userid, password, name) VALUES ('user2', 'ch@ngem3b', 'second user'); + UPDATE users SET password = 'ps22dhds' WHERE userid = 'user3'; + INSERT INTO users (userid, password) VALUES ('user4', 
'ch@ngem3c'); + DELETE name FROM users WHERE userid = 'user1'; +APPLY BATCH; diff --git a/doc/modules/cassandra/examples/CQL/caching_option.cql b/doc/modules/cassandra/examples/CQL/caching_option.cql new file mode 100644 index 000000000000..b48b171ec32b --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/caching_option.cql @@ -0,0 +1,6 @@ +CREATE TABLE simple ( +id int, +key text, +value text, +PRIMARY KEY (key, value) +) WITH caching = {'keys': 'ALL', 'rows_per_partition': 10}; diff --git a/doc/modules/cassandra/examples/CQL/chunk_length.cql b/doc/modules/cassandra/examples/CQL/chunk_length.cql new file mode 100644 index 000000000000..b3504fe04098 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/chunk_length.cql @@ -0,0 +1,6 @@ +CREATE TABLE simple ( + id int, + key text, + value text, + PRIMARY KEY (key, value) +) WITH compression = {'class': 'LZ4Compressor', 'chunk_length_in_kb': 4}; diff --git a/doc/modules/cassandra/examples/CQL/count.cql b/doc/modules/cassandra/examples/CQL/count.cql new file mode 100644 index 000000000000..1993c0e4a676 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/count.cql @@ -0,0 +1,2 @@ +SELECT COUNT (*) FROM plays; +SELECT COUNT (1) FROM plays; diff --git a/doc/modules/cassandra/examples/CQL/count_nonnull.cql b/doc/modules/cassandra/examples/CQL/count_nonnull.cql new file mode 100644 index 000000000000..6543b996326c --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/count_nonnull.cql @@ -0,0 +1 @@ +SELECT COUNT (scores) FROM plays; diff --git a/doc/modules/cassandra/examples/CQL/create_function.cql b/doc/modules/cassandra/examples/CQL/create_function.cql new file mode 100644 index 000000000000..e7d5823a0b3e --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_function.cql @@ -0,0 +1,15 @@ +CREATE OR REPLACE FUNCTION somefunction(somearg int, anotherarg text, complexarg frozen<someUDT>, listarg list<bigint>) + RETURNS NULL ON NULL INPUT + RETURNS text + LANGUAGE java + AS $$ + // some Java code + $$; + +CREATE FUNCTION IF NOT EXISTS akeyspace.fname(someArg int) + CALLED ON NULL INPUT + RETURNS text + LANGUAGE java + AS $$ + // some Java code + $$; diff --git a/doc/modules/cassandra/examples/CQL/create_index.cql b/doc/modules/cassandra/examples/CQL/create_index.cql new file mode 100644 index 000000000000..f84452aa1d59 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_index.cql @@ -0,0 +1,8 @@ +CREATE INDEX userIndex ON NerdMovies (user); +CREATE INDEX ON Mutants (abilityId); +CREATE INDEX ON users (keys(favs)); +CREATE CUSTOM INDEX ON users (email) + USING 'path.to.the.IndexClass'; +CREATE CUSTOM INDEX ON users (email) + USING 'path.to.the.IndexClass' + WITH OPTIONS = {'storage': '/mnt/ssd/indexes/'}; diff --git a/doc/modules/cassandra/examples/CQL/create_ks.cql b/doc/modules/cassandra/examples/CQL/create_ks.cql new file mode 100644 index 000000000000..e81d7f7bf36c --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_ks.cql @@ -0,0 +1,6 @@ +CREATE KEYSPACE excelsior + WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 3}; + +CREATE KEYSPACE excalibur + WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1' : 1, 'DC2' : 3} + AND durable_writes = false; diff --git a/doc/modules/cassandra/examples/CQL/create_ks2_backup.cql b/doc/modules/cassandra/examples/CQL/create_ks2_backup.cql new file mode 100644 index 000000000000..52f9308f9759 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_ks2_backup.cql @@ -0,0 +1,2 @@ +CREATE KEYSPACE catalogkeyspace + WITH replication = {'class': 'SimpleStrategy',
'replication_factor' : 3}; diff --git a/doc/modules/cassandra/examples/CQL/create_ks_backup.cql b/doc/modules/cassandra/examples/CQL/create_ks_backup.cql new file mode 100644 index 000000000000..593490474a5a --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_ks_backup.cql @@ -0,0 +1,2 @@ +CREATE KEYSPACE cqlkeyspace + WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 3}; diff --git a/doc/modules/cassandra/examples/CQL/create_ks_trans_repl.cql b/doc/modules/cassandra/examples/CQL/create_ks_trans_repl.cql new file mode 100644 index 000000000000..afff433eec8f --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_ks_trans_repl.cql @@ -0,0 +1,2 @@ +CREATE KEYSPACE some_keyspace + WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1' : '3/1', 'DC2' : '5/2'}; diff --git a/doc/modules/cassandra/examples/CQL/create_mv_statement.cql b/doc/modules/cassandra/examples/CQL/create_mv_statement.cql new file mode 100644 index 000000000000..0792c3e027d8 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_mv_statement.cql @@ -0,0 +1,5 @@ +CREATE MATERIALIZED VIEW monkeySpecies_by_population AS + SELECT * FROM monkeySpecies + WHERE population IS NOT NULL AND species IS NOT NULL + PRIMARY KEY (population, species) + WITH comment='Allow query by population instead of species'; diff --git a/doc/modules/cassandra/examples/CQL/create_role.cql b/doc/modules/cassandra/examples/CQL/create_role.cql new file mode 100644 index 000000000000..c8d0d640de52 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_role.cql @@ -0,0 +1,6 @@ +CREATE ROLE new_role; +CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true; +CREATE ROLE bob WITH PASSWORD = 'password_b' AND LOGIN = true AND SUPERUSER = true; +CREATE ROLE carlos WITH OPTIONS = { 'custom_option1' : 'option1_value', 'custom_option2' : 99 }; +CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true AND ACCESS TO DATACENTERS {'DC1', 'DC3'}; +CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true AND ACCESS TO ALL DATACENTERS; diff --git a/doc/modules/cassandra/examples/CQL/create_role_ifnotexists.cql b/doc/modules/cassandra/examples/CQL/create_role_ifnotexists.cql new file mode 100644 index 000000000000..0b9600f9c4ca --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_role_ifnotexists.cql @@ -0,0 +1,2 @@ +CREATE ROLE other_role; +CREATE ROLE IF NOT EXISTS other_role; diff --git a/doc/modules/cassandra/examples/CQL/create_static_column.cql b/doc/modules/cassandra/examples/CQL/create_static_column.cql new file mode 100644 index 000000000000..95e8ff21ec25 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_static_column.cql @@ -0,0 +1,7 @@ +CREATE TABLE t ( + pk int, + t int, + v text, + s text static, + PRIMARY KEY (pk, t) +); diff --git a/doc/modules/cassandra/examples/CQL/create_table.cql b/doc/modules/cassandra/examples/CQL/create_table.cql new file mode 100644 index 000000000000..57b557dace86 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_table.cql @@ -0,0 +1,23 @@ +CREATE TABLE monkey_species ( + species text PRIMARY KEY, + common_name text, + population varint, + average_size int +) WITH comment='Important biological records'; + +CREATE TABLE timeline ( + userid uuid, + posted_month int, + posted_time uuid, + body text, + posted_by text, + PRIMARY KEY (userid, posted_month, posted_time) +) WITH compaction = { 'class' : 'LeveledCompactionStrategy' }; + +CREATE TABLE loads ( + machine inet, + cpu int, + mtime timeuuid, + load float, + PRIMARY KEY
((machine, cpu), mtime) +) WITH CLUSTERING ORDER BY (mtime DESC); diff --git a/doc/modules/cassandra/examples/CQL/create_table2_backup.cql b/doc/modules/cassandra/examples/CQL/create_table2_backup.cql new file mode 100644 index 000000000000..f3393008f520 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_table2_backup.cql @@ -0,0 +1,14 @@ +USE catalogkeyspace; +CREATE TABLE journal ( + id int, + name text, + publisher text, + PRIMARY KEY (id) +); + +CREATE TABLE magazine ( + id int, + name text, + publisher text, + PRIMARY KEY (id) +); diff --git a/doc/modules/cassandra/examples/CQL/create_table_backup.cql b/doc/modules/cassandra/examples/CQL/create_table_backup.cql new file mode 100644 index 000000000000..c80b99969d37 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_table_backup.cql @@ -0,0 +1,13 @@ +USE cqlkeyspace; +CREATE TABLE t ( + id int, + k int, + v text, + PRIMARY KEY (id) +); +CREATE TABLE t2 ( + id int, + k int, + v text, + PRIMARY KEY (id) +); diff --git a/doc/modules/cassandra/examples/CQL/create_table_clustercolumn.cql b/doc/modules/cassandra/examples/CQL/create_table_clustercolumn.cql new file mode 100644 index 000000000000..f7de266b1b8a --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_table_clustercolumn.cql @@ -0,0 +1,7 @@ +CREATE TABLE t2 ( + a int, + b int, + c int, + d int, + PRIMARY KEY (a, b, c) +); diff --git a/doc/modules/cassandra/examples/CQL/create_table_compound_pk.cql b/doc/modules/cassandra/examples/CQL/create_table_compound_pk.cql new file mode 100644 index 000000000000..eb199c73146c --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_table_compound_pk.cql @@ -0,0 +1,7 @@ +CREATE TABLE t ( + a int, + b int, + c int, + d int, + PRIMARY KEY ((a, b), c, d) +); diff --git a/doc/modules/cassandra/examples/CQL/create_table_simple.cql b/doc/modules/cassandra/examples/CQL/create_table_simple.cql new file mode 100644 index 000000000000..0ebe7475bc42 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_table_simple.cql @@ -0,0 +1,4 @@ +CREATE TABLE users ( + userid text PRIMARY KEY, + username text, +); diff --git a/doc/modules/cassandra/examples/CQL/create_table_single_pk.cql b/doc/modules/cassandra/examples/CQL/create_table_single_pk.cql new file mode 100644 index 000000000000..ce6fff8d720e --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_table_single_pk.cql @@ -0,0 +1 @@ +CREATE TABLE t (k text PRIMARY KEY); diff --git a/doc/modules/cassandra/examples/CQL/create_trigger.cql b/doc/modules/cassandra/examples/CQL/create_trigger.cql new file mode 100644 index 000000000000..9bbf2f240577 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_trigger.cql @@ -0,0 +1 @@ +CREATE TRIGGER myTrigger ON myTable USING 'org.apache.cassandra.triggers.InvertedIndex'; diff --git a/doc/modules/cassandra/examples/CQL/create_user.cql b/doc/modules/cassandra/examples/CQL/create_user.cql new file mode 100644 index 000000000000..b6531ebbc482 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_user.cql @@ -0,0 +1,2 @@ +CREATE USER alice WITH PASSWORD 'password_a' SUPERUSER; +CREATE USER bob WITH PASSWORD 'password_b' NOSUPERUSER; diff --git a/doc/modules/cassandra/examples/CQL/create_user_role.cql b/doc/modules/cassandra/examples/CQL/create_user_role.cql new file mode 100644 index 000000000000..810f76ca9c39 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/create_user_role.cql @@ -0,0 +1,14 @@ +CREATE USER alice WITH PASSWORD 'password_a' SUPERUSER; +CREATE ROLE alice WITH PASSWORD = 'password_a' AND 
LOGIN = true AND SUPERUSER = true; + +CREATE USER IF NOT EXISTS alice WITH PASSWORD 'password_a' SUPERUSER; +CREATE ROLE IF NOT EXISTS alice WITH PASSWORD = 'password_a' AND LOGIN = true AND SUPERUSER = true; + +CREATE USER alice WITH PASSWORD 'password_a' NOSUPERUSER; +CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true AND SUPERUSER = false; + +CREATE USER alice WITH PASSWORD 'password_a' NOSUPERUSER; +CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true; + +CREATE USER alice WITH PASSWORD 'password_a'; +CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true; diff --git a/doc/modules/cassandra/examples/CQL/currentdate.cql b/doc/modules/cassandra/examples/CQL/currentdate.cql new file mode 100644 index 000000000000..0bed1b2b9e8d --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/currentdate.cql @@ -0,0 +1 @@ +SELECT * FROM myTable WHERE date >= currentDate() - 2d; diff --git a/doc/modules/cassandra/examples/CQL/datetime_arithmetic.cql b/doc/modules/cassandra/examples/CQL/datetime_arithmetic.cql new file mode 100644 index 000000000000..310bf3bab69d --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/datetime_arithmetic.cql @@ -0,0 +1 @@ +SELECT * FROM myTable WHERE t = '2017-01-01' - 2d; diff --git a/doc/modules/cassandra/examples/CQL/delete_all_elements_list.cql b/doc/modules/cassandra/examples/CQL/delete_all_elements_list.cql new file mode 100644 index 000000000000..3d026683b39c --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/delete_all_elements_list.cql @@ -0,0 +1 @@ +UPDATE plays SET scores = scores - [ 12, 21 ] WHERE id = '123-afde'; diff --git a/doc/modules/cassandra/examples/CQL/delete_element_list.cql b/doc/modules/cassandra/examples/CQL/delete_element_list.cql new file mode 100644 index 000000000000..26b3e58f00b0 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/delete_element_list.cql @@ -0,0 +1 @@ +DELETE scores[1] FROM plays WHERE id = '123-afde'; diff --git a/doc/modules/cassandra/examples/CQL/delete_map.cql b/doc/modules/cassandra/examples/CQL/delete_map.cql new file mode 100644 index 000000000000..e16b1340553c --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/delete_map.cql @@ -0,0 +1,2 @@ +DELETE favs['author'] FROM users WHERE id = 'jsmith'; +UPDATE users SET favs = favs - { 'movie', 'band'} WHERE id = 'jsmith'; diff --git a/doc/modules/cassandra/examples/CQL/delete_set.cql b/doc/modules/cassandra/examples/CQL/delete_set.cql new file mode 100644 index 000000000000..308da3ceace9 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/delete_set.cql @@ -0,0 +1 @@ +UPDATE images SET tags = tags - { 'cat' } WHERE name = 'cat.jpg'; diff --git a/doc/modules/cassandra/examples/CQL/delete_statement.cql b/doc/modules/cassandra/examples/CQL/delete_statement.cql new file mode 100644 index 000000000000..b574e7167d6e --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/delete_statement.cql @@ -0,0 +1,5 @@ +DELETE FROM NerdMovies USING TIMESTAMP 1240003134 + WHERE movie = 'Serenity'; + +DELETE phone FROM Users + WHERE userid IN (C73DE1D3-AF08-40F3-B124-3FF3E5109F22, B70DE1D0-9908-4AE3-BE34-5573E5B09F14); diff --git a/doc/modules/cassandra/examples/CQL/drop_aggregate.cql b/doc/modules/cassandra/examples/CQL/drop_aggregate.cql new file mode 100644 index 000000000000..f05b69ae8b13 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/drop_aggregate.cql @@ -0,0 +1,4 @@ +DROP AGGREGATE myAggregate; +DROP AGGREGATE myKeyspace.anAggregate; +DROP AGGREGATE someAggregate ( int ); +DROP AGGREGATE someAggregate ( text ); diff --git 
a/doc/modules/cassandra/examples/CQL/drop_function.cql b/doc/modules/cassandra/examples/CQL/drop_function.cql new file mode 100644 index 000000000000..6d444c170661 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/drop_function.cql @@ -0,0 +1,4 @@ +DROP FUNCTION myfunction; +DROP FUNCTION mykeyspace.afunction; +DROP FUNCTION afunction ( int ); +DROP FUNCTION afunction ( text ); diff --git a/doc/modules/cassandra/examples/CQL/drop_ks.cql b/doc/modules/cassandra/examples/CQL/drop_ks.cql new file mode 100644 index 000000000000..46a920dbbd7c --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/drop_ks.cql @@ -0,0 +1 @@ +DROP KEYSPACE excelsior; diff --git a/doc/modules/cassandra/examples/CQL/drop_trigger.cql b/doc/modules/cassandra/examples/CQL/drop_trigger.cql new file mode 100644 index 000000000000..05a7a95c1172 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/drop_trigger.cql @@ -0,0 +1 @@ +DROP TRIGGER myTrigger ON myTable; diff --git a/doc/modules/cassandra/examples/CQL/function_dollarsign.cql b/doc/modules/cassandra/examples/CQL/function_dollarsign.cql new file mode 100644 index 000000000000..878d04449e63 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/function_dollarsign.cql @@ -0,0 +1,15 @@ +CREATE FUNCTION some_function ( arg int ) + RETURNS NULL ON NULL INPUT + RETURNS int + LANGUAGE java + AS $$ return arg; $$; + +SELECT some_function(column) FROM atable ...; +UPDATE atable SET col = some_function(?) ...; + +CREATE TYPE custom_type (txt text, i int); +CREATE FUNCTION fct_using_udt ( udtarg frozen<custom_type> ) + RETURNS NULL ON NULL INPUT + RETURNS text + LANGUAGE java + AS $$ return udtarg.getString("txt"); $$; diff --git a/doc/modules/cassandra/examples/CQL/function_overload.cql b/doc/modules/cassandra/examples/CQL/function_overload.cql new file mode 100644 index 000000000000..d70e8e9ff744 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/function_overload.cql @@ -0,0 +1,2 @@ +CREATE FUNCTION sample ( arg int ) ...; +CREATE FUNCTION sample ( arg text ) ...; diff --git a/doc/modules/cassandra/examples/CQL/function_udfcontext.cql b/doc/modules/cassandra/examples/CQL/function_udfcontext.cql new file mode 100644 index 000000000000..87f89fef6e60 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/function_udfcontext.cql @@ -0,0 +1,11 @@ +CREATE TYPE custom_type (txt text, i int); +CREATE FUNCTION fct_using_udt ( somearg int ) + RETURNS NULL ON NULL INPUT + RETURNS custom_type + LANGUAGE java + AS $$ + UDTValue udt = udfContext.newReturnUDTValue(); + udt.setString("txt", "some string"); + udt.setInt("i", 42); + return udt; + $$; diff --git a/doc/modules/cassandra/examples/CQL/grant_describe.cql b/doc/modules/cassandra/examples/CQL/grant_describe.cql new file mode 100644 index 000000000000..721814565091 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/grant_describe.cql @@ -0,0 +1 @@ +GRANT DESCRIBE ON ALL ROLES TO role_admin; diff --git a/doc/modules/cassandra/examples/CQL/grant_drop.cql b/doc/modules/cassandra/examples/CQL/grant_drop.cql new file mode 100644 index 000000000000..745369d4298d --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/grant_drop.cql @@ -0,0 +1 @@ +GRANT DROP ON keyspace1.table1 TO schema_owner; diff --git a/doc/modules/cassandra/examples/CQL/grant_execute.cql b/doc/modules/cassandra/examples/CQL/grant_execute.cql new file mode 100644 index 000000000000..96b34de99dcf --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/grant_execute.cql @@ -0,0 +1 @@ +GRANT EXECUTE ON FUNCTION keyspace1.user_function( int ) TO report_writer; diff
--git a/doc/modules/cassandra/examples/CQL/grant_modify.cql b/doc/modules/cassandra/examples/CQL/grant_modify.cql new file mode 100644 index 000000000000..7f9a30b225dc --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/grant_modify.cql @@ -0,0 +1 @@ +GRANT MODIFY ON KEYSPACE keyspace1 TO data_writer; diff --git a/doc/modules/cassandra/examples/CQL/grant_perm.cql b/doc/modules/cassandra/examples/CQL/grant_perm.cql new file mode 100644 index 000000000000..1dc9a7b18dd5 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/grant_perm.cql @@ -0,0 +1 @@ +GRANT SELECT ON ALL KEYSPACES TO data_reader; diff --git a/doc/modules/cassandra/examples/CQL/grant_role.cql b/doc/modules/cassandra/examples/CQL/grant_role.cql new file mode 100644 index 000000000000..1adffb309288 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/grant_role.cql @@ -0,0 +1 @@ +GRANT report_writer TO alice; diff --git a/doc/modules/cassandra/examples/CQL/insert_data2_backup.cql b/doc/modules/cassandra/examples/CQL/insert_data2_backup.cql new file mode 100644 index 000000000000..35e20a3bd20a --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/insert_data2_backup.cql @@ -0,0 +1,5 @@ +INSERT INTO journal (id, name, publisher) VALUES (0, 'Apache Cassandra Magazine', 'Apache Cassandra'); +INSERT INTO journal (id, name, publisher) VALUES (1, 'Couchbase Magazine', 'Couchbase'); + +INSERT INTO magazine (id, name, publisher) VALUES (0, 'Apache Cassandra Magazine', 'Apache Cassandra'); +INSERT INTO magazine (id, name, publisher) VALUES (1, 'Couchbase Magazine', 'Couchbase'); diff --git a/doc/modules/cassandra/examples/CQL/insert_data_backup.cql b/doc/modules/cassandra/examples/CQL/insert_data_backup.cql new file mode 100644 index 000000000000..15eb37575f1d --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/insert_data_backup.cql @@ -0,0 +1,6 @@ +INSERT INTO t (id, k, v) VALUES (0, 0, 'val0'); +INSERT INTO t (id, k, v) VALUES (1, 1, 'val1'); + +INSERT INTO t2 (id, k, v) VALUES (0, 0, 'val0'); +INSERT INTO t2 (id, k, v) VALUES (1, 1, 'val1'); +INSERT INTO t2 (id, k, v) VALUES (2, 2, 'val2'); diff --git a/doc/modules/cassandra/examples/CQL/insert_duration.cql b/doc/modules/cassandra/examples/CQL/insert_duration.cql new file mode 100644 index 000000000000..b52801bbc2a2 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/insert_duration.cql @@ -0,0 +1,6 @@ +INSERT INTO RiderResults (rider, race, result) + VALUES ('Christopher Froome', 'Tour de France', 89h4m48s); +INSERT INTO RiderResults (rider, race, result) + VALUES ('BARDET Romain', 'Tour de France', PT89H8M53S); +INSERT INTO RiderResults (rider, race, result) + VALUES ('QUINTANA Nairo', 'Tour de France', P0000-00-00T89:09:09); diff --git a/doc/modules/cassandra/examples/CQL/insert_json.cql b/doc/modules/cassandra/examples/CQL/insert_json.cql new file mode 100644 index 000000000000..d3a5deca8b89 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/insert_json.cql @@ -0,0 +1 @@ +INSERT INTO mytable JSON '{ "\"myKey\"": 0, "value": 0}'; diff --git a/doc/modules/cassandra/examples/CQL/insert_statement.cql b/doc/modules/cassandra/examples/CQL/insert_statement.cql new file mode 100644 index 000000000000..0f7a9435df34 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/insert_statement.cql @@ -0,0 +1,5 @@ +INSERT INTO NerdMovies (movie, director, main_actor, year) + VALUES ('Serenity', 'Joss Whedon', 'Nathan Fillion', 2005) + USING TTL 86400; + +INSERT INTO NerdMovies JSON '{"movie": "Serenity", "director": "Joss Whedon", "year": 2005}'; diff --git 
a/doc/modules/cassandra/examples/CQL/insert_static_data.cql b/doc/modules/cassandra/examples/CQL/insert_static_data.cql new file mode 100644 index 000000000000..c6a588f95981 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/insert_static_data.cql @@ -0,0 +1,2 @@ +INSERT INTO t (pk, t, v, s) VALUES (0, 0, 'val0', 'static0'); +INSERT INTO t (pk, t, v, s) VALUES (0, 1, 'val1', 'static1'); diff --git a/doc/modules/cassandra/examples/CQL/insert_table_cc_addl.cql b/doc/modules/cassandra/examples/CQL/insert_table_cc_addl.cql new file mode 100644 index 000000000000..f574d539120b --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/insert_table_cc_addl.cql @@ -0,0 +1 @@ +INSERT INTO t3 (a,b,c,d) VALUES (0,0,0,9); diff --git a/doc/modules/cassandra/examples/CQL/insert_table_clustercolumn.cql b/doc/modules/cassandra/examples/CQL/insert_table_clustercolumn.cql new file mode 100644 index 000000000000..449f921d5ca9 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/insert_table_clustercolumn.cql @@ -0,0 +1,5 @@ +INSERT INTO t2 (a, b, c, d) VALUES (0,0,0,0); +INSERT INTO t2 (a, b, c, d) VALUES (0,0,1,1); +INSERT INTO t2 (a, b, c, d) VALUES (0,1,2,2); +INSERT INTO t2 (a, b, c, d) VALUES (0,1,3,3); +INSERT INTO t2 (a, b, c, d) VALUES (1,1,4,4); diff --git a/doc/modules/cassandra/examples/CQL/insert_table_clustercolumn2.cql b/doc/modules/cassandra/examples/CQL/insert_table_clustercolumn2.cql new file mode 100644 index 000000000000..a048c9f7fb0e --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/insert_table_clustercolumn2.cql @@ -0,0 +1,5 @@ +INSERT INTO t3 (a, b, c, d) VALUES (0,0,0,0); +INSERT INTO t3 (a, b, c, d) VALUES (0,0,1,1); +INSERT INTO t3 (a, b, c, d) VALUES (0,1,2,2); +INSERT INTO t3 (a, b, c, d) VALUES (0,1,3,3); +INSERT INTO t3 (a, b, c, d) VALUES (1,1,4,4); diff --git a/doc/modules/cassandra/examples/CQL/insert_table_compound_pk.cql b/doc/modules/cassandra/examples/CQL/insert_table_compound_pk.cql new file mode 100644 index 000000000000..3ce1953fe86a --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/insert_table_compound_pk.cql @@ -0,0 +1,5 @@ +INSERT INTO t (a, b, c, d) VALUES (0,0,0,0); +INSERT INTO t (a, b, c, d) VALUES (0,0,1,1); +INSERT INTO t (a, b, c, d) VALUES (0,1,2,2); +INSERT INTO t (a, b, c, d) VALUES (0,1,3,3); +INSERT INTO t (a, b, c, d) VALUES (1,1,4,4); diff --git a/doc/modules/cassandra/examples/CQL/insert_udt.cql b/doc/modules/cassandra/examples/CQL/insert_udt.cql new file mode 100644 index 000000000000..5c6f1766ef48 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/insert_udt.cql @@ -0,0 +1,17 @@ +INSERT INTO user (name, addresses) + VALUES ('z3 Pr3z1den7', { + 'home' : { + street: '1600 Pennsylvania Ave NW', + city: 'Washington', + zip: '20500', + phones: { 'cell' : { country_code: 1, number: '202 456-1111' }, + 'landline' : { country_code: 1, number: '...' } } + }, + 'work' : { + street: '1600 Pennsylvania Ave NW', + city: 'Washington', + zip: '20500', + phones: { 'fax' : { country_code: 1, number: '...' 
} } + } + } +); diff --git a/doc/modules/cassandra/examples/CQL/list.cql b/doc/modules/cassandra/examples/CQL/list.cql new file mode 100644 index 000000000000..4d1ef13f863b --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/list.cql @@ -0,0 +1,12 @@ +CREATE TABLE plays ( + id text PRIMARY KEY, + game text, + players int, + scores list<int> // A list of integers +) + +INSERT INTO plays (id, game, players, scores) + VALUES ('123-afde', 'quake', 3, [17, 4, 2]); + +// Replace the existing list entirely +UPDATE plays SET scores = [ 3, 9, 4] WHERE id = '123-afde'; diff --git a/doc/modules/cassandra/examples/CQL/list_all_perm.cql b/doc/modules/cassandra/examples/CQL/list_all_perm.cql new file mode 100644 index 000000000000..efbcfc86e740 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/list_all_perm.cql @@ -0,0 +1 @@ +LIST ALL PERMISSIONS ON keyspace1.table1 OF bob; diff --git a/doc/modules/cassandra/examples/CQL/list_perm.cql b/doc/modules/cassandra/examples/CQL/list_perm.cql new file mode 100644 index 000000000000..094bf0933504 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/list_perm.cql @@ -0,0 +1 @@ +LIST ALL PERMISSIONS OF alice; diff --git a/doc/modules/cassandra/examples/CQL/list_roles.cql b/doc/modules/cassandra/examples/CQL/list_roles.cql new file mode 100644 index 000000000000..5c0f0631aca8 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/list_roles.cql @@ -0,0 +1 @@ +LIST ROLES; diff --git a/doc/modules/cassandra/examples/CQL/list_roles_nonrecursive.cql b/doc/modules/cassandra/examples/CQL/list_roles_nonrecursive.cql new file mode 100644 index 000000000000..eea62189445c --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/list_roles_nonrecursive.cql @@ -0,0 +1 @@ +LIST ROLES OF bob NORECURSIVE; diff --git a/doc/modules/cassandra/examples/CQL/list_roles_of.cql b/doc/modules/cassandra/examples/CQL/list_roles_of.cql new file mode 100644 index 000000000000..c338ca3452ec --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/list_roles_of.cql @@ -0,0 +1 @@ +LIST ROLES OF alice; diff --git a/doc/modules/cassandra/examples/CQL/list_select_perm.cql b/doc/modules/cassandra/examples/CQL/list_select_perm.cql new file mode 100644 index 000000000000..c085df47ce98 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/list_select_perm.cql @@ -0,0 +1 @@ +LIST SELECT PERMISSIONS OF carlos; diff --git a/doc/modules/cassandra/examples/CQL/map.cql b/doc/modules/cassandra/examples/CQL/map.cql new file mode 100644 index 000000000000..ca9ca5e2e4df --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/map.cql @@ -0,0 +1,11 @@ +CREATE TABLE users ( + id text PRIMARY KEY, + name text, + favs map<text, text> // A map of text keys, and text values +); + +INSERT INTO users (id, name, favs) + VALUES ('jsmith', 'John Smith', { 'fruit' : 'Apple', 'band' : 'Beatles' }); + +// Replace the existing map entirely.
+UPDATE users SET favs = { 'fruit' : 'Banana' } WHERE id = 'jsmith'; diff --git a/doc/modules/cassandra/examples/CQL/min_max.cql b/doc/modules/cassandra/examples/CQL/min_max.cql new file mode 100644 index 000000000000..3f31cc5bac4f --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/min_max.cql @@ -0,0 +1 @@ +SELECT MIN (players), MAX (players) FROM plays WHERE game = 'quake'; diff --git a/doc/modules/cassandra/examples/CQL/mv_table_def.cql b/doc/modules/cassandra/examples/CQL/mv_table_def.cql new file mode 100644 index 000000000000..106fe1181390 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/mv_table_def.cql @@ -0,0 +1,8 @@ +CREATE TABLE t ( + k int, + c1 int, + c2 int, + v1 int, + v2 int, + PRIMARY KEY (k, c1, c2) +); diff --git a/doc/modules/cassandra/examples/CQL/mv_table_error.cql b/doc/modules/cassandra/examples/CQL/mv_table_error.cql new file mode 100644 index 000000000000..e7560f92a16f --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/mv_table_error.cql @@ -0,0 +1,13 @@ +// Error: cannot include both v1 and v2 in the primary key as both are not in the base table primary key + +CREATE MATERIALIZED VIEW mv1 AS + SELECT * FROM t + WHERE k IS NOT NULL AND c1 IS NOT NULL AND c2 IS NOT NULL AND v1 IS NOT NULL + PRIMARY KEY (v1, v2, k, c1, c2); + +// Error: must include k in the primary as it's a base table primary key column + +CREATE MATERIALIZED VIEW mv1 AS + SELECT * FROM t + WHERE c1 IS NOT NULL AND c2 IS NOT NULL + PRIMARY KEY (c1, c2); diff --git a/doc/modules/cassandra/examples/CQL/mv_table_from_base.cql b/doc/modules/cassandra/examples/CQL/mv_table_from_base.cql new file mode 100644 index 000000000000..bd2f9f2d5e8e --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/mv_table_from_base.cql @@ -0,0 +1,9 @@ +CREATE MATERIALIZED VIEW mv1 AS + SELECT * FROM t + WHERE k IS NOT NULL AND c1 IS NOT NULL AND c2 IS NOT NULL + PRIMARY KEY (c1, k, c2); + +CREATE MATERIALIZED VIEW mv1 AS + SELECT * FROM t + WHERE k IS NOT NULL AND c1 IS NOT NULL AND c2 IS NOT NULL + PRIMARY KEY (v1, k, c1, c2); diff --git a/doc/modules/cassandra/examples/CQL/no_revoke.cql b/doc/modules/cassandra/examples/CQL/no_revoke.cql new file mode 100644 index 000000000000..b6a044cf2038 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/no_revoke.cql @@ -0,0 +1,5 @@ +* `system_schema.keyspaces` +* `system_schema.columns` +* `system_schema.tables` +* `system.local` +* `system.peers` diff --git a/doc/modules/cassandra/examples/CQL/qs_create_ks.cql b/doc/modules/cassandra/examples/CQL/qs_create_ks.cql new file mode 100644 index 000000000000..2dba1bd5d195 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/qs_create_ks.cql @@ -0,0 +1,2 @@ +# Create a keyspace +CREATE KEYSPACE IF NOT EXISTS store WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : '1' }; diff --git a/doc/modules/cassandra/examples/CQL/qs_create_table.cql b/doc/modules/cassandra/examples/CQL/qs_create_table.cql new file mode 100644 index 000000000000..daeef5f49dfb --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/qs_create_table.cql @@ -0,0 +1,6 @@ +# Create a table +CREATE TABLE IF NOT EXISTS store.shopping_cart ( + userid text PRIMARY KEY, + item_count int, + last_update_timestamp timestamp +); diff --git a/doc/modules/cassandra/examples/CQL/qs_insert_data.cql b/doc/modules/cassandra/examples/CQL/qs_insert_data.cql new file mode 100644 index 000000000000..130f90134697 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/qs_insert_data.cql @@ -0,0 +1,7 @@ +# Insert some data +INSERT INTO store.shopping_cart 
+(userid, item_count, last_update_timestamp) +VALUES ('9876', 2, toTimeStamp(toDate(now()))); +INSERT INTO store.shopping_cart +(userid, item_count, last_update_timestamp) +VALUES ('1234', 5, toTimeStamp(toDate(now()))); diff --git a/doc/modules/cassandra/examples/CQL/qs_insert_data_again.cql b/doc/modules/cassandra/examples/CQL/qs_insert_data_again.cql new file mode 100644 index 000000000000..b95473f0fe6f --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/qs_insert_data_again.cql @@ -0,0 +1 @@ +INSERT INTO store.shopping_cart (userid, item_count) VALUES ('4567', 20); diff --git a/doc/modules/cassandra/examples/CQL/qs_select_data.cql b/doc/modules/cassandra/examples/CQL/qs_select_data.cql new file mode 100644 index 000000000000..e9e55db2093e --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/qs_select_data.cql @@ -0,0 +1 @@ +SELECT * FROM store.shopping_cart; diff --git a/doc/modules/cassandra/examples/CQL/query_allow_filtering.cql b/doc/modules/cassandra/examples/CQL/query_allow_filtering.cql new file mode 100644 index 000000000000..c4aaf394ec2e --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/query_allow_filtering.cql @@ -0,0 +1,5 @@ +// All users are returned +SELECT * FROM users; + +// All users with a particular birth year are returned +SELECT * FROM users WHERE birth_year = 1981; diff --git a/doc/modules/cassandra/examples/CQL/query_fail_allow_filtering.cql b/doc/modules/cassandra/examples/CQL/query_fail_allow_filtering.cql new file mode 100644 index 000000000000..2e6c63bffd24 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/query_fail_allow_filtering.cql @@ -0,0 +1 @@ +SELECT * FROM users WHERE birth_year = 1981 AND country = 'FR'; diff --git a/doc/modules/cassandra/examples/CQL/query_nofail_allow_filtering.cql b/doc/modules/cassandra/examples/CQL/query_nofail_allow_filtering.cql new file mode 100644 index 000000000000..88aed561954c --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/query_nofail_allow_filtering.cql @@ -0,0 +1 @@ +SELECT * FROM users WHERE birth_year = 1981 AND country = 'FR' ALLOW FILTERING; diff --git a/doc/modules/cassandra/examples/CQL/rename_udt_field.cql b/doc/modules/cassandra/examples/CQL/rename_udt_field.cql new file mode 100644 index 000000000000..7718788b3e83 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/rename_udt_field.cql @@ -0,0 +1 @@ +ALTER TYPE address RENAME zip TO zipcode; diff --git a/doc/modules/cassandra/examples/CQL/revoke_perm.cql b/doc/modules/cassandra/examples/CQL/revoke_perm.cql new file mode 100644 index 000000000000..d4ac1edb1408 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/revoke_perm.cql @@ -0,0 +1,5 @@ +REVOKE SELECT ON ALL KEYSPACES FROM data_reader; +REVOKE MODIFY ON KEYSPACE keyspace1 FROM data_writer; +REVOKE DROP ON keyspace1.table1 FROM schema_owner; +REVOKE EXECUTE ON FUNCTION keyspace1.user_function( int ) FROM report_writer; +REVOKE DESCRIBE ON ALL ROLES FROM role_admin; diff --git a/doc/modules/cassandra/examples/CQL/revoke_role.cql b/doc/modules/cassandra/examples/CQL/revoke_role.cql new file mode 100644 index 000000000000..acf506660179 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/revoke_role.cql @@ -0,0 +1 @@ +REVOKE report_writer FROM alice; diff --git a/doc/modules/cassandra/examples/CQL/role_error.cql b/doc/modules/cassandra/examples/CQL/role_error.cql new file mode 100644 index 000000000000..fa061a2ea9f7 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/role_error.cql @@ -0,0 +1,6 @@ +GRANT role_a TO role_b; +GRANT role_b TO role_a; + +GRANT role_a TO role_b; +GRANT
role_b TO role_c; +GRANT role_c TO role_a; diff --git a/doc/modules/cassandra/examples/CQL/select_data2_backup.cql b/doc/modules/cassandra/examples/CQL/select_data2_backup.cql new file mode 100644 index 000000000000..7a409d75264f --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/select_data2_backup.cql @@ -0,0 +1,2 @@ +SELECT * FROM catalogkeyspace.journal; +SELECT * FROM catalogkeyspace.magazine; diff --git a/doc/modules/cassandra/examples/CQL/select_data_backup.cql b/doc/modules/cassandra/examples/CQL/select_data_backup.cql new file mode 100644 index 000000000000..4468467a5ce4 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/select_data_backup.cql @@ -0,0 +1,2 @@ +SELECT * FROM t; +SELECT * FROM t2; diff --git a/doc/modules/cassandra/examples/CQL/select_range.cql b/doc/modules/cassandra/examples/CQL/select_range.cql new file mode 100644 index 000000000000..fcf3bd583338 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/select_range.cql @@ -0,0 +1 @@ +SELECT * FROM t2 WHERE a = 0 AND b > 0 and b <= 3; diff --git a/doc/modules/cassandra/examples/CQL/select_statement.cql b/doc/modules/cassandra/examples/CQL/select_statement.cql new file mode 100644 index 000000000000..cee5a1916383 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/select_statement.cql @@ -0,0 +1,11 @@ +SELECT name, occupation FROM users WHERE userid IN (199, 200, 207); +SELECT JSON name, occupation FROM users WHERE userid = 199; +SELECT name AS user_name, occupation AS user_occupation FROM users; + +SELECT time, value +FROM events +WHERE event_type = 'myEvent' + AND time > '2011-02-03' + AND time <= '2012-01-01'; + +SELECT COUNT (*) AS user_count FROM users; diff --git a/doc/modules/cassandra/examples/CQL/select_static_data.cql b/doc/modules/cassandra/examples/CQL/select_static_data.cql new file mode 100644 index 000000000000..8bca9375bfe4 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/select_static_data.cql @@ -0,0 +1 @@ +SELECT * FROM t; diff --git a/doc/modules/cassandra/examples/CQL/select_table_clustercolumn.cql b/doc/modules/cassandra/examples/CQL/select_table_clustercolumn.cql new file mode 100644 index 000000000000..60bb2cf95b07 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/select_table_clustercolumn.cql @@ -0,0 +1 @@ +SELECT * FROM t2; diff --git a/doc/modules/cassandra/examples/CQL/select_table_compound_pk.cql b/doc/modules/cassandra/examples/CQL/select_table_compound_pk.cql new file mode 100644 index 000000000000..8bca9375bfe4 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/select_table_compound_pk.cql @@ -0,0 +1 @@ +SELECT * FROM t; diff --git a/doc/modules/cassandra/examples/CQL/set.cql b/doc/modules/cassandra/examples/CQL/set.cql new file mode 100644 index 000000000000..607981b8be5e --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/set.cql @@ -0,0 +1,11 @@ +CREATE TABLE images ( + name text PRIMARY KEY, + owner text, + tags set<text> // A set of text values +); + +INSERT INTO images (name, owner, tags) + VALUES ('cat.jpg', 'jsmith', { 'pet', 'cute' }); + +// Replace the existing set entirely +UPDATE images SET tags = { 'kitten', 'cat', 'lol' } WHERE name = 'cat.jpg'; diff --git a/doc/modules/cassandra/examples/CQL/spec_retry_values.cql b/doc/modules/cassandra/examples/CQL/spec_retry_values.cql new file mode 100644 index 000000000000..bcd8d26dfce5 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/spec_retry_values.cql @@ -0,0 +1,6 @@ +min(99percentile,50ms) +max(99p,50MS) +MAX(99P,50ms) +MIN(99.9PERCENTILE,50ms) +max(90percentile,100MS) +MAX(100.0PERCENTILE,60ms)
diff --git a/doc/modules/cassandra/examples/CQL/sum.cql b/doc/modules/cassandra/examples/CQL/sum.cql new file mode 100644 index 000000000000..bccfcbc81ec5 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/sum.cql @@ -0,0 +1 @@ +SELECT SUM (players) FROM plays; diff --git a/doc/modules/cassandra/examples/CQL/table_for_where.cql b/doc/modules/cassandra/examples/CQL/table_for_where.cql new file mode 100644 index 000000000000..f5ed5001ebf9 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/table_for_where.cql @@ -0,0 +1,9 @@ +CREATE TABLE posts ( + userid text, + blog_title text, + posted_at timestamp, + entry_title text, + content text, + category int, + PRIMARY KEY (userid, blog_title, posted_at) +); diff --git a/doc/modules/cassandra/examples/CQL/timeuuid_min_max.cql b/doc/modules/cassandra/examples/CQL/timeuuid_min_max.cql new file mode 100644 index 000000000000..81353f53d8b0 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/timeuuid_min_max.cql @@ -0,0 +1,3 @@ +SELECT * FROM myTable + WHERE t > maxTimeuuid('2013-01-01 00:05+0000') + AND t < minTimeuuid('2013-02-02 10:00+0000'); diff --git a/doc/modules/cassandra/examples/CQL/timeuuid_now.cql b/doc/modules/cassandra/examples/CQL/timeuuid_now.cql new file mode 100644 index 000000000000..54c2cc4817f6 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/timeuuid_now.cql @@ -0,0 +1 @@ +SELECT * FROM myTable WHERE t = now(); diff --git a/doc/modules/cassandra/examples/CQL/token.cql b/doc/modules/cassandra/examples/CQL/token.cql new file mode 100644 index 000000000000..b5c7f8b82bd4 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/token.cql @@ -0,0 +1,2 @@ +SELECT * FROM posts + WHERE token(userid) > token('tom') AND token(userid) < token('bob'); diff --git a/doc/modules/cassandra/examples/CQL/tuple.cql b/doc/modules/cassandra/examples/CQL/tuple.cql new file mode 100644 index 000000000000..b612d078aa1e --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/tuple.cql @@ -0,0 +1,6 @@ +CREATE TABLE durations ( + event text, + duration tuple<int, text>, +); + +INSERT INTO durations (event, duration) VALUES ('ev1', (3, 'hours')); diff --git a/doc/modules/cassandra/examples/CQL/uda.cql b/doc/modules/cassandra/examples/CQL/uda.cql new file mode 100644 index 000000000000..b40dd113f04b --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/uda.cql @@ -0,0 +1,41 @@ +CREATE OR REPLACE FUNCTION test.averageState(state tuple<int, bigint>, val int) + CALLED ON NULL INPUT + RETURNS tuple<int, bigint> + LANGUAGE java + AS $$ + if (val != null) { + state.setInt(0, state.getInt(0)+1); + state.setLong(1, state.getLong(1)+val.intValue()); + } + return state; + $$; + +CREATE OR REPLACE FUNCTION test.averageFinal (state tuple<int, bigint>) + CALLED ON NULL INPUT + RETURNS double + LANGUAGE java + AS $$ + double r = 0; + if (state.getInt(0) == 0) return null; + r = state.getLong(1); + r /= state.getInt(0); + return Double.valueOf(r); + $$; + +CREATE OR REPLACE AGGREGATE test.average(int) + SFUNC averageState + STYPE tuple<int, bigint> + FINALFUNC averageFinal + INITCOND (0, 0); + +CREATE TABLE test.atable ( + pk int PRIMARY KEY, + val int +); + +INSERT INTO test.atable (pk, val) VALUES (1,1); +INSERT INTO test.atable (pk, val) VALUES (2,2); +INSERT INTO test.atable (pk, val) VALUES (3,3); +INSERT INTO test.atable (pk, val) VALUES (4,4); + +SELECT test.average(val) FROM test.atable; diff --git a/doc/modules/cassandra/examples/CQL/udt.cql b/doc/modules/cassandra/examples/CQL/udt.cql new file mode 100644 index 000000000000..defcc821e62b --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/udt.cql @@ -0,0 +1,16 @@
+CREATE TYPE phone ( + country_code int, + number text, +); + +CREATE TYPE address ( + street text, + city text, + zip text, + phones map<text, frozen<phone>> +); + +CREATE TABLE user ( + name text PRIMARY KEY, + addresses map<text, frozen<address>> +); diff --git a/doc/modules/cassandra/examples/CQL/update_list.cql b/doc/modules/cassandra/examples/CQL/update_list.cql new file mode 100644 index 000000000000..70aacf55d94a --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/update_list.cql @@ -0,0 +1,2 @@ +UPDATE plays SET players = 5, scores = scores + [ 14, 21 ] WHERE id = '123-afde'; +UPDATE plays SET players = 6, scores = [ 3 ] + scores WHERE id = '123-afde'; diff --git a/doc/modules/cassandra/examples/CQL/update_map.cql b/doc/modules/cassandra/examples/CQL/update_map.cql new file mode 100644 index 000000000000..870f46343b5e --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/update_map.cql @@ -0,0 +1,2 @@ +UPDATE users SET favs['author'] = 'Ed Poe' WHERE id = 'jsmith'; +UPDATE users SET favs = favs + { 'movie' : 'Casablanca', 'band' : 'ZZ Top' } WHERE id = 'jsmith'; diff --git a/doc/modules/cassandra/examples/CQL/update_particular_list_element.cql b/doc/modules/cassandra/examples/CQL/update_particular_list_element.cql new file mode 100644 index 000000000000..604ad34cc977 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/update_particular_list_element.cql @@ -0,0 +1 @@ +UPDATE plays SET scores[1] = 7 WHERE id = '123-afde'; diff --git a/doc/modules/cassandra/examples/CQL/update_set.cql b/doc/modules/cassandra/examples/CQL/update_set.cql new file mode 100644 index 000000000000..16e6eb23e4b7 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/update_set.cql @@ -0,0 +1 @@ +UPDATE images SET tags = tags + { 'gray', 'cuddly' } WHERE name = 'cat.jpg'; diff --git a/doc/modules/cassandra/examples/CQL/update_statement.cql b/doc/modules/cassandra/examples/CQL/update_statement.cql new file mode 100644 index 000000000000..7e1cfa76fecf --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/update_statement.cql @@ -0,0 +1,10 @@ +UPDATE NerdMovies USING TTL 400 + SET director = 'Joss Whedon', + main_actor = 'Nathan Fillion', + year = 2005 + WHERE movie = 'Serenity'; + +UPDATE UserActions + SET total = total + 2 + WHERE user = B70DE1D0-9908-4AE3-BE34-5573E5B09F14 + AND action = 'click'; diff --git a/doc/modules/cassandra/examples/CQL/update_ttl_map.cql b/doc/modules/cassandra/examples/CQL/update_ttl_map.cql new file mode 100644 index 000000000000..d2db9bdcdf1c --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/update_ttl_map.cql @@ -0,0 +1 @@ +UPDATE users USING TTL 10 SET favs['color'] = 'green' WHERE id = 'jsmith'; diff --git a/doc/modules/cassandra/examples/CQL/use_ks.cql b/doc/modules/cassandra/examples/CQL/use_ks.cql new file mode 100644 index 000000000000..b3aaaf3ea848 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/use_ks.cql @@ -0,0 +1 @@ +USE excelsior; diff --git a/doc/modules/cassandra/examples/CQL/where.cql b/doc/modules/cassandra/examples/CQL/where.cql new file mode 100644 index 000000000000..22d4bca3c4c6 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/where.cql @@ -0,0 +1,4 @@ +SELECT entry_title, content FROM posts + WHERE userid = 'john doe' + AND blog_title='John''s Blog' + AND posted_at >= '2012-01-01' AND posted_at < '2012-01-31'; diff --git a/doc/modules/cassandra/examples/CQL/where_fail.cql b/doc/modules/cassandra/examples/CQL/where_fail.cql new file mode 100644 index 000000000000..57413dfb0d05 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/where_fail.cql @@ -0,0 +1,5 @@ +// Needs a
blog_title to be set to select ranges of posted_at + +SELECT entry_title, content FROM posts + WHERE userid = 'john doe' + AND posted_at >= '2012-01-01' AND posted_at < '2012-01-31'; diff --git a/doc/modules/cassandra/examples/CQL/where_group_cluster_columns.cql b/doc/modules/cassandra/examples/CQL/where_group_cluster_columns.cql new file mode 100644 index 000000000000..1efb55ecd795 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/where_group_cluster_columns.cql @@ -0,0 +1,3 @@ +SELECT * FROM posts + WHERE userid = 'john doe' + AND (blog_title, posted_at) > ('John''s Blog', '2012-01-01'); diff --git a/doc/modules/cassandra/examples/CQL/where_in_tuple.cql b/doc/modules/cassandra/examples/CQL/where_in_tuple.cql new file mode 100644 index 000000000000..1d558046dc30 --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/where_in_tuple.cql @@ -0,0 +1,3 @@ +SELECT * FROM posts + WHERE userid = 'john doe' + AND (blog_title, posted_at) IN (('John''s Blog', '2012-01-01'), ('Extreme Chess', '2014-06-01')); diff --git a/doc/modules/cassandra/examples/CQL/where_no_group_cluster_columns.cql b/doc/modules/cassandra/examples/CQL/where_no_group_cluster_columns.cql new file mode 100644 index 000000000000..6681ba5c85ef --- /dev/null +++ b/doc/modules/cassandra/examples/CQL/where_no_group_cluster_columns.cql @@ -0,0 +1,4 @@ +SELECT * FROM posts + WHERE userid = 'john doe' + AND blog_title > 'John''s Blog' + AND posted_at > '2012-01-01'; diff --git a/doc/modules/cassandra/examples/JAVA/udf_imports.java b/doc/modules/cassandra/examples/JAVA/udf_imports.java new file mode 100644 index 000000000000..6b883bf32e3f --- /dev/null +++ b/doc/modules/cassandra/examples/JAVA/udf_imports.java @@ -0,0 +1,8 @@ +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.cassandra.cql3.functions.UDFContext; +import com.datastax.driver.core.TypeCodec; +import com.datastax.driver.core.TupleValue; +import com.datastax.driver.core.UDTValue; diff --git a/doc/modules/cassandra/examples/JAVA/udfcontext.java b/doc/modules/cassandra/examples/JAVA/udfcontext.java new file mode 100644 index 000000000000..65e0c7fc0b3e --- /dev/null +++ b/doc/modules/cassandra/examples/JAVA/udfcontext.java @@ -0,0 +1,11 @@ +public interface UDFContext +{ + UDTValue newArgUDTValue(String argName); + UDTValue newArgUDTValue(int argNum); + UDTValue newReturnUDTValue(); + UDTValue newUDTValue(String udtName); + TupleValue newArgTupleValue(String argName); + TupleValue newArgTupleValue(int argNum); + TupleValue newReturnTupleValue(); + TupleValue newTupleValue(String cqlDefinition); +} diff --git a/doc/modules/cassandra/examples/RESULTS/add_repo_keys.result b/doc/modules/cassandra/examples/RESULTS/add_repo_keys.result new file mode 100644 index 000000000000..4736ecea23fa --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/add_repo_keys.result @@ -0,0 +1,4 @@ +% Total % Received % Xferd Average Speed Time Time Time Current + Dload Upload Total Spent Left Speed +100 266k 100 266k 0 0 320k 0 --:--:-- --:--:-- --:--:-- 320k +OK diff --git a/doc/modules/cassandra/examples/RESULTS/add_yum_repo.result b/doc/modules/cassandra/examples/RESULTS/add_yum_repo.result new file mode 100644 index 000000000000..8fdb78c9a4e3 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/add_yum_repo.result @@ -0,0 +1,6 @@ +[cassandra] +name=Apache Cassandra +baseurl=https://downloads.apache.org/cassandra/redhat/311x/ +gpgcheck=1 +repo_gpgcheck=1 +gpgkey=https://downloads.apache.org/cassandra/KEYS diff --git 
a/doc/modules/cassandra/examples/RESULTS/autoexpand_exclude_dc.result b/doc/modules/cassandra/examples/RESULTS/autoexpand_exclude_dc.result new file mode 100644 index 000000000000..6d5a8a42244b --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/autoexpand_exclude_dc.result @@ -0,0 +1 @@ +CREATE KEYSPACE excalibur WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': '3'} AND durable_writes = true; diff --git a/doc/modules/cassandra/examples/RESULTS/autoexpand_ks.result b/doc/modules/cassandra/examples/RESULTS/autoexpand_ks.result new file mode 100644 index 000000000000..fcc8855e4bef --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/autoexpand_ks.result @@ -0,0 +1 @@ +CREATE KEYSPACE excalibur WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': '3', 'DC2': '3'} AND durable_writes = true; diff --git a/doc/modules/cassandra/examples/RESULTS/autoexpand_ks_override.result b/doc/modules/cassandra/examples/RESULTS/autoexpand_ks_override.result new file mode 100644 index 000000000000..b76189dcedea --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/autoexpand_ks_override.result @@ -0,0 +1 @@ +CREATE KEYSPACE excalibur WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': '3', 'DC2': '2'} AND durable_writes = true; diff --git a/doc/modules/cassandra/examples/RESULTS/cqlsh_localhost.result b/doc/modules/cassandra/examples/RESULTS/cqlsh_localhost.result new file mode 100644 index 000000000000..b5a19082d6b8 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/cqlsh_localhost.result @@ -0,0 +1,11 @@ +Connected to Test Cluster at localhost:9042. +[cqlsh 5.0.1 | Cassandra 3.8 | CQL spec 3.4.2 | Native protocol v4] +Use HELP for help. +cqlsh> SELECT cluster_name, listen_address FROM system.local; + + cluster_name | listen_address +--------------+---------------- + Test Cluster | 127.0.0.1 + +(1 rows) +cqlsh> diff --git a/doc/modules/cassandra/examples/RESULTS/curl_verify_sha.result b/doc/modules/cassandra/examples/RESULTS/curl_verify_sha.result new file mode 100644 index 000000000000..ac77d264202d --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/curl_verify_sha.result @@ -0,0 +1 @@ +28757dde589f70410f9a6a95c39ee7e6cde63440e2b06b91ae6b200614fa364d diff --git a/doc/modules/cassandra/examples/RESULTS/find_backups.result b/doc/modules/cassandra/examples/RESULTS/find_backups.result new file mode 100644 index 000000000000..156b5694a09d --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/find_backups.result @@ -0,0 +1,4 @@ +./cassandra/data/data/cqlkeyspace/t-d132e240c21711e9bbee19821dcea330/backups +./cassandra/data/data/cqlkeyspace/t2-d993a390c22911e9b1350d927649052c/backups +./cassandra/data/data/catalogkeyspace/journal-296a2d30c22a11e9b1350d927649052c/backups +./cassandra/data/data/catalogkeyspace/magazine-446eae30c22a11e9b1350d927649052c/backups diff --git a/doc/modules/cassandra/examples/RESULTS/find_backups_table.result b/doc/modules/cassandra/examples/RESULTS/find_backups_table.result new file mode 100644 index 000000000000..7e01fa617b55 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/find_backups_table.result @@ -0,0 +1 @@ +./cassandra/data/data/cqlkeyspace/t-d132e240c21711e9bbee19821dcea330/backups diff --git a/doc/modules/cassandra/examples/RESULTS/find_two_snapshots.result b/doc/modules/cassandra/examples/RESULTS/find_two_snapshots.result new file mode 100644 index 000000000000..9cfb693bd4b6 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/find_two_snapshots.result @@ -0,0 +1,3 @@ +total 0 +drwxrwxr-x. 
2 ec2-user ec2-user 265 Aug 19 02:44 catalog-ks +drwxrwxr-x. 2 ec2-user ec2-user 265 Aug 19 02:52 multi-ks diff --git a/doc/modules/cassandra/examples/RESULTS/flush_and_check.result b/doc/modules/cassandra/examples/RESULTS/flush_and_check.result new file mode 100644 index 000000000000..33863adf23c0 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/flush_and_check.result @@ -0,0 +1,9 @@ +total 36 +-rw-rw-r--. 2 ec2-user ec2-user 47 Aug 19 00:32 na-1-big-CompressionInfo.db +-rw-rw-r--. 2 ec2-user ec2-user 43 Aug 19 00:32 na-1-big-Data.db +-rw-rw-r--. 2 ec2-user ec2-user 10 Aug 19 00:32 na-1-big-Digest.crc32 +-rw-rw-r--. 2 ec2-user ec2-user 16 Aug 19 00:32 na-1-big-Filter.db +-rw-rw-r--. 2 ec2-user ec2-user 8 Aug 19 00:32 na-1-big-Index.db +-rw-rw-r--. 2 ec2-user ec2-user 4673 Aug 19 00:32 na-1-big-Statistics.db +-rw-rw-r--. 2 ec2-user ec2-user 56 Aug 19 00:32 na-1-big-Summary.db +-rw-rw-r--. 2 ec2-user ec2-user 92 Aug 19 00:32 na-1-big-TOC.txt diff --git a/doc/modules/cassandra/examples/RESULTS/flush_and_check2.result b/doc/modules/cassandra/examples/RESULTS/flush_and_check2.result new file mode 100644 index 000000000000..d89b99126fa3 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/flush_and_check2.result @@ -0,0 +1,17 @@ +total 72 +-rw-rw-r--. 2 ec2-user ec2-user 47 Aug 19 00:32 na-1-big-CompressionInfo.db +-rw-rw-r--. 2 ec2-user ec2-user 43 Aug 19 00:32 na-1-big-Data.db +-rw-rw-r--. 2 ec2-user ec2-user 10 Aug 19 00:32 na-1-big-Digest.crc32 +-rw-rw-r--. 2 ec2-user ec2-user 16 Aug 19 00:32 na-1-big-Filter.db +-rw-rw-r--. 2 ec2-user ec2-user 8 Aug 19 00:32 na-1-big-Index.db +-rw-rw-r--. 2 ec2-user ec2-user 4673 Aug 19 00:32 na-1-big-Statistics.db +-rw-rw-r--. 2 ec2-user ec2-user 56 Aug 19 00:32 na-1-big-Summary.db +-rw-rw-r--. 2 ec2-user ec2-user 92 Aug 19 00:32 na-1-big-TOC.txt +-rw-rw-r--. 2 ec2-user ec2-user 47 Aug 19 00:35 na-2-big-CompressionInfo.db +-rw-rw-r--. 2 ec2-user ec2-user 41 Aug 19 00:35 na-2-big-Data.db +-rw-rw-r--. 2 ec2-user ec2-user 10 Aug 19 00:35 na-2-big-Digest.crc32 +-rw-rw-r--. 2 ec2-user ec2-user 16 Aug 19 00:35 na-2-big-Filter.db +-rw-rw-r--. 2 ec2-user ec2-user 8 Aug 19 00:35 na-2-big-Index.db +-rw-rw-r--. 2 ec2-user ec2-user 4673 Aug 19 00:35 na-2-big-Statistics.db +-rw-rw-r--. 2 ec2-user ec2-user 56 Aug 19 00:35 na-2-big-Summary.db +-rw-rw-r--. 
2 ec2-user ec2-user 92 Aug 19 00:35 na-2-big-TOC.txt diff --git a/doc/modules/cassandra/examples/RESULTS/insert_data2_backup.result b/doc/modules/cassandra/examples/RESULTS/insert_data2_backup.result new file mode 100644 index 000000000000..23e3902d20c3 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/insert_data2_backup.result @@ -0,0 +1,13 @@ +id | name | publisher +----+---------------------------+------------------ + 1 | Couchbase Magazine | Couchbase + 0 | Apache Cassandra Magazine | Apache Cassandra + + (2 rows) + +id | name | publisher +----+---------------------------+------------------ + 1 | Couchbase Magazine | Couchbase + 0 | Apache Cassandra Magazine | Apache Cassandra + + (2 rows) diff --git a/doc/modules/cassandra/examples/RESULTS/insert_table_cc_addl.result b/doc/modules/cassandra/examples/RESULTS/insert_table_cc_addl.result new file mode 100644 index 000000000000..d9af0c6192e7 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/insert_table_cc_addl.result @@ -0,0 +1,9 @@ + a | b | c | d +---+---+---+--- + 1 | 1 | 4 | 4 + 0 | 0 | 0 | 9 <1> + 0 | 0 | 1 | 1 + 0 | 1 | 2 | 2 + 0 | 1 | 3 | 3 + +(5 rows) diff --git a/doc/modules/cassandra/examples/RESULTS/java_verify.result b/doc/modules/cassandra/examples/RESULTS/java_verify.result new file mode 100644 index 000000000000..3ea962560c18 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/java_verify.result @@ -0,0 +1,3 @@ +openjdk version "1.8.0_222" +OpenJDK Runtime Environment (build 1.8.0_222-8u222-b10-1ubuntu1~16.04.1-b10) +OpenJDK 64-Bit Server VM (build 25.222-b10, mixed mode) diff --git a/doc/modules/cassandra/examples/RESULTS/no_bups.result b/doc/modules/cassandra/examples/RESULTS/no_bups.result new file mode 100644 index 000000000000..92811047f08e --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/no_bups.result @@ -0,0 +1 @@ +total 0 diff --git a/doc/modules/cassandra/examples/RESULTS/nodetool_list_snapshots.result b/doc/modules/cassandra/examples/RESULTS/nodetool_list_snapshots.result new file mode 100644 index 000000000000..15503eded916 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/nodetool_list_snapshots.result @@ -0,0 +1,13 @@ +Snapshot Details: +Snapshot name Keyspace name Column family name True size Size on disk +multi-table cqlkeyspace t2 4.86 KiB 5.67 KiB +multi-table cqlkeyspace t 4.89 KiB 5.7 KiB +multi-ks cqlkeyspace t 4.89 KiB 5.7 KiB +multi-ks catalogkeyspace journal 4.9 KiB 5.73 KiB +magazine catalogkeyspace magazine 4.9 KiB 5.73 KiB +multi-table-2 cqlkeyspace t2 4.86 KiB 5.67 KiB +multi-table-2 cqlkeyspace t 4.89 KiB 5.7 KiB +catalog-ks catalogkeyspace journal 4.9 KiB 5.73 KiB +catalog-ks catalogkeyspace magazine 4.9 KiB 5.73 KiB + +Total TrueDiskSpaceUsed: 44.02 KiB diff --git a/doc/modules/cassandra/examples/RESULTS/nodetool_snapshot_help.result b/doc/modules/cassandra/examples/RESULTS/nodetool_snapshot_help.result new file mode 100644 index 000000000000..a58360872a28 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/nodetool_snapshot_help.result @@ -0,0 +1,54 @@ +NAME + nodetool snapshot - Take a snapshot of specified keyspaces or a snapshot + of the specified table + +SYNOPSIS + nodetool [(-h | --host )] [(-p | --port )] + [(-pp | --print-port)] [(-pw | --password )] + [(-pwf | --password-file )] + [(-u | --username )] snapshot + [(-cf
<table> | --column-family <table> | --table <table>
)] + [(-kt <ktlist> | --kt-list <ktlist> | -kc <ktlist> | --kc.list <ktlist>)] + [(-sf | --skip-flush)] [(-t <tag> | --tag <tag>)] [--] [<keyspaces...>] + +OPTIONS + -cf
<table>, --column-family <table>, --table <table>
+ The table name (you must specify one and only one keyspace for using + this option) + + -h , --host + Node hostname or ip address + + -kt , --kt-list , -kc , --kc.list + The list of Keyspace.table to take snapshot.(you must not specify + only keyspace) + + -p , --port + Remote jmx agent port number + + -pp, --print-port + Operate in 4.0 mode with hosts disambiguated by port number + + -pw , --password + Remote jmx agent password + + -pwf , --password-file + Path to the JMX password file + + -sf, --skip-flush + Do not flush memtables before snapshotting (snapshot will not + contain unflushed data) + + -t , --tag + The name of the snapshot + + -u , --username + Remote jmx agent username + + -- + This option can be used to separate command-line options from the + list of argument, (useful when arguments might be mistaken for + command-line options + + [] + List of keyspaces. By default, all keyspaces diff --git a/doc/modules/cassandra/examples/RESULTS/select_data2_backup.result b/doc/modules/cassandra/examples/RESULTS/select_data2_backup.result new file mode 100644 index 000000000000..23e3902d20c3 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/select_data2_backup.result @@ -0,0 +1,13 @@ +id | name | publisher +----+---------------------------+------------------ + 1 | Couchbase Magazine | Couchbase + 0 | Apache Cassandra Magazine | Apache Cassandra + + (2 rows) + +id | name | publisher +----+---------------------------+------------------ + 1 | Couchbase Magazine | Couchbase + 0 | Apache Cassandra Magazine | Apache Cassandra + + (2 rows) diff --git a/doc/modules/cassandra/examples/RESULTS/select_data_backup.result b/doc/modules/cassandra/examples/RESULTS/select_data_backup.result new file mode 100644 index 000000000000..5d6a9e33bce1 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/select_data_backup.result @@ -0,0 +1,15 @@ +id | k | v +----+---+------ + 1 | 1 | val1 + 0 | 0 | val0 + + (2 rows) + + +id | k | v +----+---+------ + 1 | 1 | val1 + 0 | 0 | val0 + 2 | 2 | val2 + + (3 rows) diff --git a/doc/modules/cassandra/examples/RESULTS/select_range.result b/doc/modules/cassandra/examples/RESULTS/select_range.result new file mode 100644 index 000000000000..a3d1c7651446 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/select_range.result @@ -0,0 +1,6 @@ + a | b | c | d +---+---+---+--- + 0 | 1 | 2 | 2 + 0 | 1 | 3 | 3 + +(2 rows) diff --git a/doc/modules/cassandra/examples/RESULTS/select_static_data.result b/doc/modules/cassandra/examples/RESULTS/select_static_data.result new file mode 100644 index 000000000000..f1e8decde8c7 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/select_static_data.result @@ -0,0 +1,4 @@ + pk | t | v | s + ----+---+--------+----------- + 0 | 0 | 'val0' | 'static1' + 0 | 1 | 'val1' | 'static1' diff --git a/doc/modules/cassandra/examples/RESULTS/select_table_clustercolumn.result b/doc/modules/cassandra/examples/RESULTS/select_table_clustercolumn.result new file mode 100644 index 000000000000..1d3899db9298 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/select_table_clustercolumn.result @@ -0,0 +1,9 @@ + a | b | c | d +---+---+---+--- + 1 | 1 | 4 | 4 <1> + 0 | 0 | 0 | 0 + 0 | 0 | 1 | 1 + 0 | 1 | 2 | 2 + 0 | 1 | 3 | 3 + +(5 rows) diff --git a/doc/modules/cassandra/examples/RESULTS/select_table_compound_pk.result b/doc/modules/cassandra/examples/RESULTS/select_table_compound_pk.result new file mode 100644 index 000000000000..d098516b1125 --- /dev/null +++ 
b/doc/modules/cassandra/examples/RESULTS/select_table_compound_pk.result @@ -0,0 +1,9 @@ + a | b | c | d +---+---+---+--- + 0 | 0 | 0 | 0 <1> + 0 | 0 | 1 | 1 + 0 | 1 | 2 | 2 <2> + 0 | 1 | 3 | 3 + 1 | 1 | 4 | 4 <3> + +(5 rows) diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_all.result b/doc/modules/cassandra/examples/RESULTS/snapshot_all.result new file mode 100644 index 000000000000..6ec55a023cac --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/snapshot_all.result @@ -0,0 +1,4 @@ +./cassandra/data/data/cqlkeyspace/t-d132e240c21711e9bbee19821dcea330/snapshots +./cassandra/data/data/cqlkeyspace/t2-d993a390c22911e9b1350d927649052c/snapshots +./cassandra/data/data/catalogkeyspace/journal-296a2d30c22a11e9b1350d927649052c/snapshots +./cassandra/data/data/catalogkeyspace/magazine-446eae30c22a11e9b1350d927649052c/snapshots diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_backup2.result b/doc/modules/cassandra/examples/RESULTS/snapshot_backup2.result new file mode 100644 index 000000000000..8276d520394a --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/snapshot_backup2.result @@ -0,0 +1,3 @@ +Requested creating snapshot(s) for [catalogkeyspace] with snapshot name [catalog-ks] and +options {skipFlush=false} +Snapshot directory: catalog-ks diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_backup2_find.result b/doc/modules/cassandra/examples/RESULTS/snapshot_backup2_find.result new file mode 100644 index 000000000000..88b549976899 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/snapshot_backup2_find.result @@ -0,0 +1,2 @@ +./cassandra/data/data/catalogkeyspace/journal-296a2d30c22a11e9b1350d927649052c/snapshots +./cassandra/data/data/catalogkeyspace/magazine-446eae30c22a11e9b1350d927649052c/snapshots diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_files.result b/doc/modules/cassandra/examples/RESULTS/snapshot_files.result new file mode 100644 index 000000000000..8dd91b5ce805 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/snapshot_files.result @@ -0,0 +1,11 @@ +total 44 +-rw-rw-r--. 1 ec2-user ec2-user 31 Aug 19 02:44 manifest.jsonZ +-rw-rw-r--. 4 ec2-user ec2-user 47 Aug 19 02:38 na-1-big-CompressionInfo.db +-rw-rw-r--. 4 ec2-user ec2-user 97 Aug 19 02:38 na-1-big-Data.db +-rw-rw-r--. 4 ec2-user ec2-user 10 Aug 19 02:38 na-1-big-Digest.crc32 +-rw-rw-r--. 4 ec2-user ec2-user 16 Aug 19 02:38 na-1-big-Filter.db +-rw-rw-r--. 4 ec2-user ec2-user 16 Aug 19 02:38 na-1-big-Index.db +-rw-rw-r--. 4 ec2-user ec2-user 4687 Aug 19 02:38 na-1-big-Statistics.db +-rw-rw-r--. 4 ec2-user ec2-user 56 Aug 19 02:38 na-1-big-Summary.db +-rw-rw-r--. 4 ec2-user ec2-user 92 Aug 19 02:38 na-1-big-TOC.txt +-rw-rw-r--. 
1 ec2-user ec2-user 814 Aug 19 02:44 schema.cql diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_mult_ks.result b/doc/modules/cassandra/examples/RESULTS/snapshot_mult_ks.result new file mode 100644 index 000000000000..61dff939e276 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/snapshot_mult_ks.result @@ -0,0 +1,3 @@ +Requested creating snapshot(s) for [catalogkeyspace.journal,cqlkeyspace.t] with snapshot +name [multi-ks] and options {skipFlush=false} +Snapshot directory: multi-ks diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_mult_tables.result b/doc/modules/cassandra/examples/RESULTS/snapshot_mult_tables.result new file mode 100644 index 000000000000..557a6a488c31 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/snapshot_mult_tables.result @@ -0,0 +1,3 @@ +Requested creating snapshot(s) for ["CQLKeyspace".t,"CQLKeyspace".t2] with snapshot name [multi- +table] and options {skipFlush=false} +Snapshot directory: multi-table diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_mult_tables_again.result b/doc/modules/cassandra/examples/RESULTS/snapshot_mult_tables_again.result new file mode 100644 index 000000000000..6c09e71e9080 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/snapshot_mult_tables_again.result @@ -0,0 +1,3 @@ +Requested creating snapshot(s) for ["CQLKeyspace".t,"CQLKeyspace".t2] with snapshot name [multi- +table-2] and options {skipFlush=false} +Snapshot directory: multi-table-2 diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_one_table2.result b/doc/modules/cassandra/examples/RESULTS/snapshot_one_table2.result new file mode 100644 index 000000000000..c147889242c7 --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/snapshot_one_table2.result @@ -0,0 +1,3 @@ +Requested creating snapshot(s) for [catalogkeyspace] with snapshot name [magazine] and +options {skipFlush=false} +Snapshot directory: magazine diff --git a/doc/modules/cassandra/examples/RESULTS/tail_syslog.result b/doc/modules/cassandra/examples/RESULTS/tail_syslog.result new file mode 100644 index 000000000000..cb32dc04388d --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/tail_syslog.result @@ -0,0 +1 @@ +INFO [main] 2019-12-17 03:03:37,526 Server.java:156 - Starting listening for CQL clients on localhost/127.0.0.1:9042 (unencrypted)... 
diff --git a/doc/modules/cassandra/examples/RESULTS/verify_gpg.result b/doc/modules/cassandra/examples/RESULTS/verify_gpg.result new file mode 100644 index 000000000000..da6273651c7b --- /dev/null +++ b/doc/modules/cassandra/examples/RESULTS/verify_gpg.result @@ -0,0 +1,2 @@ +apache-cassandra-3.11.10-bin.tar.gz: 28757DDE 589F7041 0F9A6A95 C39EE7E6 + CDE63440 E2B06B91 AE6B2006 14FA364D diff --git a/doc/modules/cassandra/examples/TEXT/tarball_install_dirs.txt b/doc/modules/cassandra/examples/TEXT/tarball_install_dirs.txt new file mode 100644 index 000000000000..99b1a1487491 --- /dev/null +++ b/doc/modules/cassandra/examples/TEXT/tarball_install_dirs.txt @@ -0,0 +1,11 @@ +/ + bin/ <1> + conf/ <2> + data/ <3> + doc/ + interface/ + javadoc/ + lib/ + logs/ <4> + pylib/ + tools/ <5> diff --git a/doc/modules/cassandra/examples/YAML/auto_snapshot.yaml b/doc/modules/cassandra/examples/YAML/auto_snapshot.yaml new file mode 100644 index 000000000000..8f5033df4e8a --- /dev/null +++ b/doc/modules/cassandra/examples/YAML/auto_snapshot.yaml @@ -0,0 +1 @@ +auto_snapshot: false diff --git a/doc/modules/cassandra/examples/YAML/incremental_bups.yaml b/doc/modules/cassandra/examples/YAML/incremental_bups.yaml new file mode 100644 index 000000000000..95fccdb18950 --- /dev/null +++ b/doc/modules/cassandra/examples/YAML/incremental_bups.yaml @@ -0,0 +1 @@ +incremental_backups: true diff --git a/doc/modules/cassandra/examples/YAML/snapshot_before_compaction.yaml b/doc/modules/cassandra/examples/YAML/snapshot_before_compaction.yaml new file mode 100644 index 000000000000..4ee1b17a6bcb --- /dev/null +++ b/doc/modules/cassandra/examples/YAML/snapshot_before_compaction.yaml @@ -0,0 +1 @@ +snapshot_before_compaction: false diff --git a/doc/modules/cassandra/examples/YAML/stress-example.yaml b/doc/modules/cassandra/examples/YAML/stress-example.yaml new file mode 100644 index 000000000000..4a671028174f --- /dev/null +++ b/doc/modules/cassandra/examples/YAML/stress-example.yaml @@ -0,0 +1,62 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +spacenam: example # idenitifier for this spec if running with multiple yaml files +keyspace: example + +# Would almost always be network topology unless running something locally +keyspace_definition: | + CREATE KEYSPACE example WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3}; + +table: staff_activities + +# The table under test. Start with a partition per staff member +# Is this a good idea? 
+table_definition: | + CREATE TABLE staff_activities ( + name text, + when timeuuid, + what text, + PRIMARY KEY(name, when) + ) + +columnspec: + - name: name + size: uniform(5..10) # The names of the staff members are between 5-10 characters + population: uniform(1..10) # 10 possible staff members to pick from + - name: when + cluster: uniform(20..500) # Staff members do between 20 and 500 events + - name: what + size: normal(10..100,50) + +insert: + # we only update a single partition in any given insert + partitions: fixed(1) + # we want to insert a single row per partition and we have between 20 and 500 + # rows per partition + select: fixed(1)/500 + batchtype: UNLOGGED # Single partition unlogged batches are essentially noops + +queries: + events: + cql: select * from staff_activities where name = ? + fields: samerow + latest_event: + cql: select * from staff_activities where name = ? LIMIT 1 + fields: samerow + diff --git a/doc/modules/cassandra/examples/YAML/stress-lwt-example.yaml b/doc/modules/cassandra/examples/YAML/stress-lwt-example.yaml new file mode 100644 index 000000000000..1f12c2491e60 --- /dev/null +++ b/doc/modules/cassandra/examples/YAML/stress-lwt-example.yaml @@ -0,0 +1,88 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Keyspace Name +keyspace: stresscql + +# The CQL for creating a keyspace (optional if it already exists) +# Would almost always be network topology unless running something locall +keyspace_definition: | + CREATE KEYSPACE stresscql WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}; + +# Table name +table: blogposts + +# The CQL for creating a table you wish to stress (optional if it already exists) +table_definition: | + CREATE TABLE blogposts ( + domain text, + published_date timeuuid, + url text, + author text, + title text, + body text, + PRIMARY KEY(domain, published_date) + ) WITH CLUSTERING ORDER BY (published_date DESC) + AND compaction = { 'class':'LeveledCompactionStrategy' } + AND comment='A table to hold blog posts' + +### Column Distribution Specifications ### + +columnspec: + - name: domain + size: gaussian(5..100) #domain names are relatively short + population: uniform(1..10M) #10M possible domains to pick from + + - name: published_date + cluster: fixed(1000) #under each domain we will have max 1000 posts + + - name: url + size: uniform(30..300) + + - name: title #titles shouldn't go beyond 200 chars + size: gaussian(10..200) + + - name: author + size: uniform(5..20) #author names should be short + + - name: body + size: gaussian(100..5000) #the body of the blog post can be long + +### Batch Ratio Distribution Specifications ### + +insert: + partitions: fixed(1) # Our partition key is the domain so only insert one per batch + + select: fixed(1)/1000 # We have 1000 posts per domain so 1/1000 will allow 1 post per batch + + batchtype: UNLOGGED # Unlogged batches + + +# +# A list of queries you wish to run against the schema +# +queries: + singlepost: + cql: select * from blogposts where domain = ? LIMIT 1 + fields: samerow + regularupdate: + cql: update blogposts set author = ? where domain = ? and published_date = ? + fields: samerow + updatewithlwt: + cql: update blogposts set author = ? where domain = ? and published_date = ? IF body = ? AND url = ? 
+ fields: samerow diff --git a/doc/modules/cassandra/nav.adoc b/doc/modules/cassandra/nav.adoc new file mode 100644 index 000000000000..c0d30eddf99b --- /dev/null +++ b/doc/modules/cassandra/nav.adoc @@ -0,0 +1,97 @@ +* Cassandra +** xref:getting_started/index.adoc[Getting Started] +*** xref:getting_started/installing.adoc[Installing Cassandra] +*** xref:getting_started/configuring.adoc[Configuring Cassandra] +*** xref:getting_started/querying.adoc[Inserting and querying] +*** xref:getting_started/drivers.adoc[Client drivers] +*** xref:getting_started/production.adoc[Production recommendations] + +** xref:architecture/index.adoc[Architecture] +*** xref:architecture/overview.adoc[Overview] +*** xref:architecture/dynamo.adoc[Dynamo] +*** xref:architecture/storage_engine.adoc[Storage engine] +*** xref:architecture/guarantees.adoc[Guarantees] + +** xref:data_modeling/index.adoc[Data modeling] +*** xref:data_modeling/intro.adoc[Introduction] +*** xref:data_modeling/data_modeling_conceptual.adoc[Conceptual data modeling] +*** xref:data_modeling/data_modeling_rdbms.adoc[RDBMS design] +*** xref:data_modeling/data_modeling_queries.adoc[Defining application queries] +*** xref:data_modeling/data_modeling_logical.adoc[Logical data modeling] +*** xref:data_modeling/data_modeling_physical.adoc[Physical data modeling] +*** xref:data_modeling/data_modeling_refining.adoc[Evaluating and refining data models] +*** xref:data_modeling/data_modeling_schema.adoc[Defining database schema] +*** xref:data_modeling/data_modeling_tools.adoc[Cassandra data modeling tools] + +** xref:cql/index.adoc[Cassandra Query Language (CQL)] +*** xref:cql/definitions.adoc[Definitions] +*** xref:cql/types.adoc[Data types] +*** xref:cql/ddl.adoc[Data definition (DDL)] +*** xref:cql/dml.adoc[Data manipulation (DML)] +*** xref:cql/operators.adoc[Operators] +*** xref:cql/indexes.adoc[Secondary indexes] +*** xref:cql/mvs.adoc[Materialized views] +*** xref:cql/functions.adoc[Functions] +*** xref:cql/json.adoc[JSON] +*** xref:cql/security.adoc[Security] +*** xref:cql/triggers.adoc[Triggers] +*** xref:cql/appendices.adoc[Appendices] +*** xref:cql/changes.adoc[Changes] +*** xref:cql/SASI.adoc[SASI] +*** xref:cql/cql_singlefile.adoc[Single file of CQL information] + +** xref:configuration/index.adoc[Configuration] +*** xref:configuration/cass_yaml_file.adoc[cassandra.yaml] +*** xref:configuration/cass_rackdc_file.adoc[cassandra-rackdc.properties] +*** xref:configuration/cass_env_sh_file.adoc[cassandra-env.sh] +*** xref:configuration/cass_topo_file.adoc[cassandra-topologies.properties] +*** xref:configuration/cass_cl_archive_file.adoc[commitlog-archiving.properties] +*** xref:configuration/cass_logback_xml_file.adoc[logback.xml] +*** xref:configuration/cass_jvm_options_file.adoc[jvm-* files] + +** xref:operating/index.adoc[Operating] +*** xref:operating/snitch.adoc[Snitches] +*** xref:operating/topo_changes.adoc[Topology changes] +*** xref:operating/repair.adoc[Repair] +*** xref:operating/read_repair.adoc[Read repair] +*** xref:operating/hints.adoc[Hints] +*** xref:operating/bloom_filters.adoc[Bloom filters] +*** xref:operating/compression.adoc[Compression] +*** xref:operating/cdc.adoc[Change Data Capture (CDC)] +*** xref:operating/backups.adoc[Backups] +*** xref:operating/bulk_loading.adoc[Bulk loading] +*** xref:operating/metrics.adoc[Metrics] +*** xref:operating/security.adoc[Security] +*** xref:operating/hardware.adoc[Hardware] +*** xref:operating/audit_logging.adoc[Audit logging] +*** xref:operating/compaction/index.adoc[Compaction] + 
+** xref:tools/index.adoc[Tools] +*** xref:tools/cqlsh.adoc[cqlsh: the CQL shell] +*** xref:tools/nodetool/nodetool.adoc[nodetool] +*** xref:tools/sstable/index.adoc[SSTable tools] +*** xref:tools/cassandra_stress.adoc[cassandra-stress] + +** xref:troubleshooting/index.adoc[Troubleshooting] +*** xref:troubleshooting/finding_nodes.adoc[Finding misbehaving nodes] +*** xref:troubleshooting/reading_logs.adoc[Reading Cassandra logs] +*** xref:troubleshooting/use_nodetool.adoc[Using nodetool] +*** xref:troubleshooting/use_tools.adoc[Using external tools to deep-dive] + +** xref:master@_:ROOT:development/index.adoc[Development] +*** xref:master@_:ROOT:development/gettingstarted.adoc[Getting started] +*** xref:master@_:ROOT:development/ide.adoc[Building and IDE integration] +*** xref:master@_:ROOT:development/testing.adoc[Testing] +*** xref:master@_:ROOT:development/patches.adoc[Contributing code changes] +*** xref:master@_:ROOT:development/code_style.adoc[Code style] +*** xref:master@_:ROOT:development/how_to_review.adoc[Review checklist] +*** xref:master@_:ROOT:development/how_to_commit.adoc[How to commit] +*** xref:master@_:ROOT:development/documentation.adoc[Working on documentation] +*** xref:master@_:ROOT:development/ci.adoc[Jenkins CI environment] +*** xref:master@_:ROOT:development/dependencies.adoc[Dependency management] +*** xref:master@_:ROOT:development/release_process.adoc[Release process] + +** xref:faq/index.adoc[FAQ] + +** xref:plugins/index.adoc[Plug-ins] + diff --git a/doc/modules/cassandra/pages/architecture/dynamo.adoc b/doc/modules/cassandra/pages/architecture/dynamo.adoc new file mode 100644 index 000000000000..e90390a7cbb8 --- /dev/null +++ b/doc/modules/cassandra/pages/architecture/dynamo.adoc @@ -0,0 +1,531 @@ += Dynamo + +Apache Cassandra relies on a number of techniques from Amazon's +http://courses.cse.tamu.edu/caverlee/csce438/readings/dynamo-paper.pdf[Dynamo] +distributed storage key-value system. Each node in the Dynamo system has +three main components: + +* Request coordination over a partitioned dataset +* Ring membership and failure detection +* A local persistence (storage) engine + +Cassandra primarily draws from the first two clustering components, +while using a storage engine based on a Log Structured Merge Tree +(http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.44.2782&rep=rep1&type=pdf[LSM]). +In particular, Cassandra relies on Dynamo style: + +* Dataset partitioning using consistent hashing +* Multi-master replication using versioned data and tunable consistency +* Distributed cluster membership and failure detection via a gossip +protocol +* Incremental scale-out on commodity hardware + +Cassandra was designed this way to meet large-scale (PiB+) +business-critical storage requirements. In particular, as applications +demanded full global replication of petabyte scale datasets along with +always available low-latency reads and writes, it became imperative to +design a new kind of database model as the relational database systems +of the time struggled to meet the new requirements of global scale +applications. + +== Dataset Partitioning: Consistent Hashing + +Cassandra achieves horizontal scalability by +https://en.wikipedia.org/wiki/Partition_(database)[partitioning] all +data stored in the system using a hash function. Each partition is +replicated to multiple physical nodes, often across failure domains such +as racks and even datacenters. 
As every replica can independently accept +mutations to every key that it owns, every key must be versioned. Unlike +in the original Dynamo paper where deterministic versions and vector +clocks were used to reconcile concurrent updates to a key, Cassandra +uses a simpler last write wins model where every mutation is timestamped +(including deletes) and then the latest version of data is the "winning" +value. Formally speaking, Cassandra uses a Last-Write-Wins Element-Set +conflict-free replicated data type for each CQL row, or +https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type LWW-Element-Set_(Last-Write-Wins-Element-Set)[LWW-Element-Set +CRDT], to resolve conflicting mutations on replica sets. + +=== Consistent Hashing using a Token Ring + +Cassandra partitions data over storage nodes using a special form of +hashing called +https://en.wikipedia.org/wiki/Consistent_hashing[consistent hashing]. In +naive data hashing, you typically allocate keys to buckets by taking a +hash of the key modulo the number of buckets. For example, if you want +to distribute data to 100 nodes using naive hashing you might assign +every node to a bucket between 0 and 100, hash the input key modulo 100, +and store the data on the associated bucket. In this naive scheme, +however, adding a single node might invalidate almost all of the +mappings. + +Cassandra instead maps every node to one or more tokens on a continuous +hash ring, and defines ownership by hashing a key onto the ring and then +"walking" the ring in one direction, similar to the +https://pdos.csail.mit.edu/papers/chord:sigcomm01/chord_sigcomm.pdf[Chord] +algorithm. The main difference of consistent hashing to naive data +hashing is that when the number of nodes (buckets) to hash into changes, +consistent hashing only has to move a small fraction of the keys. + +For example, if we have an eight node cluster with evenly spaced tokens, +and a replication factor (RF) of 3, then to find the owning nodes for a +key we first hash that key to generate a token (which is just the hash +of the key), and then we "walk" the ring in a clockwise fashion until we +encounter three distinct nodes, at which point we have found all the +replicas of that key. This example of an eight node cluster with +gRF=3 can be visualized as follows: + +image::ring.svg[image] + +You can see that in a Dynamo like system, ranges of keys, also known as +*token ranges*, map to the same physical set of nodes. In this example, +all keys that fall in the token range excluding token 1 and including +token 2 (grange(t1, t2]) are stored on nodes 2, 3 and 4. + +=== Multiple Tokens per Physical Node (vnodes) + +Simple single token consistent hashing works well if you have many +physical nodes to spread data over, but with evenly spaced tokens and a +small number of physical nodes, incremental scaling (adding just a few +nodes of capacity) is difficult because there are no token selections +for new nodes that can leave the ring balanced. Cassandra seeks to avoid +token imbalance because uneven token ranges lead to uneven request load. +For example, in the previous example there is no way to add a ninth +token without causing imbalance; instead we would have to insert `8` +tokens in the midpoints of the existing ranges. + +The Dynamo paper advocates for the use of "virtual nodes" to solve this +imbalance problem. Virtual nodes solve the problem by assigning multiple +tokens in the token ring to each physical node. 
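A minimal sketch of this ring walk, using an invented integer token space
and made-up node names rather than Cassandra's actual Murmur3 token range,
might look like the following (each physical node appears at several ring
positions, i.e. vnodes):

[source,java]
----
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;

public class TokenRingSketch
{
    // Walk the ring clockwise from the key's token until `rf` distinct
    // physical nodes are found, skipping extra vnodes of nodes that are
    // already replicas. Assumes rf is at most the number of physical nodes.
    static List<String> replicasFor(long token, TreeMap<Long, String> ring, int rf)
    {
        Set<String> owners = new LinkedHashSet<>();
        Long position = ring.ceilingKey(token);   // first vnode at or after the token
        if (position == null)
            position = ring.firstKey();           // wrap around the ring
        while (owners.size() < rf)
        {
            owners.add(ring.get(position));
            position = ring.higherKey(position);  // next position clockwise
            if (position == null)
                position = ring.firstKey();
        }
        return new ArrayList<>(owners);
    }

    public static void main(String[] args)
    {
        // Hypothetical ring: four physical nodes, each assigned two tokens (vnodes).
        TreeMap<Long, String> ring = new TreeMap<>();
        ring.put(0L, "node1");  ring.put(12L, "node1");
        ring.put(25L, "node2"); ring.put(37L, "node2");
        ring.put(50L, "node3"); ring.put(62L, "node3");
        ring.put(75L, "node4"); ring.put(87L, "node4");

        System.out.println(replicasFor(30L, ring, 3)); // [node2, node3, node4]
    }
}
----

In this toy ring, adding a fifth node with a few new tokens would move only
the ranges adjacent to those tokens, which is the small-fraction-of-keys
property described above.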
By allowing a single +physical node to take multiple positions in the ring, we can make small +clusters look larger and therefore even with a single physical node +addition we can make it look like we added many more nodes, effectively +taking many smaller pieces of data from more ring neighbors when we add +even a single node. + +Cassandra introduces some nomenclature to handle these concepts: + +* *Token*: A single position on the dynamo style hash +ring. +* *Endpoint*: A single physical IP and port on the network. +* *Host ID*: A unique identifier for a single "physical" node, usually +present at one gEndpoint and containing one or more +gTokens. +* *Virtual Node* (or *vnode*): A gToken on the hash ring +owned by the same physical node, one with the same gHost +ID. + +The mapping of *Tokens* to *Endpoints* gives rise to the *Token Map* +where Cassandra keeps track of what ring positions map to which physical +endpoints. For example, in the following figure we can represent an +eight node cluster using only four physical nodes by assigning two +tokens to every node: + +image::vnodes.svg[image] + +Multiple tokens per physical node provide the following benefits: + +[arabic] +. When a new node is added it accepts approximately equal amounts of +data from other nodes in the ring, resulting in equal distribution of +data across the cluster. +. When a node is decommissioned, it loses data roughly equally to other +members of the ring, again keeping equal distribution of data across the +cluster. +. If a node becomes unavailable, query load (especially token aware +query load), is evenly distributed across many other nodes. + +Multiple tokens, however, can also have disadvantages: + +[arabic] +. Every token introduces up to `2 * (RF - 1)` additional neighbors on +the token ring, which means that there are more combinations of node +failures where we lose availability for a portion of the token ring. The +more tokens you have, +https://jolynch.github.io/pdf/cassandra-availability-virtual.pdf[the +higher the probability of an outage]. +. Cluster-wide maintenance operations are often slowed. For example, as +the number of tokens per node is increased, the number of discrete +repair operations the cluster must do also increases. +. Performance of operations that span token ranges could be affected. + +Note that in Cassandra `2.x`, the only token allocation algorithm +available was picking random tokens, which meant that to keep balance +the default number of tokens per node had to be quite high, at `256`. +This had the effect of coupling many physical endpoints together, +increasing the risk of unavailability. That is why in `3.x +` the new +deterministic token allocator was added which intelligently picks tokens +such that the ring is optimally balanced while requiring a much lower +number of tokens per physical node. + +== Multi-master Replication: Versioned Data and Tunable Consistency + +Cassandra replicates every partition of data to many nodes across the +cluster to maintain high availability and durability. When a mutation +occurs, the coordinator hashes the partition key to determine the token +range the data belongs to and then replicates the mutation to the +replicas of that data according to the +`Replication Strategy`. + +All replication strategies have the notion of a *replication factor* +(`RF`), which indicates to Cassandra how many copies of the partition +should exist. For example with a `RF=3` keyspace, the data will be +written to three distinct *replicas*. 
Replicas are always chosen such +that they are distinct physical nodes which is achieved by skipping +virtual nodes if needed. Replication strategies may also choose to skip +nodes present in the same failure domain such as racks or datacenters so +that Cassandra clusters can tolerate failures of whole racks and even +datacenters of nodes. + +=== Replication Strategy + +Cassandra supports pluggable *replication strategies*, which determine +which physical nodes act as replicas for a given token range. Every +keyspace of data has its own replication strategy. All production +deployments should use the `NetworkTopologyStrategy` while the +`SimpleStrategy` replication strategy is useful only for testing +clusters where you do not yet know the datacenter layout of the cluster. + +[[network-topology-strategy]] +==== `NetworkTopologyStrategy` + +`NetworkTopologyStrategy` requires a specified replication factor +for each datacenter in the cluster. Even if your cluster only uses a +single datacenter, `NetworkTopologyStrategy` is recommended over +`SimpleStrategy` to make it easier to add new physical or virtual +datacenters to the cluster later, if required. + +In addition to allowing the replication factor to be specified +individually by datacenter, `NetworkTopologyStrategy` also attempts to +choose replicas within a datacenter from different racks as specified by +the `Snitch`. If the number of racks is greater than or equal +to the replication factor for the datacenter, each replica is guaranteed +to be chosen from a different rack. Otherwise, each rack will hold at +least one replica, but some racks may hold more than one. Note that this +rack-aware behavior has some potentially +https://issues.apache.org/jira/browse/CASSANDRA-3810[surprising +implications]. For example, if there are not an even number of nodes in +each rack, the data load on the smallest rack may be much higher. +Similarly, if a single node is bootstrapped into a brand new rack, it +will be considered a replica for the entire ring. For this reason, many +operators choose to configure all nodes in a single availability zone or +similar failure domain as a single "rack". + +[[simple-strategy]] +==== `SimpleStrategy` + +`SimpleStrategy` allows a single integer `replication_factor` to be +defined. This determines the number of nodes that should contain a copy +of each row. For example, if `replication_factor` is 3, then three +different nodes should store a copy of each row. + +`SimpleStrategy` treats all nodes identically, ignoring any configured +datacenters or racks. To determine the replicas for a token range, +Cassandra iterates through the tokens in the ring, starting with the +token range of interest. For each token, it checks whether the owning +node has been added to the set of replicas, and if it has not, it is +added to the set. This process continues until `replication_factor` +distinct nodes have been added to the set of replicas. + +==== Transient Replication + +Transient replication is an experimental feature in Cassandra {40_version} not +present in the original Dynamo paper. This feature allows configuration of a +subset of replicas to replicate only data that hasn't been incrementally +repaired. This configuration decouples data redundancy from availability. +For instance, if you have a keyspace replicated at RF=3, and alter it to +RF=5 with two transient replicas, you go from tolerating one +failed replica to tolerating two, without corresponding +increase in storage usage. 
Now, three nodes will replicate all +the data for a given token range, and the other two will only replicate +data that hasn't been incrementally repaired. + +To use transient replication, first enable the option in +`cassandra.yaml`. Once enabled, both `SimpleStrategy` and +`NetworkTopologyStrategy` can be configured to transiently replicate +data. Configure it by specifying replication factor as +`/` in the read path and +`Hinted handoff ` in the write path. + +These techniques are only best-effort, however, and to guarantee +eventual consistency Cassandra implements `anti-entropy +repair ` where replicas calculate hierarchical hash-trees over +their datasets called https://en.wikipedia.org/wiki/Merkle_tree[Merkle +trees] that can then be compared across replicas to identify mismatched +data. Like the original Dynamo paper Cassandra supports full repairs +where replicas hash their entire dataset, create Merkle trees, send them +to each other and sync any ranges that don't match. + +Unlike the original Dynamo paper, Cassandra also implements sub-range +repair and incremental repair. Sub-range repair allows Cassandra to +increase the resolution of the hash trees (potentially down to the +single partition level) by creating a larger number of trees that span +only a portion of the data range. Incremental repair allows Cassandra to +only repair the partitions that have changed since the last repair. + +=== Tunable Consistency + +Cassandra supports a per-operation tradeoff between consistency and +availability through *Consistency Levels*. Cassandra's consistency +levels are a version of Dynamo's `R + W > N` consistency mechanism where +operators could configure the number of nodes that must participate in +reads (`R`) and writes (`W`) to be larger than the replication factor +(`N`). In Cassandra, you instead choose from a menu of common +consistency levels which allow the operator to pick `R` and `W` behavior +without knowing the replication factor. Generally writes will be visible +to subsequent reads when the read consistency level contains enough +nodes to guarantee a quorum intersection with the write consistency +level. + +The following consistency levels are available: + +`ONE`:: + Only a single replica must respond. +`TWO`:: + Two replicas must respond. +`THREE`:: + Three replicas must respond. +`QUORUM`:: + A majority (n/2 + 1) of the replicas must respond. +`ALL`:: + All of the replicas must respond. +`LOCAL_QUORUM`:: + A majority of the replicas in the local datacenter (whichever + datacenter the coordinator is in) must respond. +`EACH_QUORUM`:: + A majority of the replicas in each datacenter must respond. +`LOCAL_ONE`:: + Only a single replica must respond. In a multi-datacenter cluster, + this also gaurantees that read requests are not sent to replicas in a + remote datacenter. +`ANY`:: + A single replica may respond, or the coordinator may store a hint. If + a hint is stored, the coordinator will later attempt to replay the + hint and deliver the mutation to the replicas. This consistency level + is only accepted for write operations. + +Write operations *are always sent to all replicas*, regardless of +consistency level. The consistency level simply controls how many +responses the coordinator waits for before responding to the client. + +For read operations, the coordinator generally only issues read commands +to enough replicas to satisfy the consistency level. 
The one exception +to this is when speculative retry may issue a redundant read request to +an extra replica if the original replicas have not responded within a +specified time window. + +==== Picking Consistency Levels + +It is common to pick read and write consistency levels such that the +replica sets overlap, resulting in all acknowledged writes being visible +to subsequent reads. This is typically expressed in the same terms +Dynamo does, in that `W + R > RF`, where `W` is the write consistency +level, `R` is the read consistency level, and `RF` is the replication +factor. For example, if `RF = 3`, a `QUORUM` request will require +responses from at least `2/3` replicas. If `QUORUM` is used for both +writes and reads, at least one of the replicas is guaranteed to +participate in _both_ the write and the read request, which in turn +guarantees that the quorums will overlap and the write will be visible +to the read. + +In a multi-datacenter environment, `LOCAL_QUORUM` can be used to provide +a weaker but still useful guarantee: reads are guaranteed to see the +latest write from within the same datacenter. This is often sufficient +as clients homed to a single datacenter will read their own writes. + +If this type of strong consistency isn't required, lower consistency +levels like `LOCAL_ONE` or `ONE` may be used to improve throughput, +latency, and availability. With replication spanning multiple +datacenters, `LOCAL_ONE` is typically less available than `ONE` but is +faster as a rule. Indeed `ONE` will succeed if a single replica is +available in any datacenter. + +== Distributed Cluster Membership and Failure Detection + +The replication protocols and dataset partitioning rely on knowing which +nodes are alive and dead in the cluster so that write and read +operations can be optimally routed. In Cassandra liveness information is +shared in a distributed fashion through a failure detection mechanism +based on a gossip protocol. + +=== Gossip + +Gossip is how Cassandra propagates basic cluster bootstrapping +information such as endpoint membership and internode network protocol +versions. In Cassandra's gossip system, nodes exchange state information +not only about themselves but also about other nodes they know about. +This information is versioned with a vector clock of +`(generation, version)` tuples, where the generation is a monotonic +timestamp and version is a logical clock the increments roughly every +second. These logical clocks allow Cassandra gossip to ignore old +versions of cluster state just by inspecting the logical clocks +presented with gossip messages. + +Every node in the Cassandra cluster runs the gossip task independently +and periodically. Every second, every node in the cluster: + +[arabic] +. Updates the local node's heartbeat state (the version) and constructs +the node's local view of the cluster gossip endpoint state. +. Picks a random other node in the cluster to exchange gossip endpoint +state with. +. Probabilistically attempts to gossip with any unreachable nodes (if +one exists) +. Gossips with a seed node if that didn't happen in step 2. + +When an operator first bootstraps a Cassandra cluster they designate +certain nodes as seed nodes. Any node can be a seed node and the only +difference between seed and non-seed nodes is seed nodes are allowed to +bootstrap into the ring without seeing any other seed nodes. +Furthermore, once a cluster is bootstrapped, seed nodes become +hotspots for gossip due to step 4 above. 
+ +As non-seed nodes must be able to contact at least one seed node in +order to bootstrap into the cluster, it is common to include multiple +seed nodes, often one for each rack or datacenter. Seed nodes are often +chosen using existing off-the-shelf service discovery mechanisms. + +[NOTE] +.Note +==== +Nodes do not have to agree on the seed nodes, and indeed once a cluster +is bootstrapped, newly launched nodes can be configured to use any +existing nodes as seeds. The only advantage to picking the same nodes +as seeds is it increases their usefullness as gossip hotspots. +==== + +Currently, gossip also propagates token metadata and schema +_version_ information. This information forms the control plane for +scheduling data movements and schema pulls. For example, if a node sees +a mismatch in schema version in gossip state, it will schedule a schema +sync task with the other nodes. As token information propagates via +gossip it is also the control plane for teaching nodes which endpoints +own what data. + +=== Ring Membership and Failure Detection + +Gossip forms the basis of ring membership, but the *failure detector* +ultimately makes decisions about if nodes are `UP` or `DOWN`. Every node +in Cassandra runs a variant of the +https://www.computer.org/csdl/proceedings-article/srds/2004/22390066/12OmNvT2phv[Phi +Accrual Failure Detector], in which every node is constantly making an +independent decision of if their peer nodes are available or not. This +decision is primarily based on received heartbeat state. For example, if +a node does not see an increasing heartbeat from a node for a certain +amount of time, the failure detector "convicts" that node, at which +point Cassandra will stop routing reads to it (writes will typically be +written to hints). If/when the node starts heartbeating again, Cassandra +will try to reach out and connect, and if it can open communication +channels it will mark that node as available. + +[NOTE] +.Note +==== +`UP` and `DOWN` state are local node decisions and are not propagated with +gossip. Heartbeat state is propagated with gossip, but nodes will not +consider each other as `UP` until they can successfully message each +other over an actual network channel. +==== + +Cassandra will never remove a node from gossip state without +explicit instruction from an operator via a decommission operation or a +new node bootstrapping with a `replace_address_first_boot` option. This +choice is intentional to allow Cassandra nodes to temporarily fail +without causing data to needlessly re-balance. This also helps to +prevent simultaneous range movements, where multiple replicas of a token +range are moving at the same time, which can violate monotonic +consistency and can even cause data loss. + +== Incremental Scale-out on Commodity Hardware + +Cassandra scales-out to meet the requirements of growth in data size and +request rates. Scaling-out means adding additional nodes to the ring, +and every additional node brings linear improvements in compute and +storage. In contrast, scaling-up implies adding more capacity to the +existing database nodes. Cassandra is also capable of scale-up, and in +certain environments it may be preferable depending on the deployment. +Cassandra gives operators the flexibility to chose either scale-out or +scale-up. + +One key aspect of Dynamo that Cassandra follows is to attempt to run on +commodity hardware, and many engineering choices are made under this +assumption. 
For example, Cassandra assumes nodes can fail at any time, +auto-tunes to make the best use of CPU and memory resources available +and makes heavy use of advanced compression and caching techniques to +get the most storage out of limited memory and storage capabilities. + +=== Simple Query Model + +Cassandra, like Dynamo, chooses not to provide cross-partition +transactions that are common in SQL Relational Database Management +Systems (RDBMS). This both gives the programmer a simpler read and write +API, and allows Cassandra to more easily scale horizontally since +multi-partition transactions spanning multiple nodes are notoriously +difficult to implement and typically very latent. + +Instead, Cassanda chooses to offer fast, consistent, latency at any +scale for single partition operations, allowing retrieval of entire +partitions or only subsets of partitions based on primary key filters. +Furthermore, Cassandra does support single partition compare and swap +functionality via the lightweight transaction CQL API. + +=== Simple Interface for Storing Records + +Cassandra, in a slight departure from Dynamo, chooses a storage +interface that is more sophisticated then "simple key value" stores but +significantly less complex than SQL relational data models. Cassandra +presents a wide-column store interface, where partitions of data contain +multiple rows, each of which contains a flexible set of individually +typed columns. Every row is uniquely identified by the partition key and +one or more clustering keys, and every row can have as many columns as +needed. + +This allows users to flexibly add new columns to existing datasets as +new requirements surface. Schema changes involve only metadata changes +and run fully concurrently with live workloads. Therefore, users can +safely add columns to existing Cassandra databases while remaining +confident that query performance will not degrade. diff --git a/doc/modules/cassandra/pages/architecture/guarantees.adoc b/doc/modules/cassandra/pages/architecture/guarantees.adoc new file mode 100644 index 000000000000..3313a1140cf6 --- /dev/null +++ b/doc/modules/cassandra/pages/architecture/guarantees.adoc @@ -0,0 +1,108 @@ += Guarantees + +Apache Cassandra is a highly scalable and reliable database. Cassandra +is used in web based applications that serve large number of clients and +the quantity of data processed is web-scale (Petabyte) large. Cassandra +makes some guarantees about its scalability, availability and +reliability. To fully understand the inherent limitations of a storage +system in an environment in which a certain level of network partition +failure is to be expected and taken into account when designing the +system it is important to first briefly introduce the CAP theorem. + +== What is CAP? + +According to the CAP theorem it is not possible for a distributed data +store to provide more than two of the following guarantees +simultaneously. + +* Consistency: Consistency implies that every read receives the most +recent write or errors out +* Availability: Availability implies that every request receives a +response. It is not guaranteed that the response contains the most +recent write or data. +* Partition tolerance: Partition tolerance refers to the tolerance of a +storage system to failure of a network partition. Even if some of the +messages are dropped or delayed the system continues to operate. 
+ +CAP theorem implies that when using a network partition, with the +inherent risk of partition failure, one has to choose between +consistency and availability and both cannot be guaranteed at the same +time. CAP theorem is illustrated in Figure 1. + +image::Figure_1_guarantees.jpg[image] + +Figure 1. CAP Theorem + +High availability is a priority in web based applications and to this +objective Cassandra chooses Availability and Partition Tolerance from +the CAP guarantees, compromising on data Consistency to some extent. + +Cassandra makes the following guarantees. + +* High Scalability +* High Availability +* Durability +* Eventual Consistency of writes to a single table +* Lightweight transactions with linearizable consistency +* Batched writes across multiple tables are guaranteed to succeed +completely or not at all +* Secondary indexes are guaranteed to be consistent with their local +replicas data + +== High Scalability + +Cassandra is a highly scalable storage system in which nodes may be +added/removed as needed. Using gossip-based protocol a unified and +consistent membership list is kept at each node. + +== High Availability + +Cassandra guarantees high availability of data by implementing a +fault-tolerant storage system. Failure detection in a node is detected +using a gossip-based protocol. + +== Durability + +Cassandra guarantees data durability by using replicas. Replicas are +multiple copies of a data stored on different nodes in a cluster. In a +multi-datacenter environment the replicas may be stored on different +datacenters. If one replica is lost due to unrecoverable node/datacenter +failure the data is not completely lost as replicas are still available. + +== Eventual Consistency + +Meeting the requirements of performance, reliability, scalability and +high availability in production Cassandra is an eventually consistent +storage system. Eventually consistent implies that all updates reach all +replicas eventually. Divergent versions of the same data may exist +temporarily but they are eventually reconciled to a consistent state. +Eventual consistency is a tradeoff to achieve high availability and it +involves some read and write latencies. + +== Lightweight transactions with linearizable consistency + +Data must be read and written in a sequential order. Paxos consensus +protocol is used to implement lightweight transactions. Paxos protocol +implements lightweight transactions that are able to handle concurrent +operations using linearizable consistency. Linearizable consistency is +sequential consistency with real-time constraints and it ensures +transaction isolation with compare and set (CAS) transaction. With CAS +replica data is compared and data that is found to be out of date is set +to the most consistent value. Reads with linearizable consistency allow +reading the current state of the data, which may possibly be +uncommitted, without making a new addition or update. + +== Batched Writes + +The guarantee for batched writes across multiple tables is that they +will eventually succeed, or none will. Batch data is first written to +batchlog system data, and when the batch data has been successfully +stored in the cluster the batchlog data is removed. The batch is +replicated to another node to ensure the full batch completes in the +event the coordinator node fails. + +== Secondary Indexes + +A secondary index is an index on a column and is used to query a table +that is normally not queryable. 
Secondary indexes when built are +guaranteed to be consistent with their local replicas. diff --git a/doc/modules/cassandra/pages/architecture/images/ring.svg b/doc/modules/cassandra/pages/architecture/images/ring.svg new file mode 100644 index 000000000000..d0db8c579e3e --- /dev/null +++ b/doc/modules/cassandra/pages/architecture/images/ring.svg @@ -0,0 +1,11 @@ + + + + + ... + diff --git a/doc/modules/cassandra/pages/architecture/images/vnodes.svg b/doc/modules/cassandra/pages/architecture/images/vnodes.svg new file mode 100644 index 000000000000..71b4fa2d8b90 --- /dev/null +++ b/doc/modules/cassandra/pages/architecture/images/vnodes.svg @@ -0,0 +1,11 @@ + + + + + + diff --git a/doc/modules/cassandra/pages/architecture/index.adoc b/doc/modules/cassandra/pages/architecture/index.adoc new file mode 100644 index 000000000000..c4bef05cfdf0 --- /dev/null +++ b/doc/modules/cassandra/pages/architecture/index.adoc @@ -0,0 +1,9 @@ += Architecture + +This section describes the general architecture of Apache Cassandra. + +* xref:architecture/overview.adoc[Overview] +* xref:architecture/dynamo.adoc[Dynamo] +* xref:architecture/storage_engine.adoc[Storage Engine] +* xref:architecture/guarantees.adoc[Guarantees] +* xref:architecture/snitch.adoc[Snitches] diff --git a/doc/modules/cassandra/pages/architecture/overview.adoc b/doc/modules/cassandra/pages/architecture/overview.adoc new file mode 100644 index 000000000000..605e347830af --- /dev/null +++ b/doc/modules/cassandra/pages/architecture/overview.adoc @@ -0,0 +1,101 @@ += Overview +:exper: experimental + +Apache Cassandra is an open source, distributed, NoSQL database. It +presents a partitioned wide column storage model with eventually +consistent semantics. + +Apache Cassandra was initially designed at +https://www.cs.cornell.edu/projects/ladis2009/papers/lakshman-ladis2009.pdf[Facebook] +using a staged event-driven architecture +(http://www.sosp.org/2001/papers/welsh.pdf[SEDA]) to implement a +combination of Amazon’s +http://courses.cse.tamu.edu/caverlee/csce438/readings/dynamo-paper.pdf[Dynamo] +distributed storage and replication techniques and Google's +https://static.googleusercontent.com/media/research.google.com/en//archive/bigtable-osdi06.pdf[Bigtable] +data and storage engine model. Dynamo and Bigtable were both developed +to meet emerging requirements for scalable, reliable and highly +available storage systems, but each had areas that could be improved. + +Cassandra was designed as a best-in-class combination of both systems to +meet emerging largescale, both in data footprint and query volume, +storage requirements. As applications began to require full global +replication and always available low-latency reads and writes, it became +imperative to design a new kind of database model as the relational +database systems of the time struggled to meet the new requirements of +global scale applications. + +Systems like Cassandra are designed for these challenges and seek the +following design objectives: + +* Full multi-master database replication +* Global availability at low latency +* Scaling out on commodity hardware +* Linear throughput increase with each additional processor +* Online load balancing and cluster growth +* Partitioned key-oriented queries +* Flexible schema + +== Features + +Cassandra provides the Cassandra Query Language (xref:cql/ddl.adoc[CQL]), an SQL-like +language, to create and update database schema and access data. 
CQL +allows users to organize data within a cluster of Cassandra nodes using: + +* *Keyspace*: Defines how a dataset is replicated, per datacenter. +Replication is the number of copies saved per cluster. +Keyspaces contain tables. +* *Table*: Defines the typed schema for a collection of partitions. +Tables contain partitions, which contain rows, which contain columns. +Cassandra tables can flexibly add new columns to tables with zero downtime. +* *Partition*: Defines the mandatory part of the primary key all rows in +Cassandra must have to identify the node in a cluster where the row is stored. +All performant queries supply the partition key in the query. +* *Row*: Contains a collection of columns identified by a unique primary +key made up of the partition key and optionally additional clustering +keys. +* *Column*: A single datum with a type which belongs to a row. + +CQL supports numerous advanced features over a partitioned dataset such +as: + +* Single partition lightweight transactions with atomic compare and set +semantics. +* User-defined types, functions and aggregates +* Collection types including sets, maps, and lists. +* Local secondary indices +* (Experimental) materialized views + +Cassandra explicitly chooses not to implement operations that require +cross partition coordination as they are typically slow and hard to +provide highly available global semantics. For example Cassandra does +not support: + +* Cross partition transactions +* Distributed joins +* Foreign keys or referential integrity. + +== Operating + +Apache Cassandra configuration settings are configured in the +`cassandra.yaml` file that can be edited by hand or with the aid of +configuration management tools. Some settings can be manipulated live +using an online interface, but others require a restart of the database +to take effect. + +Cassandra provides tools for managing a cluster. The `nodetool` command +interacts with Cassandra's live control interface, allowing runtime +manipulation of many settings from `cassandra.yaml`. The +`auditlogviewer` is used to view the audit logs. The `fqltool` is used +to view, replay and compare full query logs. The `auditlogviewer` and +`fqltool` are new tools in Apache Cassandra {40_version}. + +In addition, Cassandra supports out of the box atomic snapshot +functionality, which presents a point in time snapshot of Cassandra's +data for easy integration with many backup tools. Cassandra also +supports incremental backups where data can be backed up as it is +written. + +Apache Cassandra {40_version} has added several new features including virtual +tables, transient replication ({exper}), audit logging, full query logging, and +support for Java 11 ({exper}). diff --git a/doc/modules/cassandra/pages/architecture/snitch.adoc b/doc/modules/cassandra/pages/architecture/snitch.adoc new file mode 100644 index 000000000000..90b32fb2e2ca --- /dev/null +++ b/doc/modules/cassandra/pages/architecture/snitch.adoc @@ -0,0 +1,74 @@ += Snitch + +In cassandra, the snitch has two functions: + +* it teaches Cassandra enough about your network topology to route +requests efficiently. +* it allows Cassandra to spread replicas around your cluster to avoid +correlated failures. It does this by grouping machines into +"datacenters" and "racks." Cassandra will do its best not to have more +than one replica on the same "rack" (which may not actually be a +physical location). + +== Dynamic snitching + +The dynamic snitch monitor read latencies to avoid reading from hosts +that have slowed down. 
The dynamic snitch is configured with the +following properties on `cassandra.yaml`: + +* `dynamic_snitch`: whether the dynamic snitch should be enabled or +disabled. +* `dynamic_snitch_update_interval_in_ms`: controls how often to perform +the more expensive part of host score calculation. +* `dynamic_snitch_reset_interval_in_ms`: if set greater than zero, this +will allow 'pinning' of replicas to hosts in order to increase cache +capacity. +* `dynamic_snitch_badness_threshold:`: The badness threshold will +control how much worse the pinned host has to be before the dynamic +snitch will prefer other replicas over it. This is expressed as a double +which represents a percentage. Thus, a value of 0.2 means Cassandra +would continue to prefer the static snitch values until the pinned host +was 20% worse than the fastest. + +== Snitch classes + +The `endpoint_snitch` parameter in `cassandra.yaml` should be set to the +class that implements `IEndPointSnitch` which will be wrapped by the +dynamic snitch and decide if two endpoints are in the same data center +or on the same rack. Out of the box, Cassandra provides the snitch +implementations: + +GossipingPropertyFileSnitch:: + This should be your go-to snitch for production use. The rack and + datacenter for the local node are defined in + cassandra-rackdc.properties and propagated to other nodes via gossip. + If `cassandra-topology.properties` exists, it is used as a fallback, + allowing migration from the PropertyFileSnitch. +SimpleSnitch:: + Treats Strategy order as proximity. This can improve cache locality + when disabling read repair. Only appropriate for single-datacenter + deployments. +PropertyFileSnitch:: + Proximity is determined by rack and data center, which are explicitly + configured in `cassandra-topology.properties`. +Ec2Snitch:: + Appropriate for EC2 deployments in a single Region, or in multiple + regions with inter-region VPC enabled (available since the end of + 2017, see + https://aws.amazon.com/about-aws/whats-new/2017/11/announcing-support-for-inter-region-vpc-peering/[AWS + announcement]). Loads Region and Availability Zone information from + the EC2 API. The Region is treated as the datacenter, and the + Availability Zone as the rack. Only private IPs are used, so this will + work across multiple regions only if inter-region VPC is enabled. +Ec2MultiRegionSnitch:: + Uses public IPs as broadcast_address to allow cross-region + connectivity (thus, you should set seed addresses to the public IP as + well). You will need to open the `storage_port` or `ssl_storage_port` + on the public IP firewall (For intra-Region traffic, Cassandra will + switch to the private IP after establishing a connection). +RackInferringSnitch:: + Proximity is determined by rack and data center, which are assumed to + correspond to the 3rd and 2nd octet of each node's IP address, + respectively. Unless this happens to match your deployment + conventions, this is best used as an example of writing a custom + Snitch class and is provided in that spirit. diff --git a/doc/modules/cassandra/pages/architecture/storage_engine.adoc b/doc/modules/cassandra/pages/architecture/storage_engine.adoc new file mode 100644 index 000000000000..77c52e5d52f1 --- /dev/null +++ b/doc/modules/cassandra/pages/architecture/storage_engine.adoc @@ -0,0 +1,225 @@ += Storage Engine + +[[commit-log]] +== CommitLog + +Commitlogs are an append only log of all mutations local to a Cassandra +node. 
Any data written to Cassandra will first be written to a commit +log before being written to a memtable. This provides durability in the +case of unexpected shutdown. On startup, any mutations in the commit log +will be applied to memtables. + +All mutations write optimized by storing in commitlog segments, reducing +the number of seeks needed to write to disk. Commitlog Segments are +limited by the `commitlog_segment_size_in_mb` option, once the size is +reached, a new commitlog segment is created. Commitlog segments can be +archived, deleted, or recycled once all its data has been flushed to +SSTables. Commitlog segments are truncated when Cassandra has written +data older than a certain point to the SSTables. Running "nodetool +drain" before stopping Cassandra will write everything in the memtables +to SSTables and remove the need to sync with the commitlogs on startup. + +* `commitlog_segment_size_in_mb`: The default size is 32, which is +almost always fine, but if you are archiving commitlog segments (see +commitlog_archiving.properties), then you probably want a finer +granularity of archiving; 8 or 16 MB is reasonable. Max mutation size is +also configurable via `max_mutation_size_in_kb` setting in `cassandra.yaml`. +The default is half the size `commitlog_segment_size_in_mb * 1024`. + +**NOTE: If `max_mutation_size_in_kb` is set explicitly then +`commitlog_segment_size_in_mb` must be set to at least twice the size of +`max_mutation_size_in_kb / 1024`**. + +Commitlogs are an append only log of all mutations local to a Cassandra +node. Any data written to Cassandra will first be written to a commit +log before being written to a memtable. This provides durability in the +case of unexpected shutdown. On startup, any mutations in the commit log +will be applied. + +* `commitlog_sync`: may be either _periodic_ or _batch_. +** `batch`: In batch mode, Cassandra won’t ack writes until the commit +log has been fsynced to disk. It will wait +"commitlog_sync_batch_window_in_ms" milliseconds between fsyncs. This +window should be kept short because the writer threads will be unable to +do extra work while waiting. You may need to increase concurrent_writes +for the same reason. ++ +- `commitlog_sync_batch_window_in_ms`: Time to wait between "batch" +fsyncs _Default Value:_ 2 +** `periodic`: In periodic mode, writes are immediately ack'ed, and the +CommitLog is simply synced every "commitlog_sync_period_in_ms" +milliseconds. ++ +- `commitlog_sync_period_in_ms`: Time to wait between "periodic" fsyncs +_Default Value:_ 10000 + +_Default Value:_ batch + +** NOTE: In the event of an unexpected shutdown, Cassandra can lose up +to the sync period or more if the sync is delayed. If using "batch" +mode, it is recommended to store commitlogs in a separate, dedicated +device.* + +* `commitlog_directory`: This option is commented out by default When +running on magnetic HDD, this should be a separate spindle than the data +directories. If not set, the default directory is +$CASSANDRA_HOME/data/commitlog. + +_Default Value:_ /var/lib/cassandra/commitlog + +* `commitlog_compression`: Compression to apply to the commitlog. If +omitted, the commit log will be written uncompressed. LZ4, Snappy, +Deflate and Zstd compressors are supported. + +(Default Value: (complex option): + +[source, yaml] +---- +# - class_name: LZ4Compressor +# parameters: +---- + +* `commitlog_total_space_in_mb`: Total space to use for commit logs on +disk. 
+ +If space gets above this value, Cassandra will flush every dirty CF in +the oldest segment and remove it. So a small total commitlog space will +tend to cause more flush activity on less-active columnfamilies. + +The default value is the smaller of 8192, and 1/4 of the total space of +the commitlog volume. + +_Default Value:_ 8192 + +== Memtables + +Memtables are in-memory structures where Cassandra buffers writes. In +general, there is one active memtable per table. Eventually, memtables +are flushed onto disk and become immutable link:#sstables[SSTables]. +This can be triggered in several ways: + +* The memory usage of the memtables exceeds the configured threshold +(see `memtable_cleanup_threshold`) +* The `commit-log` approaches its maximum size, and forces memtable +flushes in order to allow commitlog segments to be freed + +Memtables may be stored entirely on-heap or partially off-heap, +depending on `memtable_allocation_type`. + +== SSTables + +SSTables are the immutable data files that Cassandra uses for persisting +data on disk. + +As SSTables are flushed to disk from `memtables` or are streamed from +other nodes, Cassandra triggers compactions which combine multiple +SSTables into one. Once the new SSTable has been written, the old +SSTables can be removed. + +Each SSTable is comprised of multiple components stored in separate +files: + +`Data.db`:: + The actual data, i.e. the contents of rows. +`Index.db`:: + An index from partition keys to positions in the `Data.db` file. For + wide partitions, this may also include an index to rows within a + partition. +`Summary.db`:: + A sampling of (by default) every 128th entry in the `Index.db` file. +`Filter.db`:: + A Bloom Filter of the partition keys in the SSTable. +`CompressionInfo.db`:: + Metadata about the offsets and lengths of compression chunks in the + `Data.db` file. +`Statistics.db`:: + Stores metadata about the SSTable, including information about + timestamps, tombstones, clustering keys, compaction, repair, + compression, TTLs, and more. +`Digest.crc32`:: + A CRC-32 digest of the `Data.db` file. +`TOC.txt`:: + A plain text list of the component files for the SSTable. + +Within the `Data.db` file, rows are organized by partition. These +partitions are sorted in token order (i.e. by a hash of the partition +key when the default partitioner, `Murmur3Partition`, is used). Within a +partition, rows are stored in the order of their clustering keys. + +SSTables can be optionally compressed using block-based compression. + +== SSTable Versions + +This section was created using the following +https://gist.github.com/shyamsalimkumar/49a61e5bc6f403d20c55[gist] which +utilized this original +http://www.bajb.net/2013/03/cassandra-sstable-format-version-numbers/[source]. 
+ +The version numbers, to date are: + +=== Version 0 + +* b (0.7.0): added version to sstable filenames +* c (0.7.0): bloom filter component computes hashes over raw key bytes +instead of strings +* d (0.7.0): row size in data component becomes a long instead of int +* e (0.7.0): stores undecorated keys in data and index components +* f (0.7.0): switched bloom filter implementations in data component +* g (0.8): tracks flushed-at context in metadata component + +=== Version 1 + +* h (1.0): tracks max client timestamp in metadata component +* hb (1.0.3): records compression ration in metadata component +* hc (1.0.4): records partitioner in metadata component +* hd (1.0.10): includes row tombstones in maxtimestamp +* he (1.1.3): includes ancestors generation in metadata component +* hf (1.1.6): marker that replay position corresponds to 1.1.5+ +millis-based id (see CASSANDRA-4782) +* ia (1.2.0): +** column indexes are promoted to the index file +** records estimated histogram of deletion times in tombstones +** bloom filter (keys and columns) upgraded to Murmur3 +* ib (1.2.1): tracks min client timestamp in metadata component +* ic (1.2.5): omits per-row bloom filter of column names + +=== Version 2 + +* ja (2.0.0): +** super columns are serialized as composites (note that there is no +real format change, this is mostly a marker to know if we should expect +super columns or not. We do need a major version bump however, because +we should not allow streaming of super columns into this new format) +** tracks max local deletiontime in sstable metadata +** records bloom_filter_fp_chance in metadata component +** remove data size and column count from data file (CASSANDRA-4180) +** tracks max/min column values (according to comparator) +* jb (2.0.1): +** switch from crc32 to adler32 for compression checksums +** checksum the compressed data +* ka (2.1.0): +** new Statistics.db file format +** index summaries can be downsampled and the sampling level is +persisted +** switch uncompressed checksums to adler32 +** tracks presense of legacy (local and remote) counter shards +* la (2.2.0): new file name format +* lb (2.2.7): commit log lower bound included + +=== Version 3 + +* ma (3.0.0): +** swap bf hash order +** store rows natively +* mb (3.0.7, 3.7): commit log lower bound included +* mc (3.0.8, 3.9): commit log intervals included + +=== Example Code + +The following example is useful for finding all sstables that do not +match the "ib" SSTable version + +[source,bash] +---- +include:example$find_sstables.sh[] +---- diff --git a/doc/modules/cassandra/pages/configuration/cass_cl_archive_file.adoc b/doc/modules/cassandra/pages/configuration/cass_cl_archive_file.adoc new file mode 100644 index 000000000000..f7b07887ed23 --- /dev/null +++ b/doc/modules/cassandra/pages/configuration/cass_cl_archive_file.adoc @@ -0,0 +1,48 @@ +[[cassandra-cl-archive]] +== commitlog-archiving.properties file + +The `commitlog-archiving.properties` configuration file can optionally +set commands that are executed when archiving or restoring a commitlog +segment. + +== Options + +`archive_command=` ------One command can be inserted with %path +and %name arguments. %path is the fully qualified path of the commitlog +segment to archive. %name is the filename of the commitlog. STDOUT, +STDIN, or multiple commands cannot be executed. If multiple commands are +required, add a pointer to a script in this option. 
+ +*Example:* archive_command=/bin/ln %path /backup/%name + +*Default value:* blank + +`restore_command=` ------One command can be inserted with %from +and %to arguments. %from is the fully qualified path to an archived +commitlog segment using the specified restore directories. %to defines +the directory to the live commitlog location. + +*Example:* restore_command=/bin/cp -f %from %to + +*Default value:* blank + +`restore_directories=` ------Defines the directory to scan +the recovery files into. + +*Default value:* blank + +`restore_point_in_time=` ------Restore mutations created up +to and including this timestamp in GMT in the format +`yyyy:MM:dd HH:mm:ss`. Recovery will continue through the segment when +the first client-supplied timestamp greater than this time is +encountered, but only mutations less than or equal to this timestamp +will be applied. + +*Example:* 2020:04:31 20:43:12 + +*Default value:* blank + +`precision=` ------Precision of the timestamp used +in the inserts. Choice is generally MILLISECONDS or MICROSECONDS + +*Default value:* MICROSECONDS diff --git a/doc/modules/cassandra/pages/configuration/cass_env_sh_file.adoc b/doc/modules/cassandra/pages/configuration/cass_env_sh_file.adoc new file mode 100644 index 000000000000..d895186246e5 --- /dev/null +++ b/doc/modules/cassandra/pages/configuration/cass_env_sh_file.adoc @@ -0,0 +1,162 @@ += cassandra-env.sh file + +The `cassandra-env.sh` bash script file can be used to pass additional +options to the Java virtual machine (JVM), such as maximum and minimum +heap size, rather than setting them in the environment. If the JVM +settings are static and do not need to be computed from the node +characteristics, the `cassandra-jvm-options` files should be used +instead. For example, commonly computed values are the heap sizes, using +the system values. + +For example, add +`JVM_OPTS="$JVM_OPTS -Dcassandra.load_ring_state=false"` to the +`cassandra_env.sh` file and run the command-line `cassandra` to start. +The option is set from the `cassandra-env.sh` file, and is equivalent to +starting Cassandra with the command-line option +`cassandra -Dcassandra.load_ring_state=false`. + +The `-D` option specifies the start-up parameters in both the command +line and `cassandra-env.sh` file. The following options are available: + +== `cassandra.auto_bootstrap=false` + +Facilitates setting auto_bootstrap to false on initial set-up of the +cluster. The next time you start the cluster, you do not need to change +the `cassandra.yaml` file on each node to revert to true, the default +value. + +== `cassandra.available_processors=` + +In a multi-instance deployment, multiple Cassandra instances will +independently assume that all CPU processors are available to it. This +setting allows you to specify a smaller set of processors. + +== `cassandra.boot_without_jna=true` + +If JNA fails to initialize, Cassandra fails to boot. Use this command to +boot Cassandra without JNA. + +== `cassandra.config=` + +The directory location of the `cassandra.yaml file`. The default +location depends on the type of installation. + +== `cassandra.ignore_dynamic_snitch_severity=true|false` + +Setting this property to true causes the dynamic snitch to ignore the +severity indicator from gossip when scoring nodes. Explore failure +detection and recovery and dynamic snitching for more information. + +*Default:* false + +== `cassandra.initial_token=` + +Use when virtual nodes (vnodes) are not used. 
Sets the initial +partitioner token for a node the first time the node is started. Note: +Vnodes are highly recommended as they automatically select tokens. + +*Default:* disabled + +== `cassandra.join_ring=true|false` + +Set to false to start Cassandra on a node but not have the node join the +cluster. You can use `nodetool join` and a JMX call to join the ring +afterwards. + +*Default:* true + +== `cassandra.load_ring_state=true|false` + +Set to false to clear all gossip state for the node on restart. + +*Default:* true + +== `cassandra.metricsReporterConfigFile=` + +Enable pluggable metrics reporter. Explore pluggable metrics reporting +for more information. + +== `cassandra.partitioner=` + +Set the partitioner. + +*Default:* org.apache.cassandra.dht.Murmur3Partitioner + +== `cassandra.prepared_statements_cache_size_in_bytes=` + +Set the cache size for prepared statements. + +== `cassandra.replace_address=|` + +To replace a node that has died, restart a new node in its place +specifying the `listen_address` or `broadcast_address` that the new node +is assuming. The new node must not have any data in its data directory, +the same state as before bootstrapping. Note: The `broadcast_address` +defaults to the `listen_address` except when using the +`Ec2MultiRegionSnitch`. + +== `cassandra.replayList=
` + +Allow restoring specific tables from an archived commit log. + +== `cassandra.ring_delay_ms=` + +Defines the amount of time a node waits to hear from other nodes before +formally joining the ring. + +*Default:* 1000ms + +== `cassandra.native_transport_port=` + +Set the port on which the CQL native transport listens for clients. + +*Default:* 9042 + +== `cassandra.rpc_port=` + +Set the port for the Thrift RPC service, which is used for client +connections. + +*Default:* 9160 + +== `cassandra.storage_port=` + +Set the port for inter-node communication. + +*Default:* 7000 + +== `cassandra.ssl_storage_port=` + +Set the SSL port for encrypted communication. + +*Default:* 7001 + +== `cassandra.start_native_transport=true|false` + +Enable or disable the native transport server. See +`start_native_transport` in `cassandra.yaml`. + +*Default:* true + +== `cassandra.start_rpc=true|false` + +Enable or disable the Thrift RPC server. + +*Default:* true + +== `cassandra.triggers_dir=` + +Set the default location for the trigger JARs. + +*Default:* conf/triggers + +== `cassandra.write_survey=true` + +For testing new compaction and compression strategies. It allows you to +experiment with different strategies and benchmark write performance +differences without affecting the production workload. + +== `consistent.rangemovement=true|false` + +Set to true makes Cassandra perform bootstrap safely without violating +consistency. False disables this. diff --git a/doc/modules/cassandra/pages/configuration/cass_jvm_options_file.adoc b/doc/modules/cassandra/pages/configuration/cass_jvm_options_file.adoc new file mode 100644 index 000000000000..b9a312c34092 --- /dev/null +++ b/doc/modules/cassandra/pages/configuration/cass_jvm_options_file.adoc @@ -0,0 +1,22 @@ += jvm-* files + +Several files for JVM configuration are included in Cassandra. The +`jvm-server.options` file, and corresponding files `jvm8-server.options` +and `jvm11-server.options` are the main file for settings that affect +the operation of the Cassandra JVM on cluster nodes. The file includes +startup parameters, general JVM settings such as garbage collection, and +heap settings. The `jvm-clients.options` and corresponding +`jvm8-clients.options` and `jvm11-clients.options` files can be used to +configure JVM settings for clients like `nodetool` and the `sstable` +tools. + +See each file for examples of settings. + +[NOTE] +.Note +==== +The `jvm-*` files replace the `cassandra-envsh` file used in Cassandra +versions prior to Cassandra 3.0. The `cassandra-env.sh` bash script file +is still useful if JVM settings must be dynamically calculated based on +system settings. The `jvm-*` files only store static JVM settings. +==== diff --git a/doc/modules/cassandra/pages/configuration/cass_logback_xml_file.adoc b/doc/modules/cassandra/pages/configuration/cass_logback_xml_file.adoc new file mode 100644 index 000000000000..e673622099d6 --- /dev/null +++ b/doc/modules/cassandra/pages/configuration/cass_logback_xml_file.adoc @@ -0,0 +1,166 @@ += logback.xml file + +The `logback.xml` configuration file can optionally set logging levels +for the logs written to `system.log` and `debug.log`. The logging levels +can also be set using `nodetool setlogginglevels`. + +== Options + +=== `appender name=""...` + +Specify log type and settings. Possible appender names are: `SYSTEMLOG`, +`DEBUGLOG`, `ASYNCDEBUGLOG`, and `STDOUT`. `SYSTEMLOG` ensures that WARN +and ERROR message are written synchronously to the specified file. 
+`DEBUGLOG` and `ASYNCDEBUGLOG` ensure that DEBUG messages are written +either synchronously or asynchronously, respectively, to the specified +file. `STDOUT` writes all messages to the console in a human-readable +format. + +*Example:* + +=== ` ` + +Specify the filename for a log. + +*Example:* $\{cassandra.logdir}/system.log + +=== ` ` + +Specify the level for a log. Part of the filter. Levels are: `ALL`, +`TRACE`, `DEBUG`, `INFO`, `WARN`, `ERROR`, `OFF`. `TRACE` creates the +most verbose log, `ERROR` the least. + +[NOTE] +.Note +==== +Note: Increasing logging levels can generate heavy logging output on +a moderately trafficked cluster. You can use the +`nodetool getlogginglevels` command to see the current logging +configuration. +==== + +*Default:* INFO + +*Example:* INFO + +=== ` ... ` + +Specify the policy for rolling logs over to an archive. + +*Example:* + +=== ` ` + +Specify the pattern information for rolling over the log to archive. +Part of the rolling policy. + +*Example:* +$\{cassandra.logdir}/system.log.%d\{yyyy-MM-dd}.%i.zip + +=== ` ` + +Specify the maximum file size to trigger rolling a log. Part of the +rolling policy. + +*Example:* 50MB + +=== ` ` + +Specify the maximum history in days to trigger rolling a log. Part of +the rolling policy. + +*Example:* 7 + +=== ` ... ` + +Specify the format of the message. Part of the rolling policy. + +*Example:* 7 *Example:* +%-5level [%thread] %date\{ISO8601} %F:%L - %msg%n + + +=== Contents of default `logback.xml` + +[source,XML] +---- + + + + + + + + + + INFO + + ${cassandra.logdir}/system.log + + + ${cassandra.logdir}/system.log.%d{yyyy-MM-dd}.%i.zip + + 50MB + 7 + 5GB + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + ${cassandra.logdir}/debug.log + + + ${cassandra.logdir}/debug.log.%d{yyyy-MM-dd}.%i.zip + + 50MB + 7 + 5GB + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + 1024 + 0 + true + + + + + + + + INFO + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + + + + + + + + + +---- diff --git a/doc/modules/cassandra/pages/configuration/cass_rackdc_file.adoc b/doc/modules/cassandra/pages/configuration/cass_rackdc_file.adoc new file mode 100644 index 000000000000..0b370c9cc591 --- /dev/null +++ b/doc/modules/cassandra/pages/configuration/cass_rackdc_file.adoc @@ -0,0 +1,79 @@ += cassandra-rackdc.properties file + +Several `snitch` options use the `cassandra-rackdc.properties` +configuration file to determine which `datacenters` and racks cluster +nodes belong to. Information about the network topology allows requests +to be routed efficiently and to distribute replicas evenly. The +following snitches can be configured here: + +* GossipingPropertyFileSnitch +* AWS EC2 single-region snitch +* AWS EC2 multi-region snitch + +The GossipingPropertyFileSnitch is recommended for production. This +snitch uses the datacenter and rack information configured in a local +node's `cassandra-rackdc.properties` file and propagates the information +to other nodes using `gossip`. It is the default snitch and the settings +in this properties file are enabled. + +The AWS EC2 snitches are configured for clusters in AWS. This snitch +uses the `cassandra-rackdc.properties` options to designate one of two +AWS EC2 datacenter and rack naming conventions: + +* legacy: Datacenter name is the part of the availability zone name +preceding the last "-" when the zone ends in -1 and includes the number +if not -1. Rack name is the portion of the availability zone name +following the last "-". 
++ +____ +Examples: us-west-1a => dc: us-west, rack: 1a; us-west-2b => dc: +us-west-2, rack: 2b; +____ +* standard: Datacenter name is the standard AWS region name, including +the number. Rack name is the region plus the availability zone letter. ++ +____ +Examples: us-west-1a => dc: us-west-1, rack: us-west-1a; us-west-2b => +dc: us-west-2, rack: us-west-2b; +____ + +Either snitch can set to use the local or internal IP address when +multiple datacenters are not communicating. + +== GossipingPropertyFileSnitch + +=== `dc` + +Name of the datacenter. The value is case-sensitive. + +*Default value:* DC1 + +=== `rack` + +Rack designation. The value is case-sensitive. + +*Default value:* RAC1 + +== AWS EC2 snitch + +=== `ec2_naming_scheme` + +Datacenter and rack naming convention. Options are `legacy` or +`standard` (default). *This option is commented out by default.* + +*Default value:* standard + +[NOTE] +.Note +==== +YOU MUST USE THE `legacy` VALUE IF YOU ARE UPGRADING A PRE-4.0 CLUSTER. +==== + +== Either snitch + +=== `prefer_local` + +Option to use the local or internal IP address when communication is not +across different datacenters. *This option is commented out by default.* + +*Default value:* true diff --git a/doc/modules/cassandra/pages/configuration/cass_topo_file.adoc b/doc/modules/cassandra/pages/configuration/cass_topo_file.adoc new file mode 100644 index 000000000000..5ca82219b5c2 --- /dev/null +++ b/doc/modules/cassandra/pages/configuration/cass_topo_file.adoc @@ -0,0 +1,53 @@ +[[cassandra-topology]] +cassandra-topologies.properties file ================================ + +The `PropertyFileSnitch` `snitch` option uses the +`cassandra-topologies.properties` configuration file to determine which +`datacenters` and racks cluster nodes belong to. If other snitches are +used, the :ref:cassandra_rackdc must be used. The snitch determines +network topology (proximity by rack and datacenter) so that requests are +routed efficiently and allows the database to distribute replicas +evenly. + +Include every node in the cluster in the properties file, defining your +datacenter names as in the keyspace definition. The datacenter and rack +names are case-sensitive. + +The `cassandra-topologies.properties` file must be copied identically to +every node in the cluster. + +== Example + +This example uses three datacenters: + +[source,bash] +---- +# datacenter One + +175.56.12.105=DC1:RAC1 +175.50.13.200=DC1:RAC1 +175.54.35.197=DC1:RAC1 + +120.53.24.101=DC1:RAC2 +120.55.16.200=DC1:RAC2 +120.57.102.103=DC1:RAC2 + +# datacenter Two + +110.56.12.120=DC2:RAC1 +110.50.13.201=DC2:RAC1 +110.54.35.184=DC2:RAC1 + +50.33.23.120=DC2:RAC2 +50.45.14.220=DC2:RAC2 +50.17.10.203=DC2:RAC2 + +# datacenter Three + +172.106.12.120=DC3:RAC1 +172.106.12.121=DC3:RAC1 +172.106.12.122=DC3:RAC1 + +# default for unknown nodes +default =DC3:RAC1 +---- diff --git a/doc/modules/cassandra/pages/configuration/index.adoc b/doc/modules/cassandra/pages/configuration/index.adoc new file mode 100644 index 000000000000..7c8ee367a90f --- /dev/null +++ b/doc/modules/cassandra/pages/configuration/index.adoc @@ -0,0 +1,11 @@ += Configuring Cassandra + +This section describes how to configure Apache Cassandra. 
+ +* xref:configuration/cass_yaml_file.adoc[cassandra.yaml] +* xref:configuration/cass_rackdc_file.adoc[cassandra-rackdc.properties] +* xref:configuration/cass_env_sh_file.adoc[cassandra-env.sh] +* xref:configuration/cass_topo_file.adoc[cassandra-topologies.properties] +* xref:configuration/cass_cl_archive_file.adoc[commitlog-archiving.properties] +* xref:configuration/cass_cl_logback_xml_file.adoc[logback.xml] +* xref:configuration/cass_jvm_options_file.adoc[jvm-* files] diff --git a/doc/modules/cassandra/pages/cql/SASI.adoc b/doc/modules/cassandra/pages/cql/SASI.adoc new file mode 100644 index 000000000000..c24009ad24c3 --- /dev/null +++ b/doc/modules/cassandra/pages/cql/SASI.adoc @@ -0,0 +1,809 @@ +== SASIIndex + +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/SASIIndex.java[`SASIIndex`], +or ``SASI`` for short, is an implementation of Cassandra's `Index` +interface that can be used as an alternative to the existing +implementations. SASI's indexing and querying improves on existing +implementations by tailoring it specifically to Cassandra’s needs. SASI +has superior performance in cases where queries would previously require +filtering. In achieving this performance, SASI aims to be significantly +less resource intensive than existing implementations, in memory, disk, +and CPU usage. In addition, SASI supports prefix and contains queries on +strings (similar to SQL’s `LIKE = "foo*"` or `LIKE = "*foo*"'`). + +The following goes on describe how to get up and running with SASI, +demonstrates usage with examples, and provides some details on its +implementation. + +=== Using SASI + +The examples below walk through creating a table and indexes on its +columns, and performing queries on some inserted data. + +The examples below assume the `demo` keyspace has been created and is in +use. + +.... +cqlsh> CREATE KEYSPACE demo WITH replication = { + ... 'class': 'SimpleStrategy', + ... 'replication_factor': '1' + ... }; +cqlsh> USE demo; +.... + +All examples are performed on the `sasi` table: + +.... +cqlsh:demo> CREATE TABLE sasi (id uuid, first_name text, last_name text, + ... age int, height int, created_at bigint, primary key (id)); +.... + +==== Creating Indexes + +To create SASI indexes use CQLs `CREATE CUSTOM INDEX` statement: + +.... +cqlsh:demo> CREATE CUSTOM INDEX ON sasi (first_name) USING 'org.apache.cassandra.index.sasi.SASIIndex' + ... WITH OPTIONS = { + ... 'analyzer_class': + ... 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer', + ... 'case_sensitive': 'false' + ... }; + +cqlsh:demo> CREATE CUSTOM INDEX ON sasi (last_name) USING 'org.apache.cassandra.index.sasi.SASIIndex' + ... WITH OPTIONS = {'mode': 'CONTAINS'}; + +cqlsh:demo> CREATE CUSTOM INDEX ON sasi (age) USING 'org.apache.cassandra.index.sasi.SASIIndex'; + +cqlsh:demo> CREATE CUSTOM INDEX ON sasi (created_at) USING 'org.apache.cassandra.index.sasi.SASIIndex' + ... WITH OPTIONS = {'mode': 'SPARSE'}; +.... + +The indexes created have some options specified that customize their +behaviour and potentially performance. The index on `first_name` is +case-insensitive. The analyzers are discussed more in a subsequent +example. The `NonTokenizingAnalyzer` performs no analysis on the text. +Each index has a mode: `PREFIX`, `CONTAINS`, or `SPARSE`, the first +being the default. The `last_name` index is created with the mode +`CONTAINS` which matches terms on suffixes instead of prefix only. 
+Examples of this are available below and more detail can be found in the +section on link:#ondiskindexbuilder[OnDiskIndex].The `created_at` column +is created with its mode set to `SPARSE`, which is meant to improve +performance of querying large, dense number ranges like timestamps for +data inserted every millisecond. Details of the `SPARSE` implementation +can also be found in the section on the +link:#ondiskindexbuilder[OnDiskIndex]. The `age` index is created with +the default `PREFIX` mode and no case-sensitivity or text analysis +options are specified since the field is numeric. + +After inserting the following data and performing a `nodetool flush`, +SASI performing index flushes to disk can be seen in Cassandra’s logs – +although the direct call to flush is not required (see +link:#indexmemtable[IndexMemtable] for more details). + +.... +cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) + ... VALUES (556ebd54-cbe5-4b75-9aae-bf2a31a24500, 'Pavel', 'Yaskevich', 27, 181, 1442959315018); + +cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) + ... VALUES (5770382a-c56f-4f3f-b755-450e24d55217, 'Jordan', 'West', 26, 173, 1442959315019); + +cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) + ... VALUES (96053844-45c3-4f15-b1b7-b02c441d3ee1, 'Mikhail', 'Stepura', 36, 173, 1442959315020); + +cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) + ... VALUES (f5dfcabe-de96-4148-9b80-a1c41ed276b4, 'Michael', 'Kjellman', 26, 180, 1442959315021); + +cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) + ... VALUES (2970da43-e070-41a8-8bcb-35df7a0e608a, 'Johnny', 'Zhang', 32, 175, 1442959315022); + +cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) + ... VALUES (6b757016-631d-4fdb-ac62-40b127ccfbc7, 'Jason', 'Brown', 40, 182, 1442959315023); + +cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) + ... VALUES (8f909e8a-008e-49dd-8d43-1b0df348ed44, 'Vijay', 'Parthasarathy', 34, 183, 1442959315024); + +cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi; + + first_name | last_name | age | height | created_at +------------+---------------+-----+--------+--------------- + Michael | Kjellman | 26 | 180 | 1442959315021 + Mikhail | Stepura | 36 | 173 | 1442959315020 + Jason | Brown | 40 | 182 | 1442959315023 + Pavel | Yaskevich | 27 | 181 | 1442959315018 + Vijay | Parthasarathy | 34 | 183 | 1442959315024 + Jordan | West | 26 | 173 | 1442959315019 + Johnny | Zhang | 32 | 175 | 1442959315022 + +(7 rows) +.... + +==== Equality & Prefix Queries + +SASI supports all queries already supported by CQL, including LIKE +statement for PREFIX, CONTAINS and SUFFIX searches. + +.... +cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi + ... WHERE first_name = 'Pavel'; + + first_name | last_name | age | height | created_at +-------------+-----------+-----+--------+--------------- + Pavel | Yaskevich | 27 | 181 | 1442959315018 + +(1 rows) +.... + +.... +cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi + ... WHERE first_name = 'pavel'; + + first_name | last_name | age | height | created_at +-------------+-----------+-----+--------+--------------- + Pavel | Yaskevich | 27 | 181 | 1442959315018 + +(1 rows) +.... + +.... +cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi + ... 
WHERE first_name LIKE 'M%'; + + first_name | last_name | age | height | created_at +------------+-----------+-----+--------+--------------- + Michael | Kjellman | 26 | 180 | 1442959315021 + Mikhail | Stepura | 36 | 173 | 1442959315020 + +(2 rows) +.... + +Of course, the case of the query does not matter for the `first_name` +column because of the options provided at index creation time. + +.... +cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi + ... WHERE first_name LIKE 'm%'; + + first_name | last_name | age | height | created_at +------------+-----------+-----+--------+--------------- + Michael | Kjellman | 26 | 180 | 1442959315021 + Mikhail | Stepura | 36 | 173 | 1442959315020 + +(2 rows) +.... + +==== Compound Queries + +SASI supports queries with multiple predicates, however, due to the +nature of the default indexing implementation, CQL requires the user to +specify `ALLOW FILTERING` to opt-in to the potential performance +pitfalls of such a query. With SASI, while the requirement to include +`ALLOW FILTERING` remains, to reduce modifications to the grammar, the +performance pitfalls do not exist because filtering is not performed. +Details on how SASI joins data from multiple predicates is available +below in the link:#implementation-details[Implementation Details] +section. + +.... +cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi + ... WHERE first_name LIKE 'M%' and age < 30 ALLOW FILTERING; + + first_name | last_name | age | height | created_at +------------+-----------+-----+--------+--------------- + Michael | Kjellman | 26 | 180 | 1442959315021 + +(1 rows) +.... + +==== Suffix Queries + +The next example demonstrates `CONTAINS` mode on the `last_name` column. +By using this mode, predicates can search for any strings containing the +search string as a sub-string. In this case the strings containing ``a'' +or ``an''. + +.... +cqlsh:demo> SELECT * FROM sasi WHERE last_name LIKE '%a%'; + + id | age | created_at | first_name | height | last_name +--------------------------------------+-----+---------------+------------+--------+--------------- + f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | 1442959315021 | Michael | 180 | Kjellman + 96053844-45c3-4f15-b1b7-b02c441d3ee1 | 36 | 1442959315020 | Mikhail | 173 | Stepura + 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | 1442959315018 | Pavel | 181 | Yaskevich + 8f909e8a-008e-49dd-8d43-1b0df348ed44 | 34 | 1442959315024 | Vijay | 183 | Parthasarathy + 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | 1442959315022 | Johnny | 175 | Zhang + +(5 rows) + +cqlsh:demo> SELECT * FROM sasi WHERE last_name LIKE '%an%'; + + id | age | created_at | first_name | height | last_name +--------------------------------------+-----+---------------+------------+--------+----------- + f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | 1442959315021 | Michael | 180 | Kjellman + 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | 1442959315022 | Johnny | 175 | Zhang + +(2 rows) +.... + +==== Expressions on Non-Indexed Columns + +SASI also supports filtering on non-indexed columns like `height`. The +expression can only narrow down an existing query using `AND`. + +.... 
+cqlsh:demo> SELECT * FROM sasi WHERE last_name LIKE '%a%' AND height >= 175 ALLOW FILTERING; + + id | age | created_at | first_name | height | last_name +--------------------------------------+-----+---------------+------------+--------+--------------- + f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | 1442959315021 | Michael | 180 | Kjellman + 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | 1442959315018 | Pavel | 181 | Yaskevich + 8f909e8a-008e-49dd-8d43-1b0df348ed44 | 34 | 1442959315024 | Vijay | 183 | Parthasarathy + 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | 1442959315022 | Johnny | 175 | Zhang + +(4 rows) +.... + +==== Delimiter based Tokenization Analysis + +A simple text analysis provided is delimiter based tokenization. This +provides an alternative to indexing collections, as delimiter separated +text can be indexed without the overhead of `CONTAINS` mode nor using +`PREFIX` or `SUFFIX` queries. + +.... +cqlsh:demo> ALTER TABLE sasi ADD aliases text; +cqlsh:demo> CREATE CUSTOM INDEX on sasi (aliases) USING 'org.apache.cassandra.index.sasi.SASIIndex' + ... WITH OPTIONS = { + ... 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.DelimiterAnalyzer', + ... 'delimiter': ',', + ... 'mode': 'prefix', + ... 'analyzed': 'true'}; +cqlsh:demo> UPDATE sasi SET aliases = 'Mike,Mick,Mikey,Mickey' WHERE id = f5dfcabe-de96-4148-9b80-a1c41ed276b4; +cqlsh:demo> SELECT * FROM sasi WHERE aliases LIKE 'Mikey' ALLOW FILTERING; + + id | age | aliases | created_at | first_name | height | last_name +--------------------------------------+-----+------------------------+---------------+------------+--------+----------- + f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | Mike,Mick,Mikey,Mickey | 1442959315021 | Michael | 180 | Kjellman +.... + +==== Text Analysis (Tokenization and Stemming) + +Lastly, to demonstrate text analysis an additional column is needed on +the table. Its definition, index, and statements to update rows are +shown below. + +.... +cqlsh:demo> ALTER TABLE sasi ADD bio text; +cqlsh:demo> CREATE CUSTOM INDEX ON sasi (bio) USING 'org.apache.cassandra.index.sasi.SASIIndex' + ... WITH OPTIONS = { + ... 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.StandardAnalyzer', + ... 'tokenization_enable_stemming': 'true', + ... 'analyzed': 'true', + ... 'tokenization_normalize_lowercase': 'true', + ... 'tokenization_locale': 'en' + ... }; +cqlsh:demo> UPDATE sasi SET bio = 'Software Engineer, who likes distributed systems, doesnt like to argue.' 
WHERE id = 5770382a-c56f-4f3f-b755-450e24d55217; +cqlsh:demo> UPDATE sasi SET bio = 'Software Engineer, works on the freight distribution at nights and likes arguing' WHERE id = 556ebd54-cbe5-4b75-9aae-bf2a31a24500; +cqlsh:demo> SELECT * FROM sasi; + + id | age | bio | created_at | first_name | height | last_name +--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+--------------- + f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | null | 1442959315021 | Michael | 180 | Kjellman + 96053844-45c3-4f15-b1b7-b02c441d3ee1 | 36 | null | 1442959315020 | Mikhail | 173 | Stepura + 6b757016-631d-4fdb-ac62-40b127ccfbc7 | 40 | null | 1442959315023 | Jason | 182 | Brown + 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich + 8f909e8a-008e-49dd-8d43-1b0df348ed44 | 34 | null | 1442959315024 | Vijay | 183 | Parthasarathy + 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West + 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | null | 1442959315022 | Johnny | 175 | Zhang + +(7 rows) +.... + +Index terms and query search strings are stemmed for the `bio` column +because it was configured to use the +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java[`StandardAnalyzer`] +and `analyzed` is set to `true`. The `tokenization_normalize_lowercase` +is similar to the `case_sensitive` property but for the +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java[`StandardAnalyzer`]. +These query demonstrates the stemming applied by +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java[`StandardAnalyzer`]. + +.... +cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'distributing'; + + id | age | bio | created_at | first_name | height | last_name +--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- + 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich + 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West + +(2 rows) + +cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'they argued'; + + id | age | bio | created_at | first_name | height | last_name +--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- + 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich + 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. 
| 1442959315019 | Jordan | 173 | West + +(2 rows) + +cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'working at the company'; + + id | age | bio | created_at | first_name | height | last_name +--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- + 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich + +(1 rows) + +cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'soft eng'; + + id | age | bio | created_at | first_name | height | last_name +--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- + 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich + 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West + +(2 rows) +.... + +=== Implementation Details + +While SASI, at the surface, is simply an implementation of the `Index` +interface, at its core there are several data structures and algorithms +used to satisfy it. These are described here. Additionally, the changes +internal to Cassandra to support SASI’s integration are described. + +The `Index` interface divides responsibility of the implementer into two +parts: Indexing and Querying. Further, Cassandra makes it possible to +divide those responsibilities into the memory and disk components. SASI +takes advantage of Cassandra’s write-once, immutable, ordered data model +to build indexes along with the flushing of the memtable to disk – this +is the origin of the name ``SSTable Attached Secondary Index''. + +The SASI index data structures are built in memory as the SSTable is +being written and they are flushed to disk before the writing of the +SSTable completes. The writing of each index file only requires +sequential writes to disk. In some cases, partial flushes are performed, +and later stitched back together, to reduce memory usage. These data +structures are optimized for this use case. + +Taking advantage of Cassandra’s ordered data model, at query time, +candidate indexes are narrowed down for searching, minimizing the amount +of work done. Searching is then performed using an efficient method that +streams data off disk as needed. + +==== Indexing + +Per SSTable, SASI writes an index file for each indexed column. The data +for these files is built in memory using the +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java[`OnDiskIndexBuilder`]. +Once flushed to disk, the data is read using the +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java[`OnDiskIndex`] +class. These are composed of bytes representing indexed terms, organized +for efficient writing or searching respectively. 
The keys and values +they hold represent tokens and positions in an SSTable and these are +stored per-indexed term in +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTreeBuilder.java[`TokenTreeBuilder`]s +for writing, and +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java[`TokenTree`]s +for querying. These index files are memory mapped after being written to +disk, for quicker access. For indexing data in the memtable, SASI uses +its +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java[`IndexMemtable`] +class. + +===== OnDiskIndex(Builder) + +Each +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java[`OnDiskIndex`] +is an instance of a modified +https://en.wikipedia.org/wiki/Suffix_array[Suffix Array] data structure. +The +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java[`OnDiskIndex`] +is comprised of page-size blocks of sorted terms and pointers to the +terms’ associated data, as well as the data itself, stored also in one +or more page-sized blocks. The +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java[`OnDiskIndex`] +is structured as a tree of arrays, where each level describes the terms +in the level below, the final level being the terms themselves. The +`PointerLevel`s and their `PointerBlock`s contain terms and pointers to +other blocks that _end_ with those terms. The `DataLevel`, the final +level, and its `DataBlock`s contain terms and point to the data itself, +contained in +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java[`TokenTree`]s. + +The terms written to the +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java[`OnDiskIndex`] +vary depending on its ``mode'': either `PREFIX`, `CONTAINS`, or +`SPARSE`. In the `PREFIX` and `SPARSE` cases, terms’ exact values are +written exactly once per `OnDiskIndex`. For example, when using a +`PREFIX` index with terms `Jason`, `Jordan`, `Pavel`, all three will be +included in the index. A `CONTAINS` index writes additional terms for +each suffix of each term recursively. Continuing with the example, a +`CONTAINS` index storing the previous terms would also store `ason`, +`ordan`, `avel`, `son`, `rdan`, `vel`, etc. This allows for queries on +the suffix of strings. The `SPARSE` mode differs from `PREFIX` in that +for every 64 blocks of terms a +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java[`TokenTree`] +is built merging all the +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java[`TokenTree`]s +for each term into a single one. This copy of the data is used for +efficient iteration of large ranges of e.g. timestamps. The index +``mode'' is configurable per column at index creation time. + +===== TokenTree(Builder) + +The +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java[`TokenTree`] +is an implementation of the well-known +https://en.wikipedia.org/wiki/B%2B_tree[B+-tree] that has been modified +to optimize for its use-case. 
In particular, it has been optimized to
associate tokens (longs) with a set of positions in an SSTable (also
longs). Allowing a set of long values accommodates the possibility of a
hash collision in the token, but the data structure is optimized for the
unlikely case of such a collision.

To optimize for its write-once environment, the
https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTreeBuilder.java[`TokenTreeBuilder`]
completely loads its interior nodes as the tree is built, using the
well-known algorithm optimized for bulk-loading the data structure.

https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java[`TokenTree`]s
provide the means to iterate over the tokens and file positions that match
a given term, and to skip forward in that iteration, an operation used
heavily at query time.

===== IndexMemtable

The
https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java[`IndexMemtable`]
handles indexing the in-memory data held in the memtable. The
https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java[`IndexMemtable`]
in turn manages either a
https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java[`TrieMemIndex`]
or a
https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java[`SkipListMemIndex`]
per column. The choice of index type is data dependent. The
https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java[`TrieMemIndex`]
is used for literal types. `AsciiType` and `UTF8Type` are literal types
by default, but any column can be configured as a literal type using the
`is_literal` option at index creation time. For non-literal types the
https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java[`SkipListMemIndex`]
is used. The
https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java[`TrieMemIndex`]
can efficiently support prefix queries on character-like data, while the
https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java[`SkipListMemIndex`],
conversely, is better suited for other Cassandra data types like
numbers.

The
https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java[`TrieMemIndex`]
is built using either the `ConcurrentRadixTree` or the
`ConcurrentSuffixTree` from the `com.googlecode.concurrenttrees`
package. The choice between the two is made based on the indexing mode:
the radix tree is used for `PREFIX` (and other) modes, and the suffix
tree for `CONTAINS` mode.

The
https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java[`SkipListMemIndex`]
is built on top of `java.util.concurrent.ConcurrentSkipListSet`.
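For illustration, the `is_literal` option mentioned above is supplied like
any other SASI index option at creation time. This minimal sketch targets
the `demo.sasi` table used in the earlier examples, with a hypothetical
`nickname` column; ASCII columns are already treated as literal by default,
so the option is shown here purely to demonstrate the syntax:

....
cqlsh:demo> ALTER TABLE sasi ADD nickname ascii;
cqlsh:demo> CREATE CUSTOM INDEX ON sasi (nickname) USING 'org.apache.cassandra.index.sasi.SASIIndex'
        ...     WITH OPTIONS = { 'is_literal': 'true' };
....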
+
+==== Querying
+
+Responsible for converting the internal `IndexExpression` representation
+into SASI’s
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]
+and
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java[`Expression`]
+trees, optimizing the trees to reduce the amount of work done, and
+driving the query itself, the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`]
+is the workhorse of SASI’s querying implementation. To efficiently
+perform union and intersection operations, SASI provides several
+iterators similar to Cassandra’s `MergeIterator`, but tailored
+specifically for SASI’s use while including more features. The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java[`RangeUnionIterator`],
+as its name suggests, performs set unions over sets of tokens/keys
+matching the query, only reading as much data as it needs from each set
+to satisfy the query. The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java[`RangeIntersectionIterator`],
+similar to its counterpart, performs set intersections over its data.
+
+===== QueryPlan
+
+The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`]
+instantiated per search query is at the core of SASI’s querying
+implementation. Its work can be divided into two stages: analysis and
+execution.
+
+During the analysis phase,
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`]
+converts from Cassandra’s internal representation of `IndexExpression`s,
+which has also been modified to support encoding queries that contain
+ORs and groupings of expressions using parentheses (see the
+link:#cassandra-internal-changes[Cassandra Internal Changes] section
+below for more details). This process produces a tree of
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]s,
+which in turn may contain
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java[`Expression`]s,
+all of which provide an alternative, more efficient, representation of
+the query.
+
+During execution, the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`]
+uses the `DecoratedKey`-generating iterator created from the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]
+tree. These keys are read from disk and a final check to ensure they
+satisfy the query is made, once again using the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]
+tree. Once the desired amount of matching data has been found, or there
+is no more matching data, the result set is returned to the coordinator
+through the existing internal components.
+
+The number of queries (total/failed/timed-out), and their latencies, are
+maintained per table/column family.
+
+SASI also supports concurrently iterating terms for the same index
+across SSTables.
The concurrency factor is controlled by the +`cassandra.search_concurrency_factor` system property. The default is +`1`. + +====== QueryController + +Each +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`] +references a +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java[`QueryController`] +used throughout the execution phase. The +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java[`QueryController`] +has two responsibilities: to manage and ensure the proper cleanup of +resources (indexes), and to strictly enforce the time bound per query, +specified by the user via the range slice timeout. All indexes are +accessed via the +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java[`QueryController`] +so that they can be safely released by it later. The +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java[`QueryController`]’s +`checkpoint` function is called in specific places in the execution path +to ensure the time-bound is enforced. + +====== QueryPlan Optimizations + +While in the analysis phase, the +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`] +performs several potential optimizations to the query. The goal of these +optimizations is to reduce the amount of work performed during the +execution phase. + +The simplest optimization performed is compacting multiple expressions +joined by logical intersections (`AND`) into a single +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`] +with three or more +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java[`Expression`]s. +For example, the query +`WHERE age < 100 AND fname = 'p*' AND first_name != 'pa*' AND age > 21` +would, without modification, have the following tree: + +.... + ┌───────┐ + ┌────────│ AND │──────┐ + │ └───────┘ │ + ▼ ▼ + ┌───────┐ ┌──────────┐ + ┌─────│ AND │─────┐ │age < 100 │ + │ └───────┘ │ └──────────┘ + ▼ ▼ +┌──────────┐ ┌───────┐ +│ fname=p* │ ┌─│ AND │───┐ +└──────────┘ │ └───────┘ │ + ▼ ▼ + ┌──────────┐ ┌──────────┐ + │fname!=pa*│ │ age > 21 │ + └──────────┘ └──────────┘ +.... + +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`] +will remove the redundant right branch whose root is the final `AND` and +has leaves `fname != pa*` and `age > 21`. These +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java[`Expression`]s +will be compacted into the parent `AND`, a safe operation due to `AND` +being associative and commutative. The resulting tree looks like the +following: + +.... + ┌───────┐ + ┌────────│ AND │──────┐ + │ └───────┘ │ + ▼ ▼ + ┌───────┐ ┌──────────┐ + ┌───────────│ AND │────────┐ │age < 100 │ + │ └───────┘ │ └──────────┘ + ▼ │ ▼ +┌──────────┐ │ ┌──────────┐ +│ fname=p* │ ▼ │ age > 21 │ +└──────────┘ ┌──────────┐ └──────────┘ + │fname!=pa*│ + └──────────┘ +.... 
+ +When excluding results from the result set, using `!=`, the +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`] +determines the best method for handling it. For range queries, for +example, it may be optimal to divide the range into multiple parts with +a hole for the exclusion. For string queries, such as this one, it is +more optimal, however, to simply note which data to skip, or exclude, +while scanning the index. Following this optimization the tree looks +like this: + +.... + ┌───────┐ + ┌────────│ AND │──────┐ + │ └───────┘ │ + ▼ ▼ + ┌───────┐ ┌──────────┐ + ┌───────│ AND │────────┐ │age < 100 │ + │ └───────┘ │ └──────────┘ + ▼ ▼ + ┌──────────────────┐ ┌──────────┐ + │ fname=p* │ │ age > 21 │ + │ exclusions=[pa*] │ └──────────┘ + └──────────────────┘ +.... + +The last type of optimization applied, for this query, is to merge range +expressions across branches of the tree – without modifying the meaning +of the query, of course. In this case, because the query contains all +`AND`s the `age` expressions can be collapsed. Along with this +optimization, the initial collapsing of unneeded `AND`s can also be +applied once more to result in this final tree using to execute the +query: + +.... + ┌───────┐ + ┌──────│ AND │───────┐ + │ └───────┘ │ + ▼ ▼ + ┌──────────────────┐ ┌────────────────┐ + │ fname=p* │ │ 21 < age < 100 │ + │ exclusions=[pa*] │ └────────────────┘ + └──────────────────┘ +.... + +===== Operations and Expressions + +As discussed, the +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`] +optimizes a tree represented by +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]s +as interior nodes, and +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java[`Expression`]s +as leaves. The +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`] +class, more specifically, can have zero, one, or two +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]s +as children and an unlimited number of expressions. The iterators used +to perform the queries, discussed below in the +``Range(Union|Intersection)Iterator'' section, implement the necessary +logic to merge results transparently regardless of the +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]s +children. + +Besides participating in the optimizations performed by the +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`], +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`] +is also responsible for taking a row that has been returned by the query +and performing a final validation that it in fact does match. This +`satisfiesBy` operation is performed recursively from the root of the +https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`] +tree for a given query. These checks are performed directly on the data +in a given row. 
For more details on how `satisfiesBy` works, see the
+documentation
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java#L87-L123[in
+the code].
+
+===== Range(Union|Intersection)Iterator
+
+The abstract `RangeIterator` class provides a unified interface over the
+two main operations performed by SASI at various layers in the execution
+path: set intersection and union. These operations are performed in an
+iterated, or ``streaming'', fashion to prevent unneeded reads of
+elements from either set. In both the intersection and union cases the
+algorithms take advantage of the data being pre-sorted using the same
+sort order, e.g. term or token order.
+
+The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java[`RangeUnionIterator`]
+performs the ``Merge-Join'' portion of the
+https://en.wikipedia.org/wiki/Sort-merge_join[Sort-Merge-Join]
+algorithm, with the properties of an outer-join, or union. It is
+implemented with several optimizations to improve its performance over a
+large number of iterators, i.e. the sets to union. Specifically, the
+iterator exploits the likely case of the data having many sub-groups of
+overlapping ranges and the unlikely case that all ranges will overlap
+each other. For more details see the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java#L9-L21[javadoc].
+
+The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java[`RangeIntersectionIterator`]
+itself is not a subclass of `RangeIterator`. It is a container for
+several classes, one of which, `AbstractIntersectionIterator`,
+sub-classes `RangeIterator`. SASI supports two methods of performing the
+intersection operation, and the ability to choose adaptively between
+them based on some properties of the data.
+
+`BounceIntersectionIterator`, and the `BOUNCE` strategy, works like the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java[`RangeUnionIterator`]
+in that it performs a ``Merge-Join''; however, its nature is similar to
+an inner-join, where like values are merged by a data-specific merge
+function (e.g. merging two tokens in a list to look up in an SSTable
+later). See the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java#L88-L101[javadoc]
+for more details on its implementation.
+
+`LookupIntersectionIterator`, and the `LOOKUP` strategy, performs a
+different operation, more similar to a lookup in an associative data
+structure, or ``hash lookup'' in database terminology. Once again,
+details on the implementation can be found in the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java#L199-L208[javadoc].
+
+The choice between the two iterators, or the `ADAPTIVE` strategy, is
+based on the ratio of the data set sizes of the smallest and largest
+ranges being intersected. If the number of elements in the minimum range
+divided by the number of elements in the maximum range is less than or
+equal to `0.01`, the `ADAPTIVE` strategy chooses the
+`LookupIntersectionIterator`; otherwise the `BounceIntersectionIterator`
+is chosen.
+
+==== The SASIIndex Class
+
+The above components are glued together by the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/SASIIndex.java[`SASIIndex`]
+class, which implements `Index` and is instantiated per table containing
+SASI indexes. It manages all indexes for a table via the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/conf/DataTracker.java[`sasi.conf.DataTracker`]
+and
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/conf/view/View.java[`sasi.conf.view.View`]
+components, controls writing of all indexes for an SSTable via its
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/PerSSTableIndexWriter.java[`PerSSTableIndexWriter`],
+and initiates searches with `Searcher`. These classes glue the
+previously mentioned indexing components together with Cassandra’s
+SSTable life-cycle, ensuring indexes are not only written when memtables
+flush, but also as SSTables are compacted. For querying, the `Searcher`
+does little but defer to
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`]
+and update the metrics, e.g. latency, exposed by SASI.
+
+==== Cassandra Internal Changes
+
+To support the above changes and integrate them into Cassandra, a few
+minor internal changes were made to Cassandra itself. These are
+described here.
+
+===== SSTable Write Life-cycle Notifications
+
+The `SSTableFlushObserver` is an observer pattern-like interface, whose
+sub-classes can register to be notified about events in the life-cycle
+of writing out an SSTable. Sub-classes can be notified when a flush
+begins and ends, as well as before each next row, and each next column,
+is written. SASI’s `PerSSTableIndexWriter`, discussed above, is the only
+current subclass.
+
+==== Limitations and Caveats
+
+The following are items that can be addressed in future updates but are
+not available in this repository or are not currently implemented.
+
+* The cluster must be configured to use a partitioner that produces
+`LongToken`s, e.g. `Murmur3Partitioner`. Other existing partitioners
+that don’t produce `LongToken`s, e.g. `ByteOrderedPartitioner` and
+`RandomPartitioner`, will not work with SASI.
+* Not Equals and OR support have been removed in this release while
+changes are made to Cassandra itself to support them.
+
+==== Contributors
+
+* https://github.com/xedin[Pavel Yaskevich]
+* https://github.com/jrwest[Jordan West]
+* https://github.com/mkjellman[Michael Kjellman]
+* https://github.com/jasobrown[Jason Brown]
+* https://github.com/mishail[Mikhail Stepura]
diff --git a/doc/modules/cassandra/pages/cql/appendices.adoc b/doc/modules/cassandra/pages/cql/appendices.adoc
new file mode 100644
index 000000000000..7e17266a3f7e
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/appendices.adoc
@@ -0,0 +1,179 @@
+= Appendices
+
+[[appendix-A]]
+== Appendix A: CQL Keywords
+
+CQL distinguishes between _reserved_ and _non-reserved_ keywords.
+Reserved keywords cannot be used as identifiers; they are truly reserved
+for the language (but a reserved keyword can be enclosed in
+double-quotes to use it as an identifier). Non-reserved keywords,
+however, only have a specific meaning in certain contexts but can be
+used as identifiers otherwise.
The only _raison d’être_ of these non-reserved +keywords is convenience: some keyword are non-reserved when it was +always easy for the parser to decide whether they were used as keywords +or not. + +[width="48%",cols="60%,40%",options="header",] +|=== +|Keyword |Reserved? +|`ADD` |yes +|`AGGREGATE` |no +|`ALL` |no +|`ALLOW` |yes +|`ALTER` |yes +|`AND` |yes +|`APPLY` |yes +|`AS` |no +|`ASC` |yes +|`ASCII` |no +|`AUTHORIZE` |yes +|`BATCH` |yes +|`BEGIN` |yes +|`BIGINT` |no +|`BLOB` |no +|`BOOLEAN` |no +|`BY` |yes +|`CALLED` |no +|`CLUSTERING` |no +|`COLUMNFAMILY` |yes +|`COMPACT` |no +|`CONTAINS` |no +|`COUNT` |no +|`COUNTER` |no +|`CREATE` |yes +|`CUSTOM` |no +|`DATE` |no +|`DECIMAL` |no +|`DELETE` |yes +|`DESC` |yes +|`DESCRIBE` |yes +|`DISTINCT` |no +|`DOUBLE` |no +|`DROP` |yes +|`ENTRIES` |yes +|`EXECUTE` |yes +|`EXISTS` |no +|`FILTERING` |no +|`FINALFUNC` |no +|`FLOAT` |no +|`FROM` |yes +|`FROZEN` |no +|`FULL` |yes +|`FUNCTION` |no +|`FUNCTIONS` |no +|`GRANT` |yes +|`IF` |yes +|`IN` |yes +|`INDEX` |yes +|`INET` |no +|`INFINITY` |yes +|`INITCOND` |no +|`INPUT` |no +|`INSERT` |yes +|`INT` |no +|`INTO` |yes +|`JSON` |no +|`KEY` |no +|`KEYS` |no +|`KEYSPACE` |yes +|`KEYSPACES` |no +|`LANGUAGE` |no +|`LIMIT` |yes +|`LIST` |no +|`LOGIN` |no +|`MAP` |no +|`MODIFY` |yes +|`NAN` |yes +|`NOLOGIN` |no +|`NORECURSIVE` |yes +|`NOSUPERUSER` |no +|`NOT` |yes +|`NULL` |yes +|`OF` |yes +|`ON` |yes +|`OPTIONS` |no +|`OR` |yes +|`ORDER` |yes +|`PASSWORD` |no +|`PERMISSION` |no +|`PERMISSIONS` |no +|`PRIMARY` |yes +|`RENAME` |yes +|`REPLACE` |yes +|`RETURNS` |no +|`REVOKE` |yes +|`ROLE` |no +|`ROLES` |no +|`SCHEMA` |yes +|`SELECT` |yes +|`SET` |yes +|`SFUNC` |no +|`SMALLINT` |no +|`STATIC` |no +|`STORAGE` |no +|`STYPE` |no +|`SUPERUSER` |no +|`TABLE` |yes +|`TEXT` |no +|`TIME` |no +|`TIMESTAMP` |no +|`TIMEUUID` |no +|`TINYINT` |no +|`TO` |yes +|`TOKEN` |yes +|`TRIGGER` |no +|`TRUNCATE` |yes +|`TTL` |no +|`TUPLE` |no +|`TYPE` |no +|`UNLOGGED` |yes +|`UPDATE` |yes +|`USE` |yes +|`USER` |no +|`USERS` |no +|`USING` |yes +|`UUID` |no +|`VALUES` |no +|`VARCHAR` |no +|`VARINT` |no +|`WHERE` |yes +|`WITH` |yes +|`WRITETIME` |no +|=== + +== Appendix B: CQL Reserved Types + +The following type names are not currently used by CQL, but are reserved +for potential future use. User-defined types may not use reserved type +names as their name. + +[width="25%",cols="100%",options="header",] +|=== +|type +|`bitstring` +|`byte` +|`complex` +|`enum` +|`interval` +|`macaddr` +|=== + +== Appendix C: Dropping Compact Storage + +Starting version 4.0, Thrift and COMPACT STORAGE is no longer supported. + +`ALTER ... DROP COMPACT STORAGE` statement makes Compact Tables +CQL-compatible, exposing internal structure of Thrift/Compact Tables: + +* CQL-created Compact Tables that have no clustering columns, will +expose an additional clustering column `column1` with `UTF8Type`. +* CQL-created Compact Tables that had no regular columns, will expose a +regular column `value` with `BytesType`. +* For CQL-Created Compact Tables, all columns originally defined as +`regular` will be come `static` +* CQL-created Compact Tables that have clustering but have no regular +columns will have an empty value column (of `EmptyType`) +* SuperColumn Tables (can only be created through Thrift) will expose a +compact value map with an empty name. +* Thrift-created Compact Tables will have types corresponding to their +Thrift definition. 
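+
+As a minimal illustration of the statement described above (the keyspace
+and table names are hypothetical):
+
+[source,cql]
+----
+-- 'legacy.events' stands in for any existing compact table.
+ALTER TABLE legacy.events DROP COMPACT STORAGE;
+----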
diff --git a/doc/modules/cassandra/pages/cql/changes.adoc b/doc/modules/cassandra/pages/cql/changes.adoc new file mode 100644 index 000000000000..1f89469a328d --- /dev/null +++ b/doc/modules/cassandra/pages/cql/changes.adoc @@ -0,0 +1,215 @@ += Changes + +The following describes the changes in each version of CQL. + +== 3.4.5 + +* Adds support for arithmetic operators (`11935`) +* Adds support for `+` and `-` operations on dates (`11936`) +* Adds `currentTimestamp`, `currentDate`, `currentTime` and +`currentTimeUUID` functions (`13132`) + +== 3.4.4 + +* `ALTER TABLE` `ALTER` has been removed; a column's type may not be +changed after creation (`12443`). +* `ALTER TYPE` `ALTER` has been removed; a field's type may not be +changed after creation (`12443`). + +== 3.4.3 + +* Adds a new `duration` `data types ` (`11873`). +* Support for `GROUP BY` (`10707`). +* Adds a `DEFAULT UNSET` option for `INSERT JSON` to ignore omitted +columns (`11424`). +* Allows `null` as a legal value for TTL on insert and update. It will +be treated as equivalent to inserting a 0 (`12216`). + +== 3.4.2 + +* If a table has a non zero `default_time_to_live`, then explicitly +specifying a TTL of 0 in an `INSERT` or `UPDATE` statement will result +in the new writes not having any expiration (that is, an explicit TTL of +0 cancels the `default_time_to_live`). This wasn't the case before and +the `default_time_to_live` was applied even though a TTL had been +explicitly set. +* `ALTER TABLE` `ADD` and `DROP` now allow multiple columns to be +added/removed. +* New `PER PARTITION LIMIT` option for `SELECT` statements (see +https://issues.apache.org/jira/browse/CASSANDRA-7017)[CASSANDRA-7017]. +* `User-defined functions ` can now instantiate +`UDTValue` and `TupleValue` instances via the new `UDFContext` interface +(see +https://issues.apache.org/jira/browse/CASSANDRA-10818)[CASSANDRA-10818]. +* `User-defined types ` may now be stored in a non-frozen form, +allowing individual fields to be updated and deleted in `UPDATE` +statements and `DELETE` statements, respectively. +(https://issues.apache.org/jira/browse/CASSANDRA-7423)[CASSANDRA-7423]). + +== 3.4.1 + +* Adds `CAST` functions. + +== 3.4.0 + +* Support for `materialized views `. +* `DELETE` support for inequality expressions and `IN` restrictions on +any primary key columns. +* `UPDATE` support for `IN` restrictions on any primary key columns. + +== 3.3.1 + +* The syntax `TRUNCATE TABLE X` is now accepted as an alias for +`TRUNCATE X`. + +== 3.3.0 + +* `User-defined functions and aggregates ` are now +supported. +* Allows double-dollar enclosed strings literals as an alternative to +single-quote enclosed strings. +* Introduces Roles to supersede user based authentication and access +control +* New `date`, `time`, `tinyint` and `smallint` `data types ` +have been added. +* `JSON support ` has been added +* Adds new time conversion functions and deprecate `dateOf` and +`unixTimestampOf`. + +== 3.2.0 + +* `User-defined types ` supported. +* `CREATE INDEX` now supports indexing collection columns, including +indexing the keys of map collections through the `keys()` function +* Indexes on collections may be queried using the new `CONTAINS` and +`CONTAINS KEY` operators +* `Tuple types ` were added to hold fixed-length sets of typed +positional fields. +* `DROP INDEX` now supports optionally specifying a keyspace. + +== 3.1.7 + +* `SELECT` statements now support selecting multiple rows in a single +partition using an `IN` clause on combinations of clustering columns. 
+* `IF NOT EXISTS` and `IF EXISTS` syntax is now supported by +`CREATE USER` and `DROP USER` statements, respectively. + +== 3.1.6 + +* A new `uuid()` method has been added. +* Support for `DELETE ... IF EXISTS` syntax. + +== 3.1.5 + +* It is now possible to group clustering columns in a relation, see +`WHERE ` clauses. +* Added support for `static columns `. + +== 3.1.4 + +* `CREATE INDEX` now allows specifying options when creating CUSTOM +indexes. + +== 3.1.3 + +* Millisecond precision formats have been added to the +`timestamp ` parser. + +== 3.1.2 + +* `NaN` and `Infinity` has been added as valid float constants. They are +now reserved keywords. In the unlikely case you we using them as a +column identifier (or keyspace/table one), you will now need to double +quote them. + +== 3.1.1 + +* `SELECT` statement now allows listing the partition keys (using the +`DISTINCT` modifier). See +https://issues.apache.org/jira/browse/CASSANDRA-4536[CASSANDRA-4536]. +* The syntax `c IN ?` is now supported in `WHERE` clauses. In that case, +the value expected for the bind variable will be a list of whatever type +`c` is. +* It is now possible to use named bind variables (using `:name` instead +of `?`). + +== 3.1.0 + +* `ALTER TABLE` `DROP` option added. +* `SELECT` statement now supports aliases in select clause. Aliases in +WHERE and ORDER BY clauses are not supported. +* `CREATE` statements for `KEYSPACE`, `TABLE` and `INDEX` now supports +an `IF NOT EXISTS` condition. Similarly, `DROP` statements support a +`IF EXISTS` condition. +* `INSERT` statements optionally supports a `IF NOT EXISTS` condition +and `UPDATE` supports `IF` conditions. + +== 3.0.5 + +* `SELECT`, `UPDATE`, and `DELETE` statements now allow empty `IN` +relations (see +https://issues.apache.org/jira/browse/CASSANDRA-5626)[CASSANDRA-5626]. + +== 3.0.4 + +* Updated the syntax for custom `secondary indexes `. +* Non-equal condition on the partition key are now never supported, even +for ordering partitioner as this was not correct (the order was *not* +the one of the type of the partition key). Instead, the `token` method +should always be used for range queries on the partition key (see +`WHERE clauses `). + +== 3.0.3 + +* Support for custom `secondary indexes ` has been +added. + +== 3.0.2 + +* Type validation for the `constants ` has been fixed. For +instance, the implementation used to allow `'2'` as a valid value for an +`int` column (interpreting it has the equivalent of `2`), or `42` as a +valid `blob` value (in which case `42` was interpreted as an hexadecimal +representation of the blob). This is no longer the case, type validation +of constants is now more strict. See the `data types ` +section for details on which constant is allowed for which type. +* The type validation fixed of the previous point has lead to the +introduction of blobs constants to allow the input of blobs. Do note +that while the input of blobs as strings constant is still supported by +this version (to allow smoother transition to blob constant), it is now +deprecated and will be removed by a future version. If you were using +strings as blobs, you should thus update your client code ASAP to switch +blob constants. +* A number of functions to convert native types to blobs have also been +introduced. Furthermore the token function is now also allowed in select +clauses. See the `section on functions ` for details. + +== 3.0.1 + +* Date strings (and timestamps) are no longer accepted as valid +`timeuuid` values. 
Doing so was a bug in the sense that date string are +not valid `timeuuid`, and it was thus resulting in +https://issues.apache.org/jira/browse/CASSANDRA-4936[confusing +behaviors]. However, the following new methods have been added to help +working with `timeuuid`: `now`, `minTimeuuid`, `maxTimeuuid` , `dateOf` +and `unixTimestampOf`. +* Float constants now support the exponent notation. In other words, +`4.2E10` is now a valid floating point value. + +== Versioning + +Versioning of the CQL language adheres to the http://semver.org[Semantic +Versioning] guidelines. Versions take the form X.Y.Z where X, Y, and Z +are integer values representing major, minor, and patch level +respectively. There is no correlation between Cassandra release versions +and the CQL language version. + +[cols=",",options="header",] +|=== +|version |description +| Major | The major version _must_ be bumped when backward incompatible changes +are introduced. This should rarely occur. +| Minor | Minor version increments occur when new, but backward compatible, +functionality is introduced. +| Patch | The patch version is incremented when bugs are fixed. +|=== diff --git a/doc/modules/cassandra/pages/cql/cql_singlefile.adoc b/doc/modules/cassandra/pages/cql/cql_singlefile.adoc new file mode 100644 index 000000000000..e2fea00dc010 --- /dev/null +++ b/doc/modules/cassandra/pages/cql/cql_singlefile.adoc @@ -0,0 +1,3904 @@ +== Cassandra Query Language (CQL) v3.4.3 + +\{toc:maxLevel=3} + +=== CQL Syntax + +==== Preamble + +This document describes the Cassandra Query Language (CQL) version 3. +CQL v3 is not backward compatible with CQL v2 and differs from it in +numerous ways. Note that this document describes the last version of the +languages. However, the link:#changes[changes] section provides the diff +between the different versions of CQL v3. + +CQL v3 offers a model very close to SQL in the sense that data is put in +_tables_ containing _rows_ of _columns_. For that reason, when used in +this document, these terms (tables, rows and columns) have the same +definition than they have in SQL. But please note that as such, they do +*not* refer to the concept of rows and columns found in the internal +implementation of Cassandra and in the thrift and CQL v2 API. + +==== Conventions + +To aid in specifying the CQL syntax, we will use the following +conventions in this document: + +* Language rules will be given in a +http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form[BNF] -like +notation: + +bc(syntax). ::= TERMINAL + +* Nonterminal symbols will have ``. +* As additional shortcut notations to BNF, we’ll use traditional regular +expression’s symbols (`?`, `+` and `*`) to signify that a given symbol +is optional and/or can be repeated. We’ll also allow parentheses to +group symbols and the `[]` notation to represent any one of +``. +* The grammar is provided for documentation purposes and leave some +minor details out. For instance, the last column definition in a +`CREATE TABLE` statement is optional but supported if present even +though the provided grammar in this document suggest it is not +supported. +* Sample code will be provided in a code block: + +bc(sample). SELECT sample_usage FROM cql; + +* References to keywords or pieces of CQL code in running text will be +shown in a `fixed-width font`. + +[[identifiers]] +==== Identifiers and keywords + +The CQL language uses _identifiers_ (or _names_) to identify tables, +columns and other objects. 
An identifier is a token matching the regular +expression `[a-zA-Z]``[a-zA-Z0-9_]``*`. + +A number of such identifiers, like `SELECT` or `WITH`, are _keywords_. +They have a fixed meaning for the language and most are reserved. The +list of those keywords can be found in link:#appendixA[Appendix A]. + +Identifiers and (unquoted) keywords are case insensitive. Thus `SELECT` +is the same than `select` or `sElEcT`, and `myId` is the same than +`myid` or `MYID` for instance. A convention often used (in particular by +the samples of this documentation) is to use upper case for keywords and +lower case for other identifiers. + +There is a second kind of identifiers called _quoted identifiers_ +defined by enclosing an arbitrary sequence of characters in +double-quotes(`"`). Quoted identifiers are never keywords. Thus +`"select"` is not a reserved keyword and can be used to refer to a +column, while `select` would raise a parse error. Also, contrarily to +unquoted identifiers and keywords, quoted identifiers are case sensitive +(`"My Quoted Id"` is _different_ from `"my quoted id"`). A fully +lowercase quoted identifier that matches `[a-zA-Z]``[a-zA-Z0-9_]``*` is +equivalent to the unquoted identifier obtained by removing the +double-quote (so `"myid"` is equivalent to `myid` and to `myId` but +different from `"myId"`). Inside a quoted identifier, the double-quote +character can be repeated to escape it, so `"foo "" bar"` is a valid +identifier. + +*Warning*: _quoted identifiers_ allows to declare columns with arbitrary +names, and those can sometime clash with specific names used by the +server. For instance, when using conditional update, the server will +respond with a result-set containing a special result named +`"[applied]"`. If you’ve declared a column with such a name, this could +potentially confuse some tools and should be avoided. In general, +unquoted identifiers should be preferred but if you use quoted +identifiers, it is strongly advised to avoid any name enclosed by +squared brackets (like `"[applied]"`) and any name that looks like a +function call (like `"f(x)"`). + +==== Constants + +CQL defines the following kind of _constants_: strings, integers, +floats, booleans, uuids and blobs: + +* A string constant is an arbitrary sequence of characters characters +enclosed by single-quote(`'`). One can include a single-quote in a +string by repeating it, e.g. `'It''s raining today'`. Those are not to +be confused with quoted identifiers that use double-quotes. +* An integer constant is defined by `'-'?[0-9]+`. +* A float constant is defined by +`'-'?[0-9]+('.'[0-9]*)?([eE][+-]?[0-9+])?`. On top of that, `NaN` and +`Infinity` are also float constants. +* A boolean constant is either `true` or `false` up to +case-insensitivity (i.e. `True` is a valid boolean constant). +* A http://en.wikipedia.org/wiki/Universally_unique_identifier[UUID] +constant is defined by `hex{8}-hex{4}-hex{4}-hex{4}-hex{12}` where `hex` +is an hexadecimal character, e.g. `[0-9a-fA-F]` and `{4}` is the number +of such characters. +* A blob constant is an hexadecimal number defined by `0[xX](hex)+` +where `hex` is an hexadecimal character, e.g. `[0-9a-fA-F]`. + +For how these constants are typed, see the link:#types[data types +section]. + +==== Comments + +A comment in CQL is a line beginning by either double dashes (`--`) or +double slash (`//`). + +Multi-line comments are also supported through enclosure within `/*` and +`*/` (but nesting is not supported). + +bc(sample). 
+ +— This is a comment + +// This is a comment too + +/* This is + +a multi-line comment */ + +==== Statements + +CQL consists of statements. As in SQL, these statements can be divided +in 3 categories: + +* Data definition statements, that allow to set and change the way data +is stored. +* Data manipulation statements, that allow to change data +* Queries, to look up data + +All statements end with a semicolon (`;`) but that semicolon can be +omitted when dealing with a single statement. The supported statements +are described in the following sections. When describing the grammar of +said statements, we will reuse the non-terminal symbols defined below: + +bc(syntax).. + +::= any quoted or unquoted identifier, excluding reserved keywords + +::= ( `.')? + +::= a string constant + +::= an integer constant + +::= a float constant + +::= | + +::= a uuid constant + +::= a boolean constant + +::= a blob constant + +::= + +| + +| + +| + +| + +::= `?' + +| `:' + +::= + +| + +| + +| `(' ( (`,' )*)? `)' + +::= + +| + +| + +::= `\{' ( `:' ( `,' `:' )* )? `}' + +::= `\{' ( ( `,' )* )? `}' + +::= `[' ( ( `,' )* )? `]' + +::= + +::= (AND )* + +::= `=' ( | | ) + +p. + +Please note that not every possible productions of the grammar above +will be valid in practice. Most notably, `` and nested +`` are currently not allowed inside +``. + +A `` can be either anonymous (a question mark (`?`)) or named +(an identifier preceded by `:`). Both declare a bind variables for +link:#preparedStatement[prepared statements]. The only difference +between an anymous and a named variable is that a named one will be +easier to refer to (how exactly depends on the client driver used). + +The `` production is use by statement that create and alter +keyspaces and tables. Each `` is either a _simple_ one, in +which case it just has a value, or a _map_ one, in which case it’s value +is a map grouping sub-options. The following will refer to one or the +other as the _kind_ (_simple_ or _map_) of the property. + +A `` will be used to identify a table. This is an identifier +representing the table name that can be preceded by a keyspace name. The +keyspace name, if provided, allow to identify a table in another +keyspace than the currently active one (the currently active keyspace is +set through the `USE` statement). + +For supported ``, see the section on +link:#functions[functions]. + +Strings can be either enclosed with single quotes or two dollar +characters. The second syntax has been introduced to allow strings that +contain single quotes. Typical candidates for such strings are source +code fragments for user-defined functions. + +_Sample:_ + +bc(sample).. + +`some string value' + +$$double-dollar string can contain single ’ quotes$$ + +p. + +[[preparedStatement]] +==== Prepared Statement + +CQL supports _prepared statements_. Prepared statement is an +optimization that allows to parse a query only once but execute it +multiple times with different concrete values. + +In a statement, each time a column value is expected (in the data +manipulation and query statements), a `` (see above) can be +used instead. A statement with bind variables must then be _prepared_. +Once it has been prepared, it can executed by providing concrete values +for the bind variables. The exact procedure to prepare a statement and +execute a prepared statement depends on the CQL driver used and is +beyond the scope of this document. 
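+
+For illustration only (the `users` table and its columns are
+hypothetical), the first statement below uses an anonymous bind variable
+and the second uses named ones:
+
+bc(sample)..
+
+-- users is a hypothetical table
+
+SELECT name FROM users WHERE userid = ?;
+
+INSERT INTO users (userid, name) VALUES (:id, :name);
+
+p.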
+ +In addition to providing column values, bind markers may be used to +provide values for `LIMIT`, `TIMESTAMP`, and `TTL` clauses. If anonymous +bind markers are used, the names for the query parameters will be +`[limit]`, `[timestamp]`, and `[ttl]`, respectively. + +[[dataDefinition]] +=== Data Definition + +[[createKeyspaceStmt]] +==== CREATE KEYSPACE + +_Syntax:_ + +bc(syntax).. + +::= CREATE KEYSPACE (IF NOT EXISTS)? WITH + +p. + +_Sample:_ + +bc(sample).. + +CREATE KEYSPACE Excelsior + +WITH replication = \{’class’: `SimpleStrategy', `replication_factor' : +3}; + +CREATE KEYSPACE Excalibur + +WITH replication = \{’class’: `NetworkTopologyStrategy', `DC1' : 1, +`DC2' : 3} + +AND durable_writes = false; + +p. + +The `CREATE KEYSPACE` statement creates a new top-level _keyspace_. A +keyspace is a namespace that defines a replication strategy and some +options for a set of tables. Valid keyspaces names are identifiers +composed exclusively of alphanumerical characters and whose length is +lesser or equal to 32. Note that as identifiers, keyspace names are case +insensitive: use a quoted identifier for case sensitive keyspace names. + +The supported `` for `CREATE KEYSPACE` are: + +[cols=",,,,",options="header",] +|=== +|name |kind |mandatory |default |description +|`replication` |_map_ |yes | |The replication strategy and options to +use for the keyspace. + +|`durable_writes` |_simple_ |no |true |Whether to use the commit log for +updates on this keyspace (disable this option at your own risk!). +|=== + +The `replication` `` is mandatory. It must at least contains +the `'class'` sub-option which defines the replication strategy class to +use. The rest of the sub-options depends on that replication strategy +class. By default, Cassandra support the following `'class'`: + +* `'SimpleStrategy'`: A simple strategy that defines a simple +replication factor for the whole cluster. The only sub-options supported +is `'replication_factor'` to define that replication factor and is +mandatory. +* `'NetworkTopologyStrategy'`: A replication strategy that allows to set +the replication factor independently for each data-center. The rest of +the sub-options are key-value pairs where each time the key is the name +of a datacenter and the value the replication factor for that +data-center. + +Attempting to create an already existing keyspace will return an error +unless the `IF NOT EXISTS` option is used. If it is used, the statement +will be a no-op if the keyspace already exists. + +[[useStmt]] +==== USE + +_Syntax:_ + +bc(syntax). ::= USE + +_Sample:_ + +bc(sample). USE myApp; + +The `USE` statement takes an existing keyspace name as argument and set +it as the per-connection current working keyspace. All subsequent +keyspace-specific actions will be performed in the context of the +selected keyspace, unless link:#statements[otherwise specified], until +another USE statement is issued or the connection terminates. + +[[alterKeyspaceStmt]] +==== ALTER KEYSPACE + +_Syntax:_ + +bc(syntax).. + +::= ALTER KEYSPACE WITH + +p. + +_Sample:_ + +bc(sample).. + +ALTER KEYSPACE Excelsior + +WITH replication = \{’class’: `SimpleStrategy', `replication_factor' : +4}; + +The `ALTER KEYSPACE` statement alters the properties of an existing +keyspace. The supported `` are the same as for the +link:#createKeyspaceStmt[`CREATE KEYSPACE`] statement. + +[[dropKeyspaceStmt]] +==== DROP KEYSPACE + +_Syntax:_ + +bc(syntax). ::= DROP KEYSPACE ( IF EXISTS )? + +_Sample:_ + +bc(sample). 
DROP KEYSPACE myApp; + +A `DROP KEYSPACE` statement results in the immediate, irreversible +removal of an existing keyspace, including all column families in it, +and all data contained in those column families. + +If the keyspace does not exists, the statement will return an error, +unless `IF EXISTS` is used in which case the operation is a no-op. + +[[createTableStmt]] +==== CREATE TABLE + +_Syntax:_ + +bc(syntax).. + +::= CREATE ( TABLE | COLUMNFAMILY ) ( IF NOT EXISTS )? + +`(' ( `,' )* `)' + +( WITH ( AND )* )? + +::= ( STATIC )? ( PRIMARY KEY )? + +| PRIMARY KEY `(' ( `,' )* `)' + +::= + +| `(' (`,' )* `)' + +::= + +| COMPACT STORAGE + +| CLUSTERING ORDER + +p. + +_Sample:_ + +bc(sample).. + +CREATE TABLE monkeySpecies ( + +species text PRIMARY KEY, + +common_name text, + +population varint, + +average_size int + +) WITH comment=`Important biological records'; + +CREATE TABLE timeline ( + +userid uuid, + +posted_month int, + +posted_time uuid, + +body text, + +posted_by text, + +PRIMARY KEY (userid, posted_month, posted_time) + +) WITH compaction = \{ `class' : `LeveledCompactionStrategy' }; + +p. + +The `CREATE TABLE` statement creates a new table. Each such table is a +set of _rows_ (usually representing related entities) for which it +defines a number of properties. A table is defined by a +link:#createTableName[name], it defines the columns composing rows of +the table and have a number of link:#createTableOptions[options]. Note +that the `CREATE COLUMNFAMILY` syntax is supported as an alias for +`CREATE TABLE` (for historical reasons). + +Attempting to create an already existing table will return an error +unless the `IF NOT EXISTS` option is used. If it is used, the statement +will be a no-op if the table already exists. + +[[createTableName]] +===== `` + +Valid table names are the same as valid +link:#createKeyspaceStmt[keyspace names] (up to 32 characters long +alphanumerical identifiers). If the table name is provided alone, the +table is created within the current keyspace (see `USE`), but if it is +prefixed by an existing keyspace name (see +link:#statements[``] grammar), it is created in the specified +keyspace (but does *not* change the current keyspace). + +[[createTableColumn]] +===== `` + +A `CREATE TABLE` statement defines the columns that rows of the table +can have. A _column_ is defined by its name (an identifier) and its type +(see the link:#types[data types] section for more details on allowed +types and their properties). + +Within a table, a row is uniquely identified by its `PRIMARY KEY` (or +more simply the key), and hence all table definitions *must* define a +PRIMARY KEY (and only one). A `PRIMARY KEY` is composed of one or more +of the columns defined in the table. If the `PRIMARY KEY` is only one +column, this can be specified directly after the column definition. +Otherwise, it must be specified by following `PRIMARY KEY` by the +comma-separated list of column names composing the key within +parenthesis. Note that: + +bc(sample). + +CREATE TABLE t ( + +k int PRIMARY KEY, + +other text + +) + +is equivalent to + +bc(sample). + +CREATE TABLE t ( + +k int, + +other text, + +PRIMARY KEY (k) + +) + +[[createTablepartitionClustering]] +===== Partition key and clustering columns + +In CQL, the order in which columns are defined for the `PRIMARY KEY` +matters. The first column of the key is called the _partition key_. It +has the property that all the rows sharing the same partition key (even +across table in fact) are stored on the same physical node. 
Also, +insertion/update/deletion on rows sharing the same partition key for a +given table are performed _atomically_ and in _isolation_. Note that it +is possible to have a composite partition key, i.e. a partition key +formed of multiple columns, using an extra set of parentheses to define +which columns forms the partition key. + +The remaining columns of the `PRIMARY KEY` definition, if any, are +called __clustering columns. On a given physical node, rows for a given +partition key are stored in the order induced by the clustering columns, +making the retrieval of rows in that clustering order particularly +efficient (see `SELECT`). + +[[createTableStatic]] +===== `STATIC` columns + +Some columns can be declared as `STATIC` in a table definition. A column +that is static will be ``shared'' by all the rows belonging to the same +partition (having the same partition key). For instance, in: + +bc(sample). + +CREATE TABLE test ( + +pk int, + +t int, + +v text, + +s text static, + +PRIMARY KEY (pk, t) + +); + +INSERT INTO test(pk, t, v, s) VALUES (0, 0, `val0', `static0'); + +INSERT INTO test(pk, t, v, s) VALUES (0, 1, `val1', `static1'); + +SELECT * FROM test WHERE pk=0 AND t=0; + +the last query will return `'static1'` as value for `s`, since `s` is +static and thus the 2nd insertion modified this ``shared'' value. Note +however that static columns are only static within a given partition, +and if in the example above both rows where from different partitions +(i.e. if they had different value for `pk`), then the 2nd insertion +would not have modified the value of `s` for the first row. + +A few restrictions applies to when static columns are allowed: + +* tables with the `COMPACT STORAGE` option (see below) cannot have them +* a table without clustering columns cannot have static columns (in a +table without clustering columns, every partition has only one row, and +so every column is inherently static). +* only non `PRIMARY KEY` columns can be static + +[[createTableOptions]] +===== `
must be specified separately in the +`--kt-list` option. + +For example, create a snapshot for table `t` in +the `cqlkeyspace` and table `journal` in the catalogkeyspace and tag the +snapshot `multi-ks`. + +[source,bash] +---- +include::example$BASH/snapshot_mult_ks.sh[] +---- + +results in +[source, plaintext] +---- +include::example$RESULTS/snapshot_mult_ks.result[] +---- + +=== Listing Snapshots + +To list snapshots use the `nodetool listsnapshots` command. All the +snapshots that we created in the preceding examples get listed: + +[source,bash] +---- +include::example$BASH/nodetool_list_snapshots.sh[] +---- + +results in + +[source, plaintext] +---- +include::example$RESULTS/nodetool_list_snapshots.result[] +---- + +=== Finding Snapshots Directories + +The `snapshots` directories may be listed with `find –name snapshots` +command: + +[source,bash] +---- +include::example$BASH/find_snapshots.sh[] +---- + +results in + +[source, plaintext] +---- +include::example$RESULTS/snapshot_all.result[] +---- + +To list the snapshots for a particular table first change to the snapshots directory for that table. +For example, list the snapshots for the `catalogkeyspace/journal` table: + +[source,bash] +---- +include::example$BASH/find_two_snapshots.sh[] +---- + +results in + +[source, plaintext] +---- +include::example$RESULTS/find_two_snapshots.result[] +---- + +A `snapshots` directory lists the SSTable files in the snapshot. +A `schema.cql` file is also created in each snapshot that defines schema +that can recreate the table with CQL when restoring from a snapshot: + +[source,bash] +---- +include::example$BASH/snapshot_files.sh[] +---- + +results in + +[source, plaintext] +---- +include::example$RESULTS/snapshot_files.result[] +---- + +=== Clearing Snapshots + +Snapshots may be cleared or deleted with the `nodetool clearsnapshot` +command. Either a specific snapshot name must be specified or the `–all` +option must be specified. + +For example, delete a snapshot called `magazine` from keyspace `cqlkeyspace`: + +[source,bash] +---- +include::example$BASH/nodetool_clearsnapshot.sh[] +---- + +or delete all snapshots from `cqlkeyspace` with the –all option: + +[source,bash] +---- +include::example$BASH/nodetool_clearsnapshot_all.sh[] +---- + +== Incremental Backups + +In the following sections, we shall discuss configuring and creating +incremental backups. + +=== Configuring for Incremental Backups + +To create incremental backups set `incremental_backups` to `true` in +`cassandra.yaml`. + +[source,yaml] +---- +include::example$YAML/incremental_bups.yaml[] +---- + +This is the only setting needed to create incremental backups. +By default `incremental_backups` setting is set to `false` because a new +set of SSTable files is created for each data flush and if several CQL +statements are to be run the `backups` directory could fill up quickly +and use up storage that is needed to store table data. +Incremental backups may also be enabled on the command line with the nodetool +command `nodetool enablebackup`. +Incremental backups may be disabled with `nodetool disablebackup` command. +Status of incremental backups, whether they are enabled may be checked with `nodetool statusbackup`. + +=== Creating Incremental Backups + +After each table is created flush the table data with `nodetool flush` +command. Incremental backups get created. 
+ +[source,bash] +---- +include::example$BASH/nodetool_flush.sh[] +---- + +=== Finding Incremental Backups + +Incremental backups are created within the Cassandra’s `data` directory +within a table directory. Backups may be found with following command. + +[source,bash] +---- +include::example$BASH/find_backups.sh[] +---- +results in +[source,none] +---- +include::example$RESULTS/find_backups.result[] +---- + +=== Creating an Incremental Backup + +This section discusses how incremental backups are created in more +detail using the keyspace and table previously created. + +Flush the keyspace and table: + +[source,bash] +---- +include::example$BASH/nodetool_flush_table.sh[] +---- + +A search for backups and a `backups` directory will list a backup directory, +even if we have added no table data yet. + +[source,bash] +---- +include::example$BASH/find_backups.sh[] +---- + +results in + +[source,plaintext] +---- +include::example$RESULTS/find_backups_table.result[] +---- + +Checking the `backups` directory will show that there are also no backup files: + +[source,bash] +---- +include::example$BASH/check_backups.sh[] +---- + +results in + +[source, plaintext] +---- +include::example$RESULTS/no_bups.result[] +---- + +If a row of data is added to the data, running the `nodetool flush` command will +flush the table data and an incremental backup will be created: + +[source,bash] +---- +include::example$BASH/flush_and_check.sh[] +---- + +results in + +[source, plaintext] +---- +include::example$RESULTS/flush_and_check.result[] +---- + +[NOTE] +.note +==== +The `backups` directory for any table, such as `cqlkeyspace/t` is created in the +`data` directory for that table. +==== + +Adding another row of data and flushing will result in another set of incremental backup files. +The SSTable files are timestamped, which distinguishes the first incremental backup from the +second: + +[source,none] +---- +include::example$RESULTS/flush_and_check2.result[] +---- + +== Restoring from Incremental Backups and Snapshots + +The two main tools/commands for restoring a table after it has been +dropped are: + +* sstableloader +* nodetool import + +A snapshot contains essentially the same set of SSTable files as an +incremental backup does with a few additional files. A snapshot includes +a `schema.cql` file for the schema DDL to create a table in CQL. A table +backup does not include DDL which must be obtained from a snapshot when +restoring from an incremental backup. diff --git a/doc/modules/cassandra/pages/operating/bloom_filters.adoc b/doc/modules/cassandra/pages/operating/bloom_filters.adoc new file mode 100644 index 000000000000..5ce5f8d04cb5 --- /dev/null +++ b/doc/modules/cassandra/pages/operating/bloom_filters.adoc @@ -0,0 +1,64 @@ += Bloom Filters + +In the read path, Cassandra merges data on disk (in SSTables) with data +in RAM (in memtables). To avoid checking every SSTable data file for the +partition being requested, Cassandra employs a data structure known as a +bloom filter. + +Bloom filters are a probabilistic data structure that allows Cassandra +to determine one of two possible states: - The data definitely does not +exist in the given file, or - The data probably exists in the given +file. + +While bloom filters can not guarantee that the data exists in a given +SSTable, bloom filters can be made more accurate by allowing them to +consume more RAM. Operators have the opportunity to tune this behavior +per table by adjusting the the `bloom_filter_fp_chance` to a float +between 0 and 1. 
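+
+For example, the false positive chance can be set when a table is
+created; the `keyspace.table` name below is a placeholder, and the same
+option can also be changed later with `ALTER TABLE`, as shown in the
+Changing section below:
+
+[source,none]
+----
+-- keyspace.table is a placeholder name
+CREATE TABLE keyspace.table (
+    id int PRIMARY KEY,
+    payload text
+) WITH bloom_filter_fp_chance = 0.01;
+----
+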
+ +The default value for `bloom_filter_fp_chance` is 0.1 for tables using +LeveledCompactionStrategy and 0.01 for all other cases. + +Bloom filters are stored in RAM, but are stored offheap, so operators +should not consider bloom filters when selecting the maximum heap size. +As accuracy improves (as the `bloom_filter_fp_chance` gets closer to 0), +memory usage increases non-linearly - the bloom filter for +`bloom_filter_fp_chance = 0.01` will require about three times as much +memory as the same table with `bloom_filter_fp_chance = 0.1`. + +Typical values for `bloom_filter_fp_chance` are usually between 0.01 +(1%) to 0.1 (10%) false-positive chance, where Cassandra may scan an +SSTable for a row, only to find that it does not exist on the disk. The +parameter should be tuned by use case: + +* Users with more RAM and slower disks may benefit from setting the +`bloom_filter_fp_chance` to a numerically lower number (such as 0.01) to +avoid excess IO operations +* Users with less RAM, more dense nodes, or very fast disks may tolerate +a higher `bloom_filter_fp_chance` in order to save RAM at the expense of +excess IO operations +* In workloads that rarely read, or that only perform reads by scanning +the entire data set (such as analytics workloads), setting the +`bloom_filter_fp_chance` to a much higher number is acceptable. + +== Changing + +The bloom filter false positive chance is visible in the +`DESCRIBE TABLE` output as the field `bloom_filter_fp_chance`. Operators +can change the value with an `ALTER TABLE` statement: : + +[source,none] +---- +ALTER TABLE keyspace.table WITH bloom_filter_fp_chance=0.01 +---- + +Operators should be aware, however, that this change is not immediate: +the bloom filter is calculated when the file is written, and persisted +on disk as the Filter component of the SSTable. Upon issuing an +`ALTER TABLE` statement, new files on disk will be written with the new +`bloom_filter_fp_chance`, but existing sstables will not be modified +until they are compacted - if an operator needs a change to +`bloom_filter_fp_chance` to take effect, they can trigger an SSTable +rewrite using `nodetool scrub` or `nodetool upgradesstables -a`, both of +which will rebuild the sstables on disk, regenerating the bloom filters +in the progress. diff --git a/doc/modules/cassandra/pages/operating/bulk_loading.adoc b/doc/modules/cassandra/pages/operating/bulk_loading.adoc new file mode 100644 index 000000000000..2b11f27460f9 --- /dev/null +++ b/doc/modules/cassandra/pages/operating/bulk_loading.adoc @@ -0,0 +1,842 @@ += Bulk Loading + +Bulk loading Apache Cassandra data is supported by different tools. +The data to bulk load must be in the form of SSTables. +Cassandra does not support loading data in any other format such as CSV, +JSON, and XML directly. +Although the cqlsh `COPY` command can load CSV data, it is not a good option +for amounts of data. +Bulk loading is used to: + +* Restore incremental backups and snapshots. Backups and snapshots are +already in the form of SSTables. +* Load existing SSTables into another cluster. The data can have a +different number of nodes or replication strategy. +* Load external data to a cluster. + +== Tools for Bulk Loading + +Cassandra provides two commands or tools for bulk loading data: + +* Cassandra Bulk loader, also called `sstableloader` +* The `nodetool import` command + +The `sstableloader` and `nodetool import` are accessible if the +Cassandra installation `bin` directory is in the `PATH` environment +variable. 
+Or these may be accessed directly from the `bin` directory. +The examples use the keyspaces and tables created in xref:cql/operating/backups.adoc[Backups]. + +== Using sstableloader + +The `sstableloader` is the main tool for bulk uploading data. +`sstableloader` streams SSTable data files to a running cluster, +conforming to the replication strategy and replication factor. +The table to upload data to does need not to be empty. + +The only requirements to run `sstableloader` are: + +* One or more comma separated initial hosts to connect to and get ring +information +* A directory path for the SSTables to load + +[source,bash] +---- +sstableloader [options] +---- + +Sstableloader bulk loads the SSTables found in the directory +`` to the configured cluster. +The `` is used as the target _keyspace/table_ name. +For example, to load an SSTable named `Standard1-g-1-Data.db` into `Keyspace1/Standard1`, +you will need to have the files `Standard1-g-1-Data.db` and `Standard1-g-1-Index.db` in a +directory `/path/to/Keyspace1/Standard1/`. + +=== Sstableloader Option to accept Target keyspace name + +Often as part of a backup strategy, some Cassandra DBAs store an entire data directory. +When corruption in the data is found, restoring data in the same cluster (for large clusters 200 nodes) +is common, but with a different keyspace name. + +Currently `sstableloader` derives keyspace name from the folder structure. +As an option, to specify target keyspace name as part of `sstableloader`, +version 4.0 adds support for the `--target-keyspace` option +(https://issues.apache.org/jira/browse/CASSANDRA-13884[CASSANDRA-13884]). + +The following options are supported, with `-d,--nodes ` required: + +[source,none] +---- +-alg,--ssl-alg Client SSL: algorithm + +-ap,--auth-provider Custom + AuthProvider class name for + cassandra authentication +-ciphers,--ssl-ciphers Client SSL: + comma-separated list of + encryption suites to use +-cph,--connections-per-host Number of + concurrent connections-per-host. +-d,--nodes Required. + Try to connect to these hosts (comma separated) initially for ring information + +-f,--conf-path cassandra.yaml file path for streaming throughput and client/server SSL. + +-h,--help Display this help message + +-i,--ignore Don't stream to this (comma separated) list of nodes + +-idct,--inter-dc-throttle Inter-datacenter throttle speed in Mbits (default unlimited) + +-k,--target-keyspace Target + keyspace name +-ks,--keystore Client SSL: + full path to keystore +-kspw,--keystore-password Client SSL: + password of the keystore +--no-progress Don't + display progress +-p,--port Port used + for native connection (default 9042) +-prtcl,--ssl-protocol Client SSL: + connections protocol to use (default: TLS) +-pw,--password Password for + cassandra authentication +-sp,--storage-port Port used + for internode communication (default 7000) +-spd,--server-port-discovery Use ports + published by server to decide how to connect. With SSL requires StartTLS + to be used. 
+-ssp,--ssl-storage-port Port used + for TLS internode communication (default 7001) +-st,--store-type Client SSL: + type of store +-t,--throttle Throttle + speed in Mbits (default unlimited) +-ts,--truststore Client SSL: + full path to truststore +-tspw,--truststore-password Client SSL: + Password of the truststore +-u,--username Username for + cassandra authentication +-v,--verbose verbose + output +---- + +The `cassandra.yaml` file can be provided on the command-line with `-f` option to set up streaming throughput, client and server encryption +options. +Only `stream_throughput_outbound_megabits_per_sec`, `server_encryption_options` and `client_encryption_options` are read +from the `cassandra.yaml` file. +You can override options read from `cassandra.yaml` with corresponding command line options. + +=== A sstableloader Demo + +An example shows how to use `sstableloader` to upload incremental backup data for the table `catalogkeyspace.magazine`. +In addition, a snapshot of the same table is created to bulk upload, also with `sstableloader`. + +The backups and snapshots for the `catalogkeyspace.magazine` table are listed as follows: + +[source,bash] +---- +$ cd ./cassandra/data/data/catalogkeyspace/magazine-446eae30c22a11e9b1350d927649052c && ls -l +---- + +results in + +[source,none] +---- +total 0 +drwxrwxr-x. 2 ec2-user ec2-user 226 Aug 19 02:38 backups +drwxrwxr-x. 4 ec2-user ec2-user 40 Aug 19 02:45 snapshots +---- + +The directory path structure of SSTables to be uploaded using +`sstableloader` is used as the target keyspace/table. +You can directly upload from the `backups` and `snapshots` +directories respectively, if the directory structure is in the format +used by `sstableloader`. +But the directory path of backups and snapshots for SSTables is +`/catalogkeyspace/magazine-446eae30c22a11e9b1350d927649052c/backups` and +`/catalogkeyspace/magazine-446eae30c22a11e9b1350d927649052c/snapshots` +respectively, and cannot be used to upload SSTables to +`catalogkeyspace.magazine` table. +The directory path structure must be `/catalogkeyspace/magazine/` to use `sstableloader`. +Create a new directory structure to upload SSTables with `sstableloader` +located at `/catalogkeyspace/magazine` and set appropriate permissions. + +[source,bash] +---- +$ sudo mkdir -p /catalogkeyspace/magazine +$ sudo chmod -R 777 /catalogkeyspace/magazine +---- + +==== Bulk Loading from an Incremental Backup + +An incremental backup does not include the DDL for a table; the table must already exist. +If the table was dropped, it can be created using the `schema.cql` file generated with every snapshot of a table. +Prior to using `sstableloader` to load SSTables to the `magazine` table, the table must exist. +The table does not need to be empty but we have used an empty table as indicated by a CQL query: + +[source,cql] +---- +SELECT * FROM magazine; +---- +results in +[source,cql] +---- +id | name | publisher +----+------+----------- + +(0 rows) +---- + +After creating the table to upload to, copy the SSTable files from the `backups` directory to the `/catalogkeyspace/magazine/` directory. + +[source,bash] +---- +$ sudo cp ./cassandra/data/data/catalogkeyspace/magazine-446eae30c22a11e9b1350d927649052c/backups/* \ +/catalogkeyspace/magazine/ +---- + +Run the `sstableloader` to upload SSTables from the +`/catalogkeyspace/magazine/` directory. 
+ +[source,bash] +---- +$ sstableloader --nodes 10.0.2.238 /catalogkeyspace/magazine/ +---- + +The output from the `sstableloader` command should be similar to this listing: + +[source,bash] +---- +$ sstableloader --nodes 10.0.2.238 /catalogkeyspace/magazine/ +---- + +results in + +[source,none] +---- +Opening SSTables and calculating sections to stream +Streaming relevant part of /catalogkeyspace/magazine/na-1-big-Data.db +/catalogkeyspace/magazine/na-2-big-Data.db to [35.173.233.153:7000, 10.0.2.238:7000, +54.158.45.75:7000] +progress: [35.173.233.153:7000]0:1/2 88 % total: 88% 0.018KiB/s (avg: 0.018KiB/s) +progress: [35.173.233.153:7000]0:2/2 176% total: 176% 33.807KiB/s (avg: 0.036KiB/s) +progress: [35.173.233.153:7000]0:2/2 176% total: 176% 0.000KiB/s (avg: 0.029KiB/s) +progress: [35.173.233.153:7000]0:2/2 176% [10.0.2.238:7000]0:1/2 39 % total: 81% 0.115KiB/s +(avg: 0.024KiB/s) +progress: [35.173.233.153:7000]0:2/2 176% [10.0.2.238:7000]0:2/2 78 % total: 108% +97.683KiB/s (avg: 0.033KiB/s) +progress: [35.173.233.153:7000]0:2/2 176% [10.0.2.238:7000]0:2/2 78 % +[54.158.45.75:7000]0:1/2 39 % total: 80% 0.233KiB/s (avg: 0.040KiB/s) +progress: [35.173.233.153:7000]0:2/2 176% [10.0.2.238:7000]0:2/2 78 % +[54.158.45.75:7000]0:2/2 78 % total: 96% 88.522KiB/s (avg: 0.049KiB/s) +progress: [35.173.233.153:7000]0:2/2 176% [10.0.2.238:7000]0:2/2 78 % +[54.158.45.75:7000]0:2/2 78 % total: 96% 0.000KiB/s (avg: 0.045KiB/s) +progress: [35.173.233.153:7000]0:2/2 176% [10.0.2.238:7000]0:2/2 78 % +[54.158.45.75:7000]0:2/2 78 % total: 96% 0.000KiB/s (avg: 0.044KiB/s) +---- + +After the `sstableloader` has finished loading the data, run a query the `magazine` table to check: + +[source,cql] +---- +SELECT * FROM magazine; +---- +results in +[source,cql] +---- +id | name | publisher +----+---------------------------+------------------ + 1 | Couchbase Magazine | Couchbase + 0 | Apache Cassandra Magazine | Apache Cassandra + +(2 rows) +---- + +==== Bulk Loading from a Snapshot + +Restoring a snapshot of a table to the same table can be easily accomplished: + +If the directory structure needed to load SSTables to `catalogkeyspace.magazine` does not exist create the +directories and set appropriate permissions: + +[source,bash] +---- +$ sudo mkdir -p /catalogkeyspace/magazine +$ sudo chmod -R 777 /catalogkeyspace/magazine +---- + +Remove any files from the directory, so that the snapshot files can be copied without interference: + +[source,bash] +---- +$ sudo rm /catalogkeyspace/magazine/* +$ cd /catalogkeyspace/magazine/ +$ ls -l +---- + +results in + +[source,none] +---- +total 0 +---- + +Copy the snapshot files to the `/catalogkeyspace/magazine` directory. + +[source,bash] +---- +$ sudo cp ./cassandra/data/data/catalogkeyspace/magazine-446eae30c22a11e9b1350d927649052c/snapshots/magazine/* \ +/catalogkeyspace/magazine +---- + +List the files in the `/catalogkeyspace/magazine` directory. +The `schema.cql` will also be listed. + +[source,bash] +---- +$ cd /catalogkeyspace/magazine && ls -l +---- + +results in + +[source,none] +---- +total 44 +-rw-r--r--. 1 root root 31 Aug 19 04:13 manifest.json +-rw-r--r--. 1 root root 47 Aug 19 04:13 na-1-big-CompressionInfo.db +-rw-r--r--. 1 root root 97 Aug 19 04:13 na-1-big-Data.db +-rw-r--r--. 1 root root 10 Aug 19 04:13 na-1-big-Digest.crc32 +-rw-r--r--. 1 root root 16 Aug 19 04:13 na-1-big-Filter.db +-rw-r--r--. 1 root root 16 Aug 19 04:13 na-1-big-Index.db +-rw-r--r--. 1 root root 4687 Aug 19 04:13 na-1-big-Statistics.db +-rw-r--r--. 
1 root root 56 Aug 19 04:13 na-1-big-Summary.db +-rw-r--r--. 1 root root 92 Aug 19 04:13 na-1-big-TOC.txt +-rw-r--r--. 1 root root 815 Aug 19 04:13 schema.cql +---- + +Alternatively create symlinks to the snapshot folder instead of copying +the data: + +[source,bash] +---- +$ mkdir +$ ln -s / +---- + +If the `magazine` table was dropped, run the DDL in the `schema.cql` to +create the table. +Run the `sstableloader` with the following command: + +[source,bash] +---- +$ sstableloader --nodes 10.0.2.238 /catalogkeyspace/magazine/ +---- + +As the output from the command indicates, SSTables get streamed to the +cluster: + +[source,none] +---- +Established connection to initial hosts +Opening SSTables and calculating sections to stream +Streaming relevant part of /catalogkeyspace/magazine/na-1-big-Data.db to +[35.173.233.153:7000, 10.0.2.238:7000, 54.158.45.75:7000] +progress: [35.173.233.153:7000]0:1/1 176% total: 176% 0.017KiB/s (avg: 0.017KiB/s) +progress: [35.173.233.153:7000]0:1/1 176% total: 176% 0.000KiB/s (avg: 0.014KiB/s) +progress: [35.173.233.153:7000]0:1/1 176% [10.0.2.238:7000]0:1/1 78 % total: 108% 0.115KiB/s +(avg: 0.017KiB/s) +progress: [35.173.233.153:7000]0:1/1 176% [10.0.2.238:7000]0:1/1 78 % +[54.158.45.75:7000]0:1/1 78 % total: 96% 0.232KiB/s (avg: 0.024KiB/s) +progress: [35.173.233.153:7000]0:1/1 176% [10.0.2.238:7000]0:1/1 78 % +[54.158.45.75:7000]0:1/1 78 % total: 96% 0.000KiB/s (avg: 0.022KiB/s) +progress: [35.173.233.153:7000]0:1/1 176% [10.0.2.238:7000]0:1/1 78 % +[54.158.45.75:7000]0:1/1 78 % total: 96% 0.000KiB/s (avg: 0.021KiB/s) +---- + +Some other requirements of `sstableloader` that should be kept into +consideration are: + +* The SSTables loaded must be compatible with the Cassandra +version being loaded into. +* Repairing tables that have been loaded into a different cluster does +not repair the source tables. +* Sstableloader makes use of port 7000 for internode communication. +* Before restoring incremental backups, run `nodetool flush` to backup +any data in memtables. + +== Using nodetool import + +Importing SSTables into a table using the `nodetool import` command is recommended instead of the deprecated +`nodetool refresh` command. +The `nodetool import` command has an option to load new SSTables from a separate directory. + +The command usage is as follows: + +[source,none] +---- +nodetool [(-h | --host )] [(-p | --port )] + [(-pp | --print-port)] [(-pw | --password )] + [(-pwf | --password-file )] + [(-u | --username )] import + [(-c | --no-invalidate-caches)] [(-e | --extended-verify)] + [(-l | --keep-level)] [(-q | --quick)] [(-r | --keep-repaired)] + [(-t | --no-tokens)] [(-v | --no-verify)] [--]
+ ... +---- + +The arguments `keyspace`, `table` name and `directory` are required. + +The following options are supported: + +[source,none] +---- +-c, --no-invalidate-caches + Don't invalidate the row cache when importing + +-e, --extended-verify + Run an extended verify, verifying all values in the new SSTables + +-h , --host + Node hostname or ip address + +-l, --keep-level + Keep the level on the new SSTables + +-p , --port + Remote jmx agent port number + +-pp, --print-port + Operate in 4.0 mode with hosts disambiguated by port number + +-pw , --password + Remote jmx agent password + +-pwf , --password-file + Path to the JMX password file + +-q, --quick + Do a quick import without verifying SSTables, clearing row cache or + checking in which data directory to put the file + +-r, --keep-repaired + Keep any repaired information from the SSTables + +-t, --no-tokens + Don't verify that all tokens in the new SSTable are owned by the + current node + +-u , --username + Remote jmx agent username + +-v, --no-verify + Don't verify new SSTables + +-- + This option can be used to separate command-line options from the + list of argument, (useful when arguments might be mistaken for + command-line options +---- + +Because the keyspace and table are specified on the command line for +`nodetool import`, there is not the same requirement as with +`sstableloader`, to have the SSTables in a specific directory path. +When importing snapshots or incremental backups with +`nodetool import`, the SSTables don’t need to be copied to another +directory. + +=== Importing Data from an Incremental Backup + +Using `nodetool import` to import SSTables from an incremental backup, and restoring +the table is shown below. + +[source,cql] +---- +DROP table t; +---- + +An incremental backup for a table does not include the schema definition for the table. +If the schema definition is not kept as a separate +backup, the `schema.cql` from a backup of the table may be used to +create the table as follows: + +[source,cql] +---- +CREATE TABLE IF NOT EXISTS cqlkeyspace.t ( + id int PRIMARY KEY, + k int, + v text) + WITH ID = d132e240-c217-11e9-bbee-19821dcea330 + AND bloom_filter_fp_chance = 0.01 + AND crc_check_chance = 1.0 + AND default_time_to_live = 0 + AND gc_grace_seconds = 864000 + AND min_index_interval = 128 + AND max_index_interval = 2048 + AND memtable_flush_period_in_ms = 0 + AND speculative_retry = '99p' + AND additional_write_policy = '99p' + AND comment = '' + AND caching = { 'keys': 'ALL', 'rows_per_partition': 'NONE' } + AND compaction = { 'max_threshold': '32', 'min_threshold': '4', + 'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy' } + AND compression = { 'chunk_length_in_kb': '16', 'class': + 'org.apache.cassandra.io.compress.LZ4Compressor' } + AND cdc = false + AND extensions = { } +; +---- + +Initially the table could be empty, but does not have to be. + +[source,cql] +---- +SELECT * FROM t; +---- +[source,cql] +---- +id | k | v +----+---+--- + +(0 rows) +---- + +Run the `nodetool import` command, providing the keyspace, table and +the backups directory. +Don’t copy the table backups to another directory, as with `sstableloader`. + +[source,bash] +---- +$ nodetool import -- cqlkeyspace t \ +./cassandra/data/data/cqlkeyspace/t-d132e240c21711e9bbee19821dcea330/backups +---- + +The SSTables are imported into the table. 
Run a query in cqlsh to check: + +[source,cql] +---- +SELECT * FROM t; +---- +[source,cql] +---- +id | k | v +----+---+------ + 1 | 1 | val1 + 0 | 0 | val0 + +(2 rows) +---- + +=== Importing Data from a Snapshot + +Importing SSTables from a snapshot with the `nodetool import` command is +similar to importing SSTables from an incremental backup. +Shown here is an import of a snapshot for table `catalogkeyspace.journal`, after +dropping the table to demonstrate the restore. + +[source,cql] +---- +USE CATALOGKEYSPACE; +DROP TABLE journal; +---- + +Use the `catalog-ks` snapshot for the `journal` table. +Check the files in the snapshot, and note the existence of the `schema.cql` file. + +[source,bash] +---- +$ ls -l +---- +[source,none] +---- +total 44 +-rw-rw-r--. 1 ec2-user ec2-user 31 Aug 19 02:44 manifest.json +-rw-rw-r--. 3 ec2-user ec2-user 47 Aug 19 02:38 na-1-big-CompressionInfo.db +-rw-rw-r--. 3 ec2-user ec2-user 97 Aug 19 02:38 na-1-big-Data.db +-rw-rw-r--. 3 ec2-user ec2-user 10 Aug 19 02:38 na-1-big-Digest.crc32 +-rw-rw-r--. 3 ec2-user ec2-user 16 Aug 19 02:38 na-1-big-Filter.db +-rw-rw-r--. 3 ec2-user ec2-user 16 Aug 19 02:38 na-1-big-Index.db +-rw-rw-r--. 3 ec2-user ec2-user 4687 Aug 19 02:38 na-1-big-Statistics.db +-rw-rw-r--. 3 ec2-user ec2-user 56 Aug 19 02:38 na-1-big-Summary.db +-rw-rw-r--. 3 ec2-user ec2-user 92 Aug 19 02:38 na-1-big-TOC.txt +-rw-rw-r--. 1 ec2-user ec2-user 814 Aug 19 02:44 schema.cql +---- + +Copy the DDL from the `schema.cql` and run in cqlsh to create the +`catalogkeyspace.journal` table: + +[source,cql] +---- +CREATE TABLE IF NOT EXISTS catalogkeyspace.journal ( + id int PRIMARY KEY, + name text, + publisher text) + WITH ID = 296a2d30-c22a-11e9-b135-0d927649052c + AND bloom_filter_fp_chance = 0.01 + AND crc_check_chance = 1.0 + AND default_time_to_live = 0 + AND gc_grace_seconds = 864000 + AND min_index_interval = 128 + AND max_index_interval = 2048 + AND memtable_flush_period_in_ms = 0 + AND speculative_retry = '99p' + AND additional_write_policy = '99p' + AND comment = '' + AND caching = { 'keys': 'ALL', 'rows_per_partition': 'NONE' } + AND compaction = { 'min_threshold': '4', 'max_threshold': + '32', 'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy' } + AND compression = { 'chunk_length_in_kb': '16', 'class': + 'org.apache.cassandra.io.compress.LZ4Compressor' } + AND cdc = false + AND extensions = { } +; +---- + +Run the `nodetool import` command to import the SSTables for the +snapshot: + +[source,bash] +---- +$ nodetool import -- catalogkeyspace journal \ +./cassandra/data/data/catalogkeyspace/journal- +296a2d30c22a11e9b1350d927649052c/snapshots/catalog-ks/ +---- + +Subsequently run a CQL query on the `journal` table to check the imported data: + +[source,cql] +---- +SELECT * FROM journal; +---- +[source,cql] +---- +id | name | publisher +----+---------------------------+------------------ + 1 | Couchbase Magazine | Couchbase + 0 | Apache Cassandra Magazine | Apache Cassandra + +(2 rows) +---- + +== Bulk Loading External Data + +Bulk loading external data directly is not supported by any of the tools +we have discussed which include `sstableloader` and `nodetool import`. +The `sstableloader` and `nodetool import` require data to be in the form +of SSTables. +Apache Cassandra supports a Java API for generating SSTables from input data, using the +`org.apache.cassandra.io.sstable.CQLSSTableWriter` Java class. +Subsequently, either `sstableloader` or `nodetool import` is used to bulk load the SSTables. 
+ +=== Generating SSTables with CQLSSTableWriter Java API + +To generate SSTables using the `CQLSSTableWriter` class the following are required: + +* An output directory to generate the SSTable in +* The schema for the SSTable +* A prepared statement for the `INSERT` +* A partitioner + +The output directory must exist before starting. Create a directory +(`/sstables` as an example) and set appropriate permissions. + +[source,bash] +---- +$ sudo mkdir /sstables +$ sudo chmod 777 -R /sstables +---- + +To use `CQLSSTableWriter` in a Java application, create a Java constant for the output directory. + +[source,java] +---- +public static final String OUTPUT_DIR = "./sstables"; +---- + +`CQLSSTableWriter` Java API can create a user-defined type. Create a new type to store `int` data: + +[source,java] +---- +String type = "CREATE TYPE CQLKeyspace.intType (a int, b int)"; +// Define a String variable for the SSTable schema. +String schema = "CREATE TABLE CQLKeyspace.t (" + + " id int PRIMARY KEY," + + " k int," + + " v1 text," + + " v2 intType," + + ")"; +---- + +Define a `String` variable for the prepared statement to use: + +[source,java] +---- +String insertStmt = "INSERT INTO CQLKeyspace.t (id, k, v1, v2) VALUES (?, ?, ?, ?)"; +---- + +The partitioner to use only needs setting if the default partitioner `Murmur3Partitioner` is not used. + +All these variables or settings are used by the builder class +`CQLSSTableWriter.Builder` to create a `CQLSSTableWriter` object. + +Create a File object for the output directory. + +[source,java] +---- +File outputDir = new File(OUTPUT_DIR + File.separator + "CQLKeyspace" + File.separator + "t"); +---- + +Obtain a `CQLSSTableWriter.Builder` object using `static` method `CQLSSTableWriter.builder()`. +Set the following items: + +* output directory `File` object +* user-defined type +* SSTable schema +* buffer size +* prepared statement +* optionally any of the other builder options + +and invoke the `build()` method to create a `CQLSSTableWriter` object: + +[source,java] +---- +CQLSSTableWriter writer = CQLSSTableWriter.builder() + .inDirectory(outputDir) + .withType(type) + .forTable(schema) + .withBufferSizeInMB(256) + .using(insertStmt).build(); +---- + +Set the SSTable data. If any user-defined types are used, obtain a +`UserType` object for each type: + +[source,java] +---- +UserType userType = writer.getUDType("intType"); +---- + +Add data rows for the resulting SSTable: + +[source,java] +---- +writer.addRow(0, 0, "val0", userType.newValue().setInt("a", 0).setInt("b", 0)); + writer.addRow(1, 1, "val1", userType.newValue().setInt("a", 1).setInt("b", 1)); + writer.addRow(2, 2, "val2", userType.newValue().setInt("a", 2).setInt("b", 2)); +---- + +Close the writer, finalizing the SSTable: + +[source,java] +---- +writer.close(); +---- + +Other public methods the `CQLSSTableWriter` class provides are: + +[cols=",",options="header",] +|=== +|Method |Description + +|addRow(java.util.List values) |Adds a new row to the +writer. Returns a CQLSSTableWriter object. Each provided value type +should correspond to the types of the CQL column the value is for. The +correspondence between java type and CQL type is the same one than the +one documented at +www.datastax.com/drivers/java/2.0/apidocs/com/datastax/driver/core/DataType.Name.html#asJavaC +lass(). + +|addRow(java.util.Map values) |Adds a +new row to the writer. Returns a CQLSSTableWriter object. 
This is +equivalent to the other addRow methods, but takes a map whose keys are +the names of the columns to add instead of taking a list of the values +in the order of the insert statement used during construction of this +SSTable writer. The column names in the map keys must be in lowercase +unless the declared column name is a case-sensitive quoted identifier in +which case the map key must use the exact case of the column. The values +parameter is a map of column name to column values representing the new +row to add. If a column is not included in the map, it's value will be +null. If the map contains keys that do not correspond to one of the +columns of the insert statement used when creating this SSTable writer, +the corresponding value is ignored. + +|addRow(java.lang.Object... values) |Adds a new row to the writer. +Returns a CQLSSTableWriter object. + +|CQLSSTableWriter.builder() |Returns a new builder for a +CQLSSTableWriter. + +|close() |Closes the writer. + +|rawAddRow(java.nio.ByteBuffer... values) |Adds a new row to the writer +given already serialized binary values. Returns a CQLSSTableWriter +object. The row values must correspond to the bind variables of the +insertion statement used when creating by this SSTable writer. + +|rawAddRow(java.util.List values) |Adds a new row +to the writer given already serialized binary values. Returns a +CQLSSTableWriter object. The row values must correspond to the bind +variables of the insertion statement used when creating by this SSTable +writer. + +|rawAddRow(java.util.Map values) +|Adds a new row to the writer given already serialized binary values. +Returns a CQLSSTableWriter object. The row values must correspond to the +bind variables of the insertion statement used when creating by this +SSTable writer. + +|getUDType(String dataType) |Returns the User Defined type used in this +SSTable Writer that can be used to create UDTValue instances. +|=== + +Other public methods the `CQLSSTableWriter.Builder` class provides are: + +[cols=",",options="header",] +|=== +|Method |Description +|inDirectory(String directory) |The directory where to write the +SSTables. This is a mandatory option. The directory to use should +already exist and be writable. + +|inDirectory(File directory) |The directory where to write the SSTables. +This is a mandatory option. The directory to use should already exist +and be writable. + +|forTable(String schema) |The schema (CREATE TABLE statement) for the +table for which SSTable is to be created. The provided CREATE TABLE +statement must use a fully-qualified table name, one that includes the +keyspace name. This is a mandatory option. + +|withPartitioner(IPartitioner partitioner) |The partitioner to use. By +default, Murmur3Partitioner will be used. If this is not the partitioner +used by the cluster for which the SSTables are created, the correct +partitioner needs to be provided. + +|using(String insert) |The INSERT or UPDATE statement defining the order +of the values to add for a given CQL row. The provided INSERT statement +must use a fully-qualified table name, one that includes the keyspace +name. Moreover, said statement must use bind variables since these +variables will be bound to values by the resulting SSTable writer. This +is a mandatory option. + +|withBufferSizeInMB(int size) |The size of the buffer to use. This +defines how much data will be buffered before being written as a new +SSTable. This corresponds roughly to the data size that will have the +created SSTable. 
The default is 128MB, which should be reasonable for a
+1GB heap. If an OutOfMemory exception is thrown while using the
+SSTable writer, this value should be lowered.
+
+|sorted() |Creates a CQLSSTableWriter that expects sorted inputs. If
+this option is used, the resulting SSTable writer will expect rows to be
+added in SSTable sorted order (and an exception will be thrown if that
+is not the case during row insertion). The SSTable sorted order means
+that rows are added such that their partition keys respect the
+partitioner order. This option should only be used if the rows can be
+provided in order, which is rarely the case. If the rows can be provided
+in order, however, using sorted() might be more efficient. If this
+option is used, some options like withBufferSizeInMB will be ignored.
+
+|build() |Builds a CQLSSTableWriter object.
+|===
diff --git a/doc/modules/cassandra/pages/operating/cdc.adoc b/doc/modules/cassandra/pages/operating/cdc.adoc
new file mode 100644
index 000000000000..b0d5c191daf0
--- /dev/null
+++ b/doc/modules/cassandra/pages/operating/cdc.adoc
@@ -0,0 +1,86 @@
+= Change Data Capture
+
+== Overview
+
+Change data capture (CDC) provides a mechanism to flag specific tables
+for archival as well as rejecting writes to those tables once a
+configurable size-on-disk for the CDC log is reached. An operator can
+enable CDC on a table by setting the table property `cdc=true` (either
+when xref:cql/ddl.adoc#create-table[`creating the table`] or
+xref:cql/ddl.adoc#alter-table[`altering it`]). Upon CommitLogSegment creation,
+a hard-link to the segment is created in the directory specified in
+`cassandra.yaml`. On segment fsync to disk, if CDC data is present
+anywhere in the segment a _cdc.idx file is also created
+with the integer offset of how much data in the original segment is
+persisted to disk. Upon final segment flush, a second line with the
+human-readable word "COMPLETED" will be added to the _cdc.idx file
+indicating that Cassandra has completed all processing on the file.
+
+We use an index file rather than just encouraging clients to parse
+the log in real time off a memory-mapped handle because data can be
+reflected in a kernel buffer that is not yet persisted to disk. Parsing
+only up to the listed offset in the _cdc.idx file will ensure that you
+only parse CDC data for data that is durable.
+
+A threshold of total allowed disk space is specified in the yaml; once it
+is reached, newly allocated CommitLogSegments will not allow CDC data
+until a consumer parses and removes files from the specified cdc_raw
+directory.
+
+== Configuration
+
+=== Enabling or disabling CDC on a table
+
+CDC is enabled or disabled through the `cdc` table property,
+for instance:
+
+[source,cql]
+----
+CREATE TABLE foo (a int, b text, PRIMARY KEY(a)) WITH cdc=true;
+
+ALTER TABLE foo WITH cdc=true;
+
+ALTER TABLE foo WITH cdc=false;
+----
+
+=== cassandra.yaml parameters
+
+The following cassandra.yaml options are available for CDC:
+
+`cdc_enabled` (default: false)::
+  Enable or disable CDC operations node-wide.
+`cdc_raw_directory` (default: `$CASSANDRA_HOME/data/cdc_raw`)::
+  Destination for CommitLogSegments to be moved after all corresponding
+  memtables are flushed.
+`cdc_free_space_in_mb` (default: min of 4096 and 1/8th volume space)::
+  Calculated as sum of all active CommitLogSegments that permit CDC +
+  all flushed CDC segments in `cdc_raw_directory`.
+`cdc_free_space_check_interval_ms` (default: 250):: + When at capacity, we limit the frequency with which we re-calculate + the space taken up by `cdc_raw_directory` to prevent burning CPU + cycles unnecessarily. Default is to check 4 times per second. + +== Reading CommitLogSegments + +Use a +https://github.com/apache/cassandra/blob/e31e216234c6b57a531cae607e0355666007deb2/src/java/org/apache/cassandra/db/commitlog/CommitLogReader.java[CommitLogReader.java]. +Usage is +https://github.com/apache/cassandra/blob/e31e216234c6b57a531cae607e0355666007deb2/src/java/org/apache/cassandra/db/commitlog/CommitLogReplayer.java#L132-L140[fairly +straightforward] with a +https://github.com/apache/cassandra/blob/e31e216234c6b57a531cae607e0355666007deb2/src/java/org/apache/cassandra/db/commitlog/CommitLogReader.java#L71-L103[variety +of signatures] available for use. In order to handle mutations read from +disk, implement +https://github.com/apache/cassandra/blob/e31e216234c6b57a531cae607e0355666007deb2/src/java/org/apache/cassandra/db/commitlog/CommitLogReadHandler.java[CommitLogReadHandler]. + +== Warnings + +*Do not enable CDC without some kind of consumption process in-place.* + +If CDC is enabled on a node and then on a table, the +`cdc_free_space_in_mb` will fill up and then writes to CDC-enabled +tables will be rejected unless some consumption process is in place. + +== Further Reading + +* https://issues.apache.org/jira/browse/CASSANDRA-8844[JIRA ticket] +* https://issues.apache.org/jira/browse/CASSANDRA-12148[JIRA ticket] diff --git a/doc/modules/cassandra/pages/operating/compaction/index.adoc b/doc/modules/cassandra/pages/operating/compaction/index.adoc new file mode 100644 index 000000000000..880ff16afb67 --- /dev/null +++ b/doc/modules/cassandra/pages/operating/compaction/index.adoc @@ -0,0 +1,339 @@ += Compaction + +== Strategies + +Picking the right compaction strategy for your workload will ensure the +best performance for both querying and for compaction itself. + +xref:cql/operating/compaction/stcs.adoc[`Size Tiered Compaction Strategy (STCS)`]:: + The default compaction strategy. Useful as a fallback when other + strategies don't fit the workload. Most useful for non pure time + series workloads with spinning disks, or when the I/O from `LCS` + is too high. +xref:cql/operating/compaction/lcs.adoc[`Leveled Compaction Strategy (LCS)`]:: + Leveled Compaction Strategy (LCS) is optimized for read heavy + workloads, or workloads with lots of updates and deletes. It is not a + good choice for immutable time series data. +xref:cql/operating/compaction/twcs.adoc[`Time Window Compaction Strategy (TWCS)`]:: + Time Window Compaction Strategy is designed for TTL'ed, mostly + immutable time series data. + +== Types of compaction + +The concept of compaction is used for different kinds of operations in +Cassandra, the common thing about these operations is that it takes one +or more SSTables and output new SSTables. The types of compactions are: + +Minor compaction:: + triggered automatically in Cassandra. +Major compaction:: + a user executes a compaction over all SSTables on the node. +User defined compaction:: + a user triggers a compaction on a given set of SSTables. +Scrub:: + try to fix any broken SSTables. This can actually remove valid data if + that data is corrupted, if that happens you will need to run a full + repair on the node. +UpgradeSSTables:: + upgrade SSTables to the latest version. Run this after upgrading to a + new major version. 
+Cleanup:: + remove any ranges this node does not own anymore, typically triggered + on neighbouring nodes after a node has been bootstrapped since that + node will take ownership of some ranges from those nodes. +Secondary index rebuild:: + rebuild the secondary indexes on the node. +Anticompaction:: + after repair the ranges that were actually repaired are split out of + the SSTables that existed when repair started. +Sub range compaction:: + It is possible to only compact a given sub range - this could be + useful if you know a token that has been misbehaving - either + gathering many updates or many deletes. + (`nodetool compact -st x -et y`) will pick all SSTables containing the + range between x and y and issue a compaction for those SSTables. For + STCS this will most likely include all SSTables but with LCS it can + issue the compaction for a subset of the SSTables. With LCS the + resulting sstable will end up in L0. + +== When is a minor compaction triggered? + +* When an sstable is added to the node through flushing/streaming +* When autocompaction is enabled after being disabled (`nodetool enableautocompaction`) +* When compaction adds new SSTables +* A check for new minor compactions every 5 minutes + +== Merging SSTables + +Compaction is about merging SSTables, since partitions in SSTables are +sorted based on the hash of the partition key it is possible to +efficiently merge separate SSTables. Content of each partition is also +sorted so each partition can be merged efficiently. + +== Tombstones and Garbage Collection (GC) Grace + +=== Why Tombstones + +When a delete request is received by Cassandra it does not actually +remove the data from the underlying store. Instead it writes a special +piece of data known as a tombstone. The Tombstone represents the delete +and causes all values which occurred before the tombstone to not appear +in queries to the database. This approach is used instead of removing +values because of the distributed nature of Cassandra. + +=== Deletes without tombstones + +Imagine a three node cluster which has the value [A] replicated to every +node.: + +[source,none] +---- +[A], [A], [A] +---- + +If one of the nodes fails and and our delete operation only removes +existing values we can end up with a cluster that looks like: + +[source,none] +---- +[], [], [A] +---- + +Then a repair operation would replace the value of [A] back onto the two +nodes which are missing the value.: + +[source,none] +---- +[A], [A], [A] +---- + +This would cause our data to be resurrected even though it had been +deleted. + +=== Deletes with Tombstones + +Starting again with a three node cluster which has the value [A] +replicated to every node.: + +[source,none] +---- +[A], [A], [A] +---- + +If instead of removing data we add a tombstone record, our single node +failure situation will look like this.: + +[source,none] +---- +[A, Tombstone[A]], [A, Tombstone[A]], [A] +---- + +Now when we issue a repair the Tombstone will be copied to the replica, +rather than the deleted data being resurrected.: + +[source,none] +---- +[A, Tombstone[A]], [A, Tombstone[A]], [A, Tombstone[A]] +---- + +Our repair operation will correctly put the state of the system to what +we expect with the record [A] marked as deleted on all nodes. This does +mean we will end up accruing Tombstones which will permanently +accumulate disk space. To avoid keeping tombstones forever we have a +parameter known as `gc_grace_seconds` for every table in Cassandra. 
+ +=== The gc_grace_seconds parameter and Tombstone Removal + +The table level `gc_grace_seconds` parameter controls how long Cassandra +will retain tombstones through compaction events before finally removing +them. This duration should directly reflect the amount of time a user +expects to allow before recovering a failed node. After +`gc_grace_seconds` has expired the tombstone may be removed (meaning +there will no longer be any record that a certain piece of data was +deleted), but as a tombstone can live in one sstable and the data it +covers in another, a compaction must also include both sstable for a +tombstone to be removed. More precisely, to be able to drop an actual +tombstone the following needs to be true; + +* The tombstone must be older than `gc_grace_seconds` +* If partition X contains the tombstone, the sstable containing the +partition plus all SSTables containing data older than the tombstone +containing X must be included in the same compaction. We don't need to +care if the partition is in an sstable if we can guarantee that all data +in that sstable is newer than the tombstone. If the tombstone is older +than the data it cannot shadow that data. +* If the option `only_purge_repaired_tombstones` is enabled, tombstones +are only removed if the data has also been repaired. + +If a node remains down or disconnected for longer than +`gc_grace_seconds` it's deleted data will be repaired back to the other +nodes and re-appear in the cluster. This is basically the same as in the +"Deletes without Tombstones" section. Note that tombstones will not be +removed until a compaction event even if `gc_grace_seconds` has elapsed. + +The default value for `gc_grace_seconds` is 864000 which is equivalent +to 10 days. This can be set when creating or altering a table using +`WITH gc_grace_seconds`. + +== TTL + +Data in Cassandra can have an additional property called time to live - +this is used to automatically drop data that has expired once the time +is reached. Once the TTL has expired the data is converted to a +tombstone which stays around for at least `gc_grace_seconds`. Note that +if you mix data with TTL and data without TTL (or just different length +of the TTL) Cassandra will have a hard time dropping the tombstones +created since the partition might span many SSTables and not all are +compacted at once. + +== Fully expired SSTables + +If an sstable contains only tombstones and it is guaranteed that that +sstable is not shadowing data in any other sstable compaction can drop +that sstable. If you see SSTables with only tombstones (note that TTL:ed +data is considered tombstones once the time to live has expired) but it +is not being dropped by compaction, it is likely that other SSTables +contain older data. There is a tool called `sstableexpiredblockers` that +will list which SSTables are droppable and which are blocking them from +being dropped. This is especially useful for time series compaction with +`TimeWindowCompactionStrategy` (and the deprecated +`DateTieredCompactionStrategy`). With `TimeWindowCompactionStrategy` it +is possible to remove the guarantee (not check for shadowing data) by +enabling `unsafe_aggressive_sstable_expiration`. + +== Repaired/unrepaired data + +With incremental repairs Cassandra must keep track of what data is +repaired and what data is unrepaired. With anticompaction repaired data +is split out into repaired and unrepaired SSTables. 
To avoid mixing up
+the data again, separate compaction strategy instances are run on the two
+sets of data, each instance only knowing about either the repaired or
+the unrepaired SSTables. This means that if you only run incremental
+repair once and then never again, you might have very old data in the
+repaired SSTables that block compaction from dropping tombstones in the
+unrepaired (probably newer) SSTables.
+
+== Data directories
+
+Since tombstones and data can live in different SSTables it is important
+to realize that losing an sstable might lead to data becoming live again
+- the most common way of losing SSTables is to have a hard drive break
+down. To avoid making data live again, tombstones and actual data are
+always kept in the same data directory. This way, if a disk is lost, all
+versions of a partition are lost and no data can get undeleted. To achieve
+this, a compaction strategy instance per data directory is run in addition
+to the compaction strategy instances containing repaired/unrepaired data;
+this means that if you have 4 data directories there will be 8
+compaction strategy instances running. This has a few more benefits than
+just avoiding data getting undeleted:
+
+* It is possible to run more compactions in parallel - leveled
+compaction will have several totally separate levelings and each one can
+run compactions independently from the others.
+* Users can backup and restore a single data directory.
+* Note though that currently all data directories are considered equal,
+so if you have a tiny disk and a big disk backing two data directories,
+the big one will be limited by the small one. One workaround
+is to create more data directories backed by the big disk.
+
+== Single sstable tombstone compaction
+
+When an sstable is written, a histogram of the tombstone expiry times
+is created, and this is used to find SSTables with very many tombstones
+and run a single sstable compaction on such an sstable in the hope of
+being able to drop its tombstones. Before starting this, it is also
+checked how likely it is that any tombstones will actually be able to
+be dropped, based on how much this sstable overlaps with other
+SSTables. To avoid most of these checks the compaction option
+`unchecked_tombstone_compaction` can be enabled.
+
+[[compaction-options]]
+== Common options
+
+There are a number of common options for all the compaction strategies:
+
+`enabled` (default: true)::
+  Whether minor compactions should run. Note that you can have
+  'enabled': true as a compaction option and then do 'nodetool
+  enableautocompaction' to start running compactions.
+`tombstone_threshold` (default: 0.2)::
+  How much of the sstable should be tombstones for us to consider doing
+  a single sstable compaction of that sstable.
+`tombstone_compaction_interval` (default: 86400s (1 day))::
+  Since it might not be possible to drop any tombstones when doing a
+  single sstable compaction, we need to make sure that one sstable is not
+  constantly getting recompacted - this option states how often we
+  should try for a given sstable.
+`log_all` (default: false)::
+  New detailed compaction logging, see
+  <<detailed-compaction-logging,below>>.
+`unchecked_tombstone_compaction` (default: false)::
+  The single sstable compaction has quite strict checks for whether it
+  should be started; this option disables those checks, and for some
+  use cases this might be needed.
Note that this does not change anything + for the actual compaction, tombstones are only dropped if it is safe + to do so - it might just rewrite an sstable without being able to drop + any tombstones. +`only_purge_repaired_tombstone` (default: false):: + Option to enable the extra safety of making sure that tombstones are + only dropped if the data has been repaired. +`min_threshold` (default: 4):: + Lower limit of number of SSTables before a compaction is triggered. + Not used for `LeveledCompactionStrategy`. +`max_threshold` (default: 32):: + Upper limit of number of SSTables before a compaction is triggered. + Not used for `LeveledCompactionStrategy`. + +Further, see the section on each strategy for specific additional +options. + +== Compaction nodetool commands + +The `nodetool ` utility provides a number of commands related +to compaction: + +`enableautocompaction`:: + Enable compaction. +`disableautocompaction`:: + Disable compaction. +`setcompactionthroughput`:: + How fast compaction should run at most - defaults to 16MB/s, but note + that it is likely not possible to reach this throughput. +`compactionstats`:: + Statistics about current and pending compactions. +`compactionhistory`:: + List details about the last compactions. +`setcompactionthreshold`:: + Set the min/max sstable count for when to trigger compaction, defaults + to 4/32. + +== Switching the compaction strategy and options using JMX + +It is possible to switch compaction strategies and its options on just a +single node using JMX, this is a great way to experiment with settings +without affecting the whole cluster. The mbean is: + +[source,none] +---- +org.apache.cassandra.db:type=ColumnFamilies,keyspace=,columnfamily= +---- + +and the attribute to change is `CompactionParameters` or +`CompactionParametersJson` if you use jconsole or jmc. The syntax for +the json version is the same as you would use in an +`ALTER TABLE ` statement -for example: + +[source,none] +---- +{ 'class': 'LeveledCompactionStrategy', 'sstable_size_in_mb': 123, 'fanout_size': 10} +---- + +The setting is kept until someone executes an +`ALTER TABLE ` that touches the compaction +settings or restarts the node. + +[[detailed-compaction-logging]] +== More detailed compaction logging + +Enable with the compaction option `log_all` and a more detailed +compaction log file will be produced in your log directory. diff --git a/doc/modules/cassandra/pages/operating/compaction/lcs.adoc b/doc/modules/cassandra/pages/operating/compaction/lcs.adoc new file mode 100644 index 000000000000..5b0adb8b4993 --- /dev/null +++ b/doc/modules/cassandra/pages/operating/compaction/lcs.adoc @@ -0,0 +1,81 @@ += Leveled Compaction Strategy + +[[lcs]] +The idea of `LeveledCompactionStrategy` (LCS) is that all sstables are +put into different levels where we guarantee that no overlapping +sstables are in the same level. By overlapping we mean that the +first/last token of a single sstable are never overlapping with other +sstables. This means that for a SELECT we will only have to look for the +partition key in a single sstable per level. Each level is 10x the size +of the previous one and each sstable is 160MB by default. L0 is where +sstables are streamed/flushed - no overlap guarantees are given here. + +When picking compaction candidates we have to make sure that the +compaction does not create overlap in the target level. This is done by +always including all overlapping sstables in the next level. 
For example +if we select an sstable in L3, we need to guarantee that we pick all +overlapping sstables in L4 and make sure that no currently ongoing +compactions will create overlap if we start that compaction. We can +start many parallel compactions in a level if we guarantee that we wont +create overlap. For L0 -> L1 compactions we almost always need to +include all L1 sstables since most L0 sstables cover the full range. We +also can't compact all L0 sstables with all L1 sstables in a single +compaction since that can use too much memory. + +When deciding which level to compact LCS checks the higher levels first +(with LCS, a "higher" level is one with a higher number, L0 being the +lowest one) and if the level is behind a compaction will be started in +that level. + +== Major compaction + +It is possible to do a major compaction with LCS - it will currently +start by filling out L1 and then once L1 is full, it continues with L2 +etc. This is sub optimal and will change to create all the sstables in a +high level instead, CASSANDRA-11817. + +== Bootstrapping + +During bootstrap sstables are streamed from other nodes. The level of +the remote sstable is kept to avoid many compactions after the bootstrap +is done. During bootstrap the new node also takes writes while it is +streaming the data from a remote node - these writes are flushed to L0 +like all other writes and to avoid those sstables blocking the remote +sstables from going to the correct level, we only do STCS in L0 until +the bootstrap is done. + +== STCS in L0 + +If LCS gets very many L0 sstables reads are going to hit all (or most) +of the L0 sstables since they are likely to be overlapping. To more +quickly remedy this LCS does STCS compactions in L0 if there are more +than 32 sstables there. This should improve read performance more +quickly compared to letting LCS do its L0 -> L1 compactions. If you keep +getting too many sstables in L0 it is likely that LCS is not the best +fit for your workload and STCS could work out better. + +== Starved sstables + +If a node ends up with a leveling where there are a few very high level +sstables that are not getting compacted they might make it impossible +for lower levels to drop tombstones etc. For example, if there are +sstables in L6 but there is only enough data to actually get a L4 on the +node the left over sstables in L6 will get starved and not compacted. +This can happen if a user changes sstable_size_in_mb from 5MB to 160MB +for example. To avoid this LCS tries to include those starved high level +sstables in other compactions if there has been 25 compaction rounds +where the highest level has not been involved. + +[[lcs_options]] +== LCS options + +`sstable_size_in_mb` (default: 160MB):: + The target compressed (if using compression) sstable size - the + sstables can end up being larger if there are very large partitions on + the node. +`fanout_size` (default: 10):: + The target size of levels increases by this fanout_size multiplier. + You can reduce the space amplification by tuning this option. + +LCS also support the `cassandra.disable_stcs_in_l0` startup option +(`-Dcassandra.disable_stcs_in_l0=true`) to avoid doing STCS in L0. 
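+
+As a quick illustration, LCS and these options can be set on a table
+with an `ALTER TABLE` statement like the following (a sketch; the
+keyspace and table names are placeholders):
+
+[source,cql]
+----
+ALTER TABLE keyspace.table
+    WITH compaction = {'class': 'LeveledCompactionStrategy',
+                       'sstable_size_in_mb': 160,
+                       'fanout_size': 10};
+----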
diff --git a/doc/modules/cassandra/pages/operating/compaction/stcs.adoc b/doc/modules/cassandra/pages/operating/compaction/stcs.adoc
new file mode 100644
index 000000000000..5a087f6ae93b
--- /dev/null
+++ b/doc/modules/cassandra/pages/operating/compaction/stcs.adoc
@@ -0,0 +1,42 @@
+= Size Tiered Compaction Strategy
+
+[[stcs]]
+The basic idea of `SizeTieredCompactionStrategy` (STCS) is to merge
+sstables of approximately the same size. All sstables are put in
+different buckets depending on their size. An sstable is added to a
+bucket if its size is within `bucket_low` and `bucket_high`
+of the current average size of the sstables already in the bucket. This
+will create several buckets, and the most interesting of those buckets
+will be compacted. The most interesting one is decided by figuring out
+which bucket's sstables take the most reads.
+
+== Major compaction
+
+When running a major compaction with STCS you will end up with two
+sstables per data directory (one for repaired data and one for
+unrepaired data). There is also an option (-s) to do a major compaction
+that splits the output into several sstables. The sizes of the sstables
+are approximately 50%, 25%, 12.5%... of the total size.
+
+[[stcs_options]]
+== STCS options
+
+`min_sstable_size` (default: 50MB)::
+  Sstables smaller than this are put in the same bucket.
+`bucket_low` (default: 0.5)::
+  How much smaller than the average size of a bucket a sstable should be
+  before not being included in the bucket. That is, if
+  `bucket_low * avg_bucket_size < sstable_size` (and the `bucket_high`
+  condition holds, see below), then the sstable is added to the bucket.
+`bucket_high` (default: 1.5)::
+  How much bigger than the average size of a bucket a sstable should be
+  before not being included in the bucket. That is, if
+  `sstable_size < bucket_high * avg_bucket_size` (and the `bucket_low`
+  condition holds, see above), then the sstable is added to the bucket.
+
+== Defragmentation
+
+Defragmentation is done when many sstables are touched during a read.
+The result of the read is put into the memtable so that the next read
+will not have to touch as many sstables. This can cause writes on a
+read-only cluster.
diff --git a/doc/modules/cassandra/pages/operating/compaction/twcs.adoc b/doc/modules/cassandra/pages/operating/compaction/twcs.adoc
new file mode 100644
index 000000000000..21c44f51769d
--- /dev/null
+++ b/doc/modules/cassandra/pages/operating/compaction/twcs.adoc
@@ -0,0 +1,75 @@
+= Time Window Compaction Strategy
+
+[[twcs]]
+`TimeWindowCompactionStrategy` (TWCS) is designed specifically for
+workloads where it's beneficial to have data on disk grouped by the
+timestamp of the data, a common goal when the workload is time-series in
+nature or when all data is written with a TTL. In an expiring/TTL
+workload, the contents of an entire SSTable likely expire at
+approximately the same time, allowing them to be dropped completely, and
+space reclaimed much more reliably than when using
+`SizeTieredCompactionStrategy` or `LeveledCompactionStrategy`. The basic
+concept is that `TimeWindowCompactionStrategy` will create one sstable
+per time window, where a window is simply calculated as the
+combination of two primary options:
+
+[[twcs_options]]
+
+`compaction_window_unit` (default: DAYS)::
+  A Java TimeUnit (MINUTES, HOURS, or DAYS).
+`compaction_window_size` (default: 1)::
+  The number of units that make up a window.
+`unsafe_aggressive_sstable_expiration` (default: false)::
+  Expired sstables will be dropped without checking whether their data is
+  shadowing data in other sstables. This is a potentially risky option that can
+  lead to data loss or deleted data re-appearing, going beyond what
+  unchecked_tombstone_compaction does for single sstable
+  compaction. Due to the risk, the JVM must also be started with
+  `-Dcassandra.unsafe_aggressive_sstable_expiration=true`.
+
+Taken together, the operator can specify windows of virtually any size,
+and `TimeWindowCompactionStrategy` will work to create a
+single sstable for writes within that window. For efficiency during
+writing, the newest window will be compacted using
+`SizeTieredCompactionStrategy`.
+
+Ideally, operators should select a `compaction_window_unit` and
+`compaction_window_size` pair that produces approximately 20-30 windows
+- if writing with a 90 day TTL, for example, a 3 day window would be a
+reasonable choice
+(`'compaction_window_unit':'DAYS','compaction_window_size':3`).
+
+== TimeWindowCompactionStrategy Operational Concerns
+
+The primary motivation for TWCS is to separate data on disk by timestamp
+and to allow fully expired SSTables to drop more efficiently. One
+potential way this optimal behavior can be subverted is if data is
+written to SSTables out of order, with new data and old data in the same
+SSTable. Out of order data can appear in two ways:
+
+* If the user mixes old data and new data in the traditional write path,
+the data will be comingled in the memtables and flushed into the same
+SSTable, where it will remain comingled.
+* If the user's read requests for old data cause read repairs that pull
+old data into the current memtable, that data will be comingled and
+flushed into the same SSTable.
+
+While TWCS tries to minimize the impact of comingled data, users should
+attempt to avoid this behavior. Specifically, users should avoid queries
+that explicitly set the timestamp via CQL `USING TIMESTAMP`.
+Additionally, users should run frequent repairs (which stream data in
+such a way that it does not become comingled).
+
+== Changing TimeWindowCompactionStrategy Options
+
+Operators wishing to enable `TimeWindowCompactionStrategy` on existing
+data should consider running a major compaction first, placing all
+existing data into a single (old) window. Subsequent newer writes will
+then create typical SSTables as expected.
+
+Operators wishing to change `compaction_window_unit` or
+`compaction_window_size` can do so, but may trigger additional
+compactions as adjacent windows are joined together. If the window size
+is decreased (for example, from 24 hours to 12 hours), then the
+existing SSTables will not be modified - TWCS cannot split existing
+SSTables into multiple windows.
diff --git a/doc/modules/cassandra/pages/operating/compression.adoc b/doc/modules/cassandra/pages/operating/compression.adoc
new file mode 100644
index 000000000000..e6f8d50df1aa
--- /dev/null
+++ b/doc/modules/cassandra/pages/operating/compression.adoc
@@ -0,0 +1,187 @@
+= Compression
+
+Cassandra offers operators the ability to configure compression on a
+per-table basis. Compression reduces the size of data on disk by
+compressing the SSTable in user-configurable compression chunks of
+`chunk_length_in_kb`.
As Cassandra SSTables are immutable, the CPU cost +of compressing is only necessary when the SSTable is written - +subsequent updates to data will land in different SSTables, so Cassandra +will not need to decompress, overwrite, and recompress data when UPDATE +commands are issued. On reads, Cassandra will locate the relevant +compressed chunks on disk, decompress the full chunk, and then proceed +with the remainder of the read path (merging data from disks and +memtables, read repair, and so on). + +Compression algorithms typically trade off between the following three +areas: + +* *Compression speed*: How fast does the compression algorithm compress +data. This is critical in the flush and compaction paths because data +must be compressed before it is written to disk. +* *Decompression speed*: How fast does the compression algorithm +de-compress data. This is critical in the read and compaction paths as +data must be read off disk in a full chunk and decompressed before it +can be returned. +* *Ratio*: By what ratio is the uncompressed data reduced by. Cassandra +typically measures this as the size of data on disk relative to the +uncompressed size. For example a ratio of `0.5` means that the data on +disk is 50% the size of the uncompressed data. Cassandra exposes this +ratio per table as the `SSTable Compression Ratio` field of +`nodetool tablestats`. + +Cassandra offers five compression algorithms by default that make +different tradeoffs in these areas. While benchmarking compression +algorithms depends on many factors (algorithm parameters such as +compression level, the compressibility of the input data, underlying +processor class, etc ...), the following table should help you pick a +starting point based on your application's requirements with an +extremely rough grading of the different choices by their performance in +these areas (A is relatively good, F is relatively bad): + +[width="100%",cols="40%,19%,11%,13%,6%,11%",options="header",] +|=== +|Compression Algorithm |Cassandra Class |Compression |Decompression +|Ratio |C* Version + +|https://lz4.github.io/lz4/[LZ4] |`LZ4Compressor` | A+ | A+ | C+ | `>=1.2.2` + +|https://lz4.github.io/lz4/[LZ4HC] |`LZ4Compressor` | C+ | A+ | B+ | `>= 3.6` + +|https://facebook.github.io/zstd/[Zstd] |`ZstdCompressor` | A- | A- | A+ | `>= 4.0` + +|http://google.github.io/snappy/[Snappy] |`SnappyCompressor` | A- | A | C | `>= 1.0` + +|https://zlib.net[Deflate (zlib)] |`DeflateCompressor` | C | C | A | `>= 1.0` +|=== + +Generally speaking for a performance critical (latency or throughput) +application `LZ4` is the right choice as it gets excellent ratio per CPU +cycle spent. This is why it is the default choice in Cassandra. + +For storage critical applications (disk footprint), however, `Zstd` may +be a better choice as it can get significant additional ratio to `LZ4`. + +`Snappy` is kept for backwards compatibility and `LZ4` will typically be +preferable. + +`Deflate` is kept for backwards compatibility and `Zstd` will typically +be preferable. + +== Configuring Compression + +Compression is configured on a per-table basis as an optional argument +to `CREATE TABLE` or `ALTER TABLE`. Three options are available for all +compressors: + +* `class` (default: `LZ4Compressor`): specifies the compression class to +use. The two "fast" compressors are `LZ4Compressor` and +`SnappyCompressor` and the two "good" ratio compressors are +`ZstdCompressor` and `DeflateCompressor`. 
+* `chunk_length_in_kb` (default: `16KiB`): specifies the number of +kilobytes of data per compression chunk. The main tradeoff here is that +larger chunk sizes give compression algorithms more context and improve +their ratio, but require reads to deserialize and read more off disk. +* `crc_check_chance` (default: `1.0`): determines how likely Cassandra +is to verify the checksum on each compression chunk during reads to +protect against data corruption. Unless you have profiles indicating +this is a performance problem it is highly encouraged not to turn this +off as it is Cassandra's only protection against bitrot. + +The `LZ4Compressor` supports the following additional options: + +* `lz4_compressor_type` (default `fast`): specifies if we should use the +`high` (a.k.a `LZ4HC`) ratio version or the `fast` (a.k.a `LZ4`) version +of `LZ4`. The `high` mode supports a configurable level, which can allow +operators to tune the performance <-> ratio tradeoff via the +`lz4_high_compressor_level` option. Note that in `4.0` and above it may +be preferable to use the `Zstd` compressor. +* `lz4_high_compressor_level` (default `9`): A number between `1` and +`17` inclusive that represents how much CPU time to spend trying to get +more compression ratio. Generally lower levels are "faster" but they get +less ratio and higher levels are slower but get more compression ratio. + +The `ZstdCompressor` supports the following options in addition: + +* `compression_level` (default `3`): A number between `-131072` and `22` +inclusive that represents how much CPU time to spend trying to get more +compression ratio. The lower the level, the faster the speed (at the +cost of ratio). Values from 20 to 22 are called "ultra levels" and +should be used with caution, as they require more memory. The default of +`3` is a good choice for competing with `Deflate` ratios and `1` is a +good choice for competing with `LZ4`. + +Users can set compression using the following syntax: + +[source,cql] +---- +CREATE TABLE keyspace.table (id int PRIMARY KEY) + WITH compression = {'class': 'LZ4Compressor'}; +---- + +Or + +[source,cql] +---- +ALTER TABLE keyspace.table + WITH compression = {'class': 'LZ4Compressor', 'chunk_length_in_kb': 64, 'crc_check_chance': 0.5}; +---- + +Once enabled, compression can be disabled with `ALTER TABLE` setting +`enabled` to `false`: + +[source,cql] +---- +ALTER TABLE keyspace.table + WITH compression = {'enabled':'false'}; +---- + +Operators should be aware, however, that changing compression is not +immediate. The data is compressed when the SSTable is written, and as +SSTables are immutable, the compression will not be modified until the +table is compacted. Upon issuing a change to the compression options via +`ALTER TABLE`, the existing SSTables will not be modified until they are +compacted - if an operator needs compression changes to take effect +immediately, the operator can trigger an SSTable rewrite using +`nodetool scrub` or `nodetool upgradesstables -a`, both of which will +rebuild the SSTables on disk, re-compressing the data in the process. + +== Benefits and Uses + +Compression's primary benefit is that it reduces the amount of data +written to disk. Not only does the reduced size save in storage +requirements, it often increases read and write throughput, as the CPU +overhead of compressing data is faster than the time it would take to +read or write the larger volume of uncompressed data from disk. 
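+
+To gauge how much a given table actually benefits, you can check the
+`SSTable Compression Ratio` reported by `nodetool tablestats`, as
+mentioned above. A minimal example (the keyspace and table names are
+placeholders):
+
+[source,none]
+----
+nodetool tablestats my_keyspace.my_table | grep "SSTable Compression Ratio"
+----
+
+A reported ratio of `0.5`, for instance, would mean the data on disk
+occupies roughly half of its uncompressed size.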
+
+Compression is most useful in tables composed of many rows, where the
+rows are similar in nature. Tables containing similar text columns (such
+as repeated JSON blobs) often compress very well. Tables containing data
+that has already been compressed or random data (e.g. benchmark
+datasets) do not typically compress well.
+
+== Operational Impact
+
+* Compression metadata is stored off-heap and scales with data on disk.
+This often requires 1-3GB of off-heap RAM per terabyte of data on disk,
+though the exact usage varies with `chunk_length_in_kb` and compression
+ratios.
+* Streaming operations involve compressing and decompressing data on
+compressed tables - in some code paths (such as non-vnode bootstrap),
+the CPU overhead of compression can be a limiting factor.
+* To prevent slow compressors (`Zstd`, `Deflate`, `LZ4HC`) from blocking
+flushes for too long, all three flush with the default fast `LZ4`
+compressor and then rely on normal compaction to re-compress the data
+into the desired compression strategy. See
+https://issues.apache.org/jira/browse/CASSANDRA-15379[CASSANDRA-15379]
+for more details.
+* The compression path checksums data to ensure correctness - while the
+traditional Cassandra read path does not have a way to ensure
+correctness of data on disk, compressed tables allow the user to set
+`crc_check_chance` (a float from 0.0 to 1.0) to allow Cassandra to
+probabilistically validate chunks on read to verify bits on disk are not
+corrupt.
+
+== Advanced Use
+
+Advanced users can provide their own compression class by implementing
+the interface at `org.apache.cassandra.io.compress.ICompressor`.
diff --git a/doc/modules/cassandra/pages/operating/hardware.adoc b/doc/modules/cassandra/pages/operating/hardware.adoc
new file mode 100644
index 000000000000..24938ad2a816
--- /dev/null
+++ b/doc/modules/cassandra/pages/operating/hardware.adoc
@@ -0,0 +1,100 @@
+= Hardware Choices
+
+Like most databases, Cassandra throughput improves with more CPU cores,
+more RAM, and faster disks. While Cassandra can be made to run on small
+servers for testing or development environments (including Raspberry
+Pis), a minimal production server requires at least 2 cores, and at
+least 8GB of RAM. Typical production servers have 8 or more cores and at
+least 32GB of RAM.
+
+== CPU
+
+Cassandra is highly concurrent, handling many simultaneous requests
+(both read and write) using multiple threads running on as many CPU
+cores as possible. The Cassandra write path tends to be heavily
+optimized (writing to the commitlog and then inserting the data into the
+memtable), so writes, in particular, tend to be CPU bound. Consequently,
+adding additional CPU cores often increases throughput of both reads and
+writes.
+
+== Memory
+
+Cassandra runs within a Java VM, which will pre-allocate a fixed-size
+heap (Java's `-Xmx` parameter). In addition to the heap, Cassandra
+will use significant amounts of RAM off-heap for compression metadata,
+bloom filters, row, key, and counter caches, and an in-process page
+cache. Finally, Cassandra will take advantage of the operating system's
+page cache, storing recently accessed portions of files in RAM for rapid
+re-use.
+
+For optimal performance, operators should benchmark and tune their
+clusters based on their individual workload.
However, basic guidelines +suggest: + +* ECC RAM should always be used, as Cassandra has few internal +safeguards to protect against bit level corruption +* The Cassandra heap should be no less than 2GB, and no more than 50% of +your system RAM +* Heaps smaller than 12GB should consider ParNew/ConcurrentMarkSweep +garbage collection +* Heaps larger than 12GB should consider either: +** 16GB heap with 8-10GB of new gen, a survivor ratio of 4-6, and a maximum +tenuring threshold of 6 +** G1GC + +== Disks + +Cassandra persists data to disk for two very different purposes. The +first is to the commitlog when a new write is made so that it can be +replayed after a crash or system shutdown. The second is to the data +directory when thresholds are exceeded and memtables are flushed to disk +as SSTables. + +Commitlogs receive every write made to a Cassandra node and have the +potential to block client operations, but they are only ever read on +node start-up. SSTable (data file) writes on the other hand occur +asynchronously, but are read to satisfy client look-ups. SSTables are +also periodically merged and rewritten in a process called compaction. +The data held in the commitlog directory is data that has not been +permanently saved to the SSTable data directories - it will be +periodically purged once it is flushed to the SSTable data files. + +Cassandra performs very well on both spinning hard drives and solid +state disks. In both cases, Cassandra's sorted immutable SSTables allow +for linear reads, few seeks, and few overwrites, maximizing throughput +for HDDs and lifespan of SSDs by avoiding write amplification. However, +when using spinning disks, it's important that the commitlog +(`commitlog_directory`) be on one physical disk (not simply a partition, +but a physical disk), and the data files (`data_file_directories`) be +set to a separate physical disk. By separating the commitlog from the +data directory, writes can benefit from sequential appends to the +commitlog without having to seek around the platter as reads request +data from various SSTables on disk. + +In most cases, Cassandra is designed to provide redundancy via multiple +independent, inexpensive servers. For this reason, using NFS or a SAN +for data directories is an antipattern and should typically be avoided. +Similarly, servers with multiple disks are often better served by using +RAID0 or JBOD than RAID1 or RAID5 - replication provided by Cassandra +obsoletes the need for replication at the disk layer, so it's typically +recommended that operators take advantage of the additional throughput +of RAID0 rather than protecting against failures with RAID1 or RAID5. + +== Common Cloud Choices + +Many large users of Cassandra run in various clouds, including AWS, +Azure, and GCE - Cassandra will happily run in any of these +environments. Users should choose similar hardware to what would be +needed in physical space. In EC2, popular options include: + +* i2 instances, which provide both a high RAM:CPU ratio and local +ephemeral SSDs +* i3 instances with NVMe disks +** EBS works okay if you want easy backups and replacements +* m4.2xlarge / c4.4xlarge instances, which provide modern CPUs, enhanced +networking and work well with EBS GP2 (SSD) storage + +Generally, disk and network performance increases with instance size and +generation, so newer generations of instances and larger instance types +within each family often perform better than their smaller or older +alternatives. 
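+
+As a rough illustration of the commitlog/data separation discussed
+above, the relevant `cassandra.yaml` entries on a server with two
+physical disks might look like the following (the mount points are
+purely illustrative):
+
+[source,yaml]
+----
+# Dedicated physical disk for the commitlog
+commitlog_directory: /mnt/disk1/cassandra/commitlog
+
+# Separate physical disk (or disks) for SSTable data
+data_file_directories:
+    - /mnt/disk2/cassandra/data
+----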
diff --git a/doc/modules/cassandra/pages/operating/hints.adoc b/doc/modules/cassandra/pages/operating/hints.adoc new file mode 100644 index 000000000000..5e34093b42c5 --- /dev/null +++ b/doc/modules/cassandra/pages/operating/hints.adoc @@ -0,0 +1,248 @@ += Hints + +Hinting is a data repair technique applied during write operations. When +replica nodes are unavailable to accept a mutation, either due to +failure or more commonly routine maintenance, coordinators attempting to +write to those replicas store temporary hints on their local filesystem +for later application to the unavailable replica. Hints are an important +way to help reduce the duration of data inconsistency. Coordinators +replay hints quickly after unavailable replica nodes return to the ring. +Hints are best effort, however, and do not guarantee eventual +consistency like xref:operating/repair.adoc[`anti-entropy repair`] does. + +Hints are useful because of how Apache Cassandra replicates data to +provide fault tolerance, high availability and durability. Cassandra +xref:architecture/dynamo.adoc#consistent-hashing-using-a-token-ring[`partitions data across the cluster`] using +consistent hashing, and then replicates keys to multiple nodes along the +hash ring. To guarantee availability, all replicas of a key can accept +mutations without consensus, but this means it is possible for some +replicas to accept a mutation while others do not. When this happens an +inconsistency is introduced. + +Hints are one of the three ways, in addition to read-repair and +full/incremental anti-entropy repair, that Cassandra implements the +eventual consistency guarantee that all updates are eventually received +by all replicas. Hints, like read-repair, are best effort and not an +alternative to performing full repair, but they do help reduce the +duration of inconsistency between replicas in practice. + +== Hinted Handoff + +Hinted handoff is the process by which Cassandra applies hints to +unavailable nodes. + +For example, consider a mutation is to be made at `Consistency Level` +`LOCAL_QUORUM` against a keyspace with `Replication Factor` of `3`. +Normally the client sends the mutation to a single coordinator, who then +sends the mutation to all three replicas, and when two of the three +replicas acknowledge the mutation the coordinator responds successfully +to the client. If a replica node is unavailable, however, the +coordinator stores a hint locally to the filesystem for later +application. New hints will be retained for up to +`max_hint_window_in_ms` of downtime (defaults to `3 hours`). If the +unavailable replica does return to the cluster before the window +expires, the coordinator applies any pending hinted mutations against +the replica to ensure that eventual consistency is maintained. + +image::hints.svg[Hinted Handoff in Action] + +* (`t0`): The write is sent by the client, and the coordinator sends it +to the three replicas. Unfortunately `replica_2` is restarting and +cannot receive the mutation. +* (`t1`): The client receives a quorum acknowledgement from the +coordinator. At this point the client believe the write to be durable +and visible to reads (which it is). +* (`t2`): After the write timeout (default `2s`), the coordinator +decides that `replica_2` is unavailable and stores a hint to its local +disk. +* (`t3`): Later, when `replica_2` starts back up it sends a gossip +message to all nodes, including the coordinator. +* (`t4`): The coordinator replays hints including the missed mutation +against `replica_2`. 
+ +If the node does not return in time, the destination replica will be +permanently out of sync until either read-repair or full/incremental +anti-entropy repair propagates the mutation. + +=== Application of Hints + +Hints are streamed in bulk, a segment at a time, to the target replica +node and the target node replays them locally. After the target node has +replayed a segment it deletes the segment and receives the next segment. +This continues until all hints are drained. + +=== Storage of Hints on Disk + +Hints are stored in flat files in the coordinator node’s +`$CASSANDRA_HOME/data/hints` directory. A hint includes a hint id, the +target replica node on which the mutation is meant to be stored, the +serialized mutation (stored as a blob) that couldn't be delivered to the +replica node, the mutation timestamp, and the Cassandra version used to +serialize the mutation. By default hints are compressed using +`LZ4Compressor`. Multiple hints are appended to the same hints file. + +Since hints contain the original unmodified mutation timestamp, hint +application is idempotent and cannot overwrite a future mutation. + +=== Hints for Timed Out Write Requests + +Hints are also stored for write requests that time out. The +`write_request_timeout_in_ms` setting in `cassandra.yaml` configures the +timeout for write requests. + +[source,none] +---- +write_request_timeout_in_ms: 2000 +---- + +The coordinator waits for the configured amount of time for write +requests to complete, at which point it will time out and generate a +hint for the timed out request. The lowest acceptable value for +`write_request_timeout_in_ms` is 10 ms. + +== Configuring Hints + +Hints are enabled by default as they are critical for data consistency. +The `cassandra.yaml` configuration file provides several settings for +configuring hints: + +Table 1. Settings for Hints + +[width="100%",cols="38%,36%,26%",] +|=== +|Setting |Description |Default Value + +|`hinted_handoff_enabled` |Enables/Disables hinted handoffs |`true` + +|`hinted_handoff_disabled_datacenters` a| +A list of data centers that do not perform hinted handoffs even when +handoff is otherwise enabled. Example: + +a| +[source,yaml] +---- +hinted_handoff_disabled_datacenters: + - DC1 + - DC2 +---- + +|`unset` + +|`max_hint_window_in_ms` |Defines the maximum amount of time (ms) a node +shall have hints generated after it has failed. |`10800000` # 3 hours + +|`hinted_handoff_throttle_in_kb` |Maximum throttle in KBs per second, +per delivery thread. This will be reduced proportionally to the number +of nodes in the cluster. (If there are two nodes in the cluster, each +delivery thread will use the maximum rate; if there are 3, each will +throttle to half of the maximum,since it is expected for two nodes to be +delivering hints simultaneously.) |`1024` + +|`max_hints_delivery_threads` |Number of threads with which to deliver +hints; Consider increasing this number when you have multi-dc +deployments, since cross-dc handoff tends to be slower |`2` + +|`hints_directory` |Directory where Cassandra stores hints. +|`$CASSANDRA_HOME/data/hints` + +|`hints_flush_period_in_ms` |How often hints should be flushed from the +internal buffers to disk. Will _not_ trigger fsync. |`10000` + +|`max_hints_file_size_in_mb` |Maximum size for a single hints file, in +megabytes. |`128` + +|`hints_compression` |Compression to apply to the hint files. If +omitted, hints files will be written uncompressed. LZ4, Snappy, and +Deflate compressors are supported. 
|`LZ4Compressor` +|=== + +== Configuring Hints at Runtime with `nodetool` + +`nodetool` provides several commands for configuring hints or getting +hints related information. The nodetool commands override the +corresponding settings if any in `cassandra.yaml` for the node running +the command. + +Table 2. Nodetool Commands for Hints + +[width="100%",cols="43%,57%",] +|=== +|Command |Description + +|`nodetool disablehandoff` |Disables storing and delivering hints + +|`nodetool disablehintsfordc` |Disables storing and delivering hints to +a data center + +|`nodetool enablehandoff` |Re-enables future hints storing and delivery +on the current node + +|`nodetool enablehintsfordc` |Enables hints for a data center that was +previously disabled + +|`nodetool getmaxhintwindow` |Prints the max hint window in ms. New in +Cassandra 4.0. + +|`nodetool handoffwindow` |Prints current hinted handoff window + +|`nodetool pausehandoff` |Pauses hints delivery process + +|`nodetool resumehandoff` |Resumes hints delivery process + +|`nodetool sethintedhandoffthrottlekb` |Sets hinted handoff throttle in +kb per second, per delivery thread + +|`nodetool setmaxhintwindow` |Sets the specified max hint window in ms + +|`nodetool statushandoff` |Status of storing future hints on the current +node + +|`nodetool truncatehints` |Truncates all hints on the local node, or +truncates hints for the endpoint(s) specified. +|=== + +=== Make Hints Play Faster at Runtime + +The default of `1024 kbps` handoff throttle is conservative for most +modern networks, and it is entirely possible that in a simple node +restart you may accumulate many gigabytes hints that may take hours to +play back. For example if you are ingesting `100 Mbps` of data per node, +a single 10 minute long restart will create +`10 minutes * (100 megabit / second) ~= 7 GiB` of data which at +`(1024 KiB / second)` would take +`7.5 GiB / (1024 KiB / second) = 2.03 hours` to play back. The exact +math depends on the load balancing strategy (round robin is better than +token aware), number of tokens per node (more tokens is better than +fewer), and naturally the cluster's write rate, but regardless you may +find yourself wanting to increase this throttle at runtime. + +If you find yourself in such a situation, you may consider raising the +`hinted_handoff_throttle` dynamically via the +`nodetool sethintedhandoffthrottlekb` command. + +=== Allow a Node to be Down Longer at Runtime + +Sometimes a node may be down for more than the normal +`max_hint_window_in_ms`, (default of three hours), but the hardware and +data itself will still be accessible. In such a case you may consider +raising the `max_hint_window_in_ms` dynamically via the +`nodetool setmaxhintwindow` command added in Cassandra 4.0 +(https://issues.apache.org/jira/browse/CASSANDRA-11720[CASSANDRA-11720]). +This will instruct Cassandra to continue holding hints for the down +endpoint for a longer amount of time. + +This command should be applied on all nodes in the cluster that may be +holding hints. If needed, the setting can be applied permanently by +setting the `max_hint_window_in_ms` setting in `cassandra.yaml` followed +by a rolling restart. + +== Monitoring Hint Delivery + +Cassandra 4.0 adds histograms available to understand how long it takes +to deliver hints which is useful for operators to better identify +problems +(https://issues.apache.org/jira/browse/CASSANDRA-13234[CASSANDRA-13234]). + +There are also metrics available for tracking +`Hinted Handoff ` and +`Hints Service ` metrics. 
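+
+Putting the two runtime adjustments above together, an operator
+expecting a peer to be down for up to a day might run the following on
+the nodes holding hints (the values are illustrative and should be
+sized for your own network and disks):
+
+[source,none]
+----
+# Hold hints for 24 hours instead of the default 3
+nodetool setmaxhintwindow 86400000
+
+# Raise the per-thread delivery throttle from 1024 to 8192 KiB per second
+nodetool sethintedhandoffthrottlekb 8192
+
+# Verify the new hint window
+nodetool getmaxhintwindow
+----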
diff --git a/doc/modules/cassandra/pages/operating/index.adoc b/doc/modules/cassandra/pages/operating/index.adoc new file mode 100644 index 000000000000..367430a2eff6 --- /dev/null +++ b/doc/modules/cassandra/pages/operating/index.adoc @@ -0,0 +1,15 @@ +== Operating Cassandra + +* xref:operating/hardware.adoc[Hardware] +* xref:operating/security.adoc[Security] +* xref:operating/topo_changes.adoc[Topology changes] +* xref:operating/hints.adoc[Hints] +* xref:operating/repair.adoc[Repair] +* xref:operating/read_repair.adoc[Read repair] +* xref:operating/backups.adoc[Backups] +* xref:operating/compression.adoc[Compression] +* xref:operating/compaction/index.adoc[Compaction] +* xref:operating/metrics.adoc[Monitoring] +* xref:operating/bulk_loading.adoc[Bulk loading] +* xref:operating/cdc.adoc[CDC] +* xref:operating/bloom_filters.adoc[Bloom filters] diff --git a/doc/modules/cassandra/pages/operating/metrics.adoc b/doc/modules/cassandra/pages/operating/metrics.adoc new file mode 100644 index 000000000000..1eb8156dc20a --- /dev/null +++ b/doc/modules/cassandra/pages/operating/metrics.adoc @@ -0,0 +1,1088 @@ += Monitoring + +Metrics in Cassandra are managed using the +http://metrics.dropwizard.io[Dropwizard Metrics] library. These metrics +can be queried via JMX or pushed to external monitoring systems using a +number of +http://metrics.dropwizard.io/3.1.0/getting-started/#other-reporting[built +in] and http://metrics.dropwizard.io/3.1.0/manual/third-party/[third +party] reporter plugins. + +Metrics are collected for a single node. It's up to the operator to use +an external monitoring system to aggregate them. + +== Metric Types + +All metrics reported by cassandra fit into one of the following types. + +`Gauge`:: + An instantaneous measurement of a value. +`Counter`:: + A gauge for an `AtomicLong` instance. Typically this is consumed by + monitoring the change since the last call to see if there is a large + increase compared to the norm. +`Histogram`:: + Measures the statistical distribution of values in a stream of data. + + + In addition to minimum, maximum, mean, etc., it also measures median, + 75th, 90th, 95th, 98th, 99th, and 99.9th percentiles. +`Timer`:: + Measures both the rate that a particular piece of code is called and + the histogram of its duration. +`Latency`:: + Special type that tracks latency (in microseconds) with a `Timer` plus + a `Counter` that tracks the total latency accrued since starting. The + former is useful if you track the change in total latency since the + last check. Each metric name of this type will have 'Latency' and + 'TotalLatency' appended to it. +`Meter`:: + A meter metric which measures mean throughput and one-, five-, and + fifteen-minute exponentially-weighted moving average throughputs. + +== Table Metrics + +Each table in Cassandra has metrics responsible for tracking its state +and performance. + +The metric names are all appended with the specific `Keyspace` and +`Table` name. + +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.Table...
` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=Table keyspace= scope=
name=` + +[NOTE] +.Note +==== +There is a special table called '`all`' without a keyspace. This +represents the aggregation of metrics across *all* tables and keyspaces +on the node. +====[cols=",,",options="header",] +|=== +|Name |Type |Description +|MemtableOnHeapSize |Gauge |Total amount of data stored in the +memtable that resides *on*-heap, including column related overhead and +partitions overwritten. + +|MemtableOffHeapSize |Gauge |Total amount of data stored in the +memtable that resides *off*-heap, including column related overhead and +partitions overwritten. + +|MemtableLiveDataSize |Gauge |Total amount of live data stored in +the memtable, excluding any data structure overhead. + +|AllMemtablesOnHeapSize |Gauge |Total amount of data stored in the +memtables (2i and pending flush memtables included) that resides +*on*-heap. + +|AllMemtablesOffHeapSize |Gauge |Total amount of data stored in +the memtables (2i and pending flush memtables included) that resides +*off*-heap. + +|AllMemtablesLiveDataSize |Gauge |Total amount of live data stored +in the memtables (2i and pending flush memtables included) that resides +off-heap, excluding any data structure overhead. + +|MemtableColumnsCount |Gauge |Total number of columns present in +the memtable. + +|MemtableSwitchCount |Counter |Number of times flush has resulted in the +memtable being switched out. + +|CompressionRatio |Gauge |Current compression ratio for all +SSTables. + +|EstimatedPartitionSizeHistogram |Gauge |Histogram of estimated +partition size (in bytes). + +|EstimatedPartitionCount |Gauge |Approximate number of keys in +table. + +|EstimatedColumnCountHistogram |Gauge |Histogram of estimated +number of columns. + +|SSTablesPerReadHistogram |Histogram |Histogram of the number of sstable +data files accessed per single partition read. SSTables skipped due to +Bloom Filters, min-max key or partition index lookup are not taken into +acoount. + +|ReadLatency |Latency |Local read latency for this table. + +|RangeLatency |Latency |Local range scan latency for this table. + +|WriteLatency |Latency |Local write latency for this table. + +|CoordinatorReadLatency |Timer |Coordinator read latency for this table. + +|CoordinatorWriteLatency |Timer |Coordinator write latency for this +table. + +|CoordinatorScanLatency |Timer |Coordinator range scan latency for this +table. + +|PendingFlushes |Counter |Estimated number of flush tasks pending for +this table. + +|BytesFlushed |Counter |Total number of bytes flushed since server +[re]start. + +|CompactionBytesWritten |Counter |Total number of bytes written by +compaction since server [re]start. + +|PendingCompactions |Gauge |Estimate of number of pending +compactions for this table. + +|LiveSSTableCount |Gauge |Number of SSTables on disk for this +table. + +|LiveDiskSpaceUsed |Counter |Disk space used by SSTables belonging to +this table (in bytes). + +|TotalDiskSpaceUsed |Counter |Total disk space used by SSTables +belonging to this table, including obsolete ones waiting to be GC'd. + +|MinPartitionSize |Gauge |Size of the smallest compacted partition +(in bytes). + +|MaxPartitionSize |Gauge |Size of the largest compacted partition +(in bytes). + +|MeanPartitionSize |Gauge |Size of the average compacted partition +(in bytes). + +|BloomFilterFalsePositives |Gauge |Number of false positives on +table's bloom filter. + +|BloomFilterFalseRatio |Gauge |False positive ratio of table's +bloom filter. + +|BloomFilterDiskSpaceUsed |Gauge |Disk space used by bloom filter +(in bytes). 
+ +|BloomFilterOffHeapMemoryUsed |Gauge |Off-heap memory used by +bloom filter. + +|IndexSummaryOffHeapMemoryUsed |Gauge |Off-heap memory used by +index summary. + +|CompressionMetadataOffHeapMemoryUsed |Gauge |Off-heap memory used +by compression meta data. + +|KeyCacheHitRate |Gauge |Key cache hit rate for this table. + +|TombstoneScannedHistogram |Histogram |Histogram of tombstones scanned +in queries on this table. + +|LiveScannedHistogram |Histogram |Histogram of live cells scanned in +queries on this table. + +|ColUpdateTimeDeltaHistogram |Histogram |Histogram of column update time +delta on this table. + +|ViewLockAcquireTime |Timer |Time taken acquiring a partition lock for +materialized view updates on this table. + +|ViewReadTime |Timer |Time taken during the local read of a materialized +view update. + +|TrueSnapshotsSize |Gauge |Disk space used by snapshots of this +table including all SSTable components. + +|RowCacheHitOutOfRange |Counter |Number of table row cache hits that do +not satisfy the query filter, thus went to disk. + +|RowCacheHit |Counter |Number of table row cache hits. + +|RowCacheMiss |Counter |Number of table row cache misses. + +|CasPrepare |Latency |Latency of paxos prepare round. + +|CasPropose |Latency |Latency of paxos propose round. + +|CasCommit |Latency |Latency of paxos commit round. + +|PercentRepaired |Gauge |Percent of table data that is repaired +on disk. + +|BytesRepaired |Gauge |Size of table data repaired on disk + +|BytesUnrepaired |Gauge |Size of table data unrepaired on disk + +|BytesPendingRepair |Gauge |Size of table data isolated for an +ongoing incremental repair + +|SpeculativeRetries |Counter |Number of times speculative retries were +sent for this table. + +|SpeculativeFailedRetries |Counter |Number of speculative retries that +failed to prevent a timeout + +|SpeculativeInsufficientReplicas |Counter |Number of speculative retries +that couldn't be attempted due to lack of replicas + +|SpeculativeSampleLatencyNanos |Gauge |Number of nanoseconds to +wait before speculation is attempted. Value may be statically configured +or updated periodically based on coordinator latency. + +|WaitingOnFreeMemtableSpace |Histogram |Histogram of time spent waiting +for free memtable space, either on- or off-heap. + +|DroppedMutations |Counter |Number of dropped mutations on this table. + +|AnticompactionTime |Timer |Time spent anticompacting before a +consistent repair. + +|ValidationTime |Timer |Time spent doing validation compaction during +repair. + +|SyncTime |Timer |Time spent doing streaming during repair. + +|BytesValidated |Histogram |Histogram over the amount of bytes read +during validation. + +|PartitionsValidated |Histogram |Histogram over the number of partitions +read during validation. + +|BytesAnticompacted |Counter |How many bytes we anticompacted. + +|BytesMutatedAnticompaction |Counter |How many bytes we avoided +anticompacting because the sstable was fully contained in the repaired +range. + +|MutatedAnticompactionGauge |Gauge |Ratio of bytes mutated vs +total bytes repaired. +|=== + +== Keyspace Metrics + +Each keyspace in Cassandra has metrics responsible for tracking its +state and performance. + +Most of these metrics are the same as the `Table Metrics` above, only +they are aggregated at the Keyspace level. The keyspace specific metrics +are specified in the table below. 
+ +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.keyspace..` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=Keyspace scope= name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|WriteFailedIdeaCL |Counter |Number of writes that failed to achieve the +configured ideal consistency level or 0 if none is configured + +|IdealCLWriteLatency |Latency |Coordinator latency of writes at the +configured ideal consistency level. No values are recorded if ideal +consistency level is not configured + +|RepairTime |Timer |Total time spent as repair coordinator. + +|RepairPrepareTime |Timer |Total time spent preparing for repair. +|=== + +== ThreadPool Metrics + +Cassandra splits work of a particular type into its own thread pool. +This provides back-pressure and asynchrony for requests on a node. It's +important to monitor the state of these thread pools since they can tell +you how saturated a node is. + +The metric names are all appended with the specific `ThreadPool` name. +The thread pools are also categorized under a specific type. + +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.ThreadPools...` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=ThreadPools path= scope= name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|ActiveTasks |Gauge |Number of tasks being actively worked on +by this pool. + +|PendingTasks |Gauge |Number of queued tasks queued up on this +pool. + +|CompletedTasks |Counter |Number of tasks completed. + +|TotalBlockedTasks |Counter |Number of tasks that were blocked due to +queue saturation. + +|CurrentlyBlockedTask |Counter |Number of tasks that are currently +blocked due to queue saturation but on retry will become unblocked. + +|MaxPoolSize |Gauge |The maximum number of threads in this +pool. + +|MaxTasksQueued |Gauge |The maximum number of tasks queued +before a task get blocked. +|=== + +The following thread pools can be monitored. 
+ +[cols=",,",options="header",] +|=== +|Name |Type |Description +|Native-Transport-Requests |transport |Handles client CQL requests + +|CounterMutationStage |request |Responsible for counter writes + +|ViewMutationStage |request |Responsible for materialized view writes + +|MutationStage |request |Responsible for all other writes + +|ReadRepairStage |request |ReadRepair happens on this thread pool + +|ReadStage |request |Local reads run on this thread pool + +|RequestResponseStage |request |Coordinator requests to the cluster run +on this thread pool + +|AntiEntropyStage |internal |Builds merkle tree for repairs + +|CacheCleanupExecutor |internal |Cache maintenance performed on this +thread pool + +|CompactionExecutor |internal |Compactions are run on these threads + +|GossipStage |internal |Handles gossip requests + +|HintsDispatcher |internal |Performs hinted handoff + +|InternalResponseStage |internal |Responsible for intra-cluster +callbacks + +|MemtableFlushWriter |internal |Writes memtables to disk + +|MemtablePostFlush |internal |Cleans up commit log after memtable is +written to disk + +|MemtableReclaimMemory |internal |Memtable recycling + +|MigrationStage |internal |Runs schema migrations + +|MiscStage |internal |Misceleneous tasks run here + +|PendingRangeCalculator |internal |Calculates token range + +|PerDiskMemtableFlushWriter_0 |internal |Responsible for writing a spec +(there is one of these per disk 0-N) + +|Sampler |internal |Responsible for re-sampling the index summaries of +SStables + +|SecondaryIndexManagement |internal |Performs updates to secondary +indexes + +|ValidationExecutor |internal |Performs validation compaction or +scrubbing + +|ViewBuildExecutor |internal |Performs materialized views initial build +|=== + +== Client Request Metrics + +Client requests have their own set of metrics that encapsulate the work +happening at coordinator level. + +Different types of client requests are broken down by `RequestType`. + +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.ClientRequest..` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=ClientRequest scope= name=` + +RequestType:: + CASRead +Description:: + Metrics related to transactional read requests. +Metrics:: + [cols=",,",options="header",] + |=== + |Name |Type |Description + |Timeouts |Counter |Number of timeouts encountered. + + |Failures |Counter |Number of transaction failures encountered. + + |  |Latency |Transaction read latency. + + |Unavailables |Counter |Number of unavailable exceptions encountered. + + |UnfinishedCommit |Counter |Number of transactions that were committed + on read. + + |ConditionNotMet |Counter |Number of transaction preconditions did not + match current values. + + |ContentionHistogram |Histogram |How many contended reads were + encountered + |=== +RequestType:: + CASWrite +Description:: + Metrics related to transactional write requests. +Metrics:: + [cols=",,",options="header",] + |=== + |Name |Type |Description + |Timeouts |Counter |Number of timeouts encountered. + + |Failures |Counter |Number of transaction failures encountered. + + |  |Latency |Transaction write latency. + + |UnfinishedCommit |Counter |Number of transactions that were committed + on write. + + |ConditionNotMet |Counter |Number of transaction preconditions did not + match current values. + + |ContentionHistogram |Histogram |How many contended writes were + encountered + + |MutationSizeHistogram |Histogram |Total size in bytes of the requests + mutations. 
+  |===
+RequestType::
+  Read
+Description::
+  Metrics related to standard read requests.
+Metrics::
+  [cols=",,",options="header",]
+  |===
+  |Name |Type |Description
+  |Timeouts |Counter |Number of timeouts encountered.
+  |Failures |Counter |Number of read failures encountered.
+  |  |Latency |Read latency.
+  |Unavailables |Counter |Number of unavailable exceptions encountered.
+  |===
+RequestType::
+  RangeSlice
+Description::
+  Metrics related to token range read requests.
+Metrics::
+  [cols=",,",options="header",]
+  |===
+  |Name |Type |Description
+  |Timeouts |Counter |Number of timeouts encountered.
+  |Failures |Counter |Number of range query failures encountered.
+  |  |Latency |Range query latency.
+  |Unavailables |Counter |Number of unavailable exceptions encountered.
+  |===
+RequestType::
+  Write
+Description::
+  Metrics related to regular write requests.
+Metrics::
+  [cols=",,",options="header",]
+  |===
+  |Name |Type |Description
+  |Timeouts |Counter |Number of timeouts encountered.
+  |Failures |Counter |Number of write failures encountered.
+  |  |Latency |Write latency.
+  |Unavailables |Counter |Number of unavailable exceptions encountered.
+  |MutationSizeHistogram |Histogram |Total size in bytes of the request's
+  mutations.
+  |===
+RequestType::
+  ViewWrite
+Description::
+  Metrics related to materialized view writes.
+Metrics::
+  [cols=",,",]
+  |===
+  |Timeouts |Counter |Number of timeouts encountered.
+  |Failures |Counter |Number of transaction failures encountered.
+  |Unavailables |Counter |Number of unavailable exceptions encountered.
+  |ViewReplicasAttempted |Counter |Total number of attempted view
+  replica writes.
+  |ViewReplicasSuccess |Counter |Total number of succeeded view replica
+  writes.
+  |ViewPendingMutations |Gauge |ViewReplicasAttempted -
+  ViewReplicasSuccess.
+  |ViewWriteLatency |Timer |Time between when mutation is applied to
+  base table and when CL.ONE is achieved on view.
+  |===
+
+== Cache Metrics
+
+Cassandra caches have metrics to track the effectiveness of the caches,
+though the `Table Metrics` above are often more useful.
+
+Reported name format:
+
+*Metric Name*::
+  `org.apache.cassandra.metrics.Cache.<MetricName>.<CacheName>`
+*JMX MBean*::
+  `org.apache.cassandra.metrics:type=Cache scope=<CacheName> name=<MetricName>`
+
+[cols=",,",options="header",]
+|===
+|Name |Type |Description
+|Capacity |Gauge |Cache capacity in bytes.
+|Entries |Gauge |Total number of cache entries.
+|FifteenMinuteCacheHitRate |Gauge |15m cache hit rate.
+|FiveMinuteCacheHitRate |Gauge |5m cache hit rate.
+|OneMinuteCacheHitRate |Gauge |1m cache hit rate.
+|HitRate |Gauge |All time cache hit rate.
+|Hits |Meter |Total number of cache hits.
+|Misses |Meter |Total number of cache misses.
+|MissLatency |Timer |Latency of misses.
+|Requests |Gauge |Total number of cache requests.
+|Size |Gauge |Total size of occupied cache, in bytes.
+|===
+
+The following caches are covered:
+
+[cols=",",options="header",]
+|===
+|Name |Description
+|CounterCache |Keeps hot counters in memory for performance.
+|ChunkCache |In-process uncompressed page cache.
+|KeyCache |Cache for partition to sstable offsets.
+|RowCache |Cache for rows kept in memory.
+|===
+
+[NOTE]
+.Note
+====
+Misses and MissLatency are only defined for the ChunkCache.
+====
+
+== CQL Metrics
+
+Metrics specific to CQL prepared statement caching.
+ +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.CQL.` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=CQL name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|PreparedStatementsCount |Gauge |Number of cached prepared +statements. + +|PreparedStatementsEvicted |Counter |Number of prepared statements +evicted from the prepared statement cache + +|PreparedStatementsExecuted |Counter |Number of prepared statements +executed. + +|RegularStatementsExecuted |Counter |Number of *non* prepared statements +executed. + +|PreparedStatementsRatio |Gauge |Percentage of statements that +are prepared vs unprepared. +|=== + +[[dropped-metrics]] +== DroppedMessage Metrics + +Metrics specific to tracking dropped messages for different types of +requests. Dropped writes are stored and retried by `Hinted Handoff` + +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.DroppedMessage..` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=DroppedMessage scope= name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|CrossNodeDroppedLatency |Timer |The dropped latency across nodes. +|InternalDroppedLatency |Timer |The dropped latency within node. +|Dropped |Meter |Number of dropped messages. +|=== + +The different types of messages tracked are: + +[cols=",",options="header",] +|=== +|Name |Description +|BATCH_STORE |Batchlog write +|BATCH_REMOVE |Batchlog cleanup (after succesfully applied) +|COUNTER_MUTATION |Counter writes +|HINT |Hint replay +|MUTATION |Regular writes +|READ |Regular reads +|READ_REPAIR |Read repair +|PAGED_SLICE |Paged read +|RANGE_SLICE |Token range read +|REQUEST_RESPONSE |RPC Callbacks +|_TRACE |Tracing writes +|=== + +== Streaming Metrics + +Metrics reported during `Streaming` operations, such as repair, +bootstrap, rebuild. + +These metrics are specific to a peer endpoint, with the source node +being the node you are pulling the metrics from. + +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.Streaming..` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=Streaming scope= name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|IncomingBytes |Counter |Number of bytes streamed to this node from the +peer. + +|OutgoingBytes |Counter |Number of bytes streamed to the peer endpoint +from this node. +|=== + +== Compaction Metrics + +Metrics specific to `Compaction` work. + +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.Compaction.` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=Compaction name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|BytesCompacted |Counter |Total number of bytes compacted since server +[re]start. + +|PendingTasks |Gauge |Estimated number of compactions remaining +to perform. + +|CompletedTasks |Gauge |Number of completed compactions since +server [re]start. + +|TotalCompactionsCompleted |Meter |Throughput of completed compactions +since server [re]start. + +|PendingTasksByTableName |Gauge>> +|Estimated number of compactions remaining to perform, grouped by +keyspace and then table name. This info is also kept in `Table Metrics`. 
+|=== + +== CommitLog Metrics + +Metrics specific to the `CommitLog` + +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.CommitLog.` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=CommitLog name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|CompletedTasks |Gauge |Total number of commit log messages +written since [re]start. + +|PendingTasks |Gauge |Number of commit log messages written but +yet to be fsync'd. + +|TotalCommitLogSize |Gauge |Current size, in bytes, used by all +the commit log segments. + +|WaitingOnSegmentAllocation |Timer |Time spent waiting for a +CommitLogSegment to be allocated - under normal conditions this should +be zero. + +|WaitingOnCommit |Timer |The time spent waiting on CL fsync; for +Periodic this is only occurs when the sync is lagging its sync interval. +|=== + +== Storage Metrics + +Metrics specific to the storage engine. + +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.Storage.` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=Storage name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|Exceptions |Counter |Number of internal exceptions caught. Under normal +exceptions this should be zero. + +|Load |Counter |Size, in bytes, of the on disk data size this node +manages. + +|TotalHints |Counter |Number of hint messages written to this node since +[re]start. Includes one entry for each host to be hinted per hint. + +|TotalHintsInProgress |Counter |Number of hints attemping to be sent +currently. +|=== + +[[handoff-metrics]] +== HintedHandoff Metrics + +Metrics specific to Hinted Handoff. There are also some metrics related +to hints tracked in `Storage Metrics` + +These metrics include the peer endpoint *in the metric name* + +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.HintedHandOffManager.` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=HintedHandOffManager name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|Hints_created- a| +____ +Counter +____ + +a| +____ +Number of hints on disk for this peer. +____ + +|Hints_not_stored- a| +____ +Counter +____ + +a| +____ +Number of hints not stored for this peer, due to being down past the +configured hint window. +____ + +|=== + +== HintsService Metrics + +Metrics specific to the Hints delivery service. There are also some +metrics related to hints tracked in `Storage Metrics` + +These metrics include the peer endpoint *in the metric name* + +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.HintsService.` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=HintsService name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|HintsSucceeded a| +____ +Meter +____ + +a| +____ +A meter of the hints successfully delivered +____ + +|HintsFailed a| +____ +Meter +____ + +a| +____ +A meter of the hints that failed deliver +____ + +|HintsTimedOut a| +____ +Meter +____ + +a| +____ +A meter of the hints that timed out +____ + +|Hint_delays |Histogram |Histogram of hint delivery delays (in +milliseconds) + +|Hint_delays- |Histogram |Histogram of hint delivery delays (in +milliseconds) per peer +|=== + +== SSTable Index Metrics + +Metrics specific to the SSTable index metadata. 
+ +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.Index..RowIndexEntry` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=Index scope=RowIndexEntry name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|IndexedEntrySize |Histogram |Histogram of the on-heap size, in bytes, +of the index across all SSTables. + +|IndexInfoCount |Histogram |Histogram of the number of on-heap index +entries managed across all SSTables. + +|IndexInfoGets |Histogram |Histogram of the number index seeks performed +per SSTable. +|=== + +== BufferPool Metrics + +Metrics specific to the internal recycled buffer pool Cassandra manages. +This pool is meant to keep allocations and GC lower by recycling on and +off heap buffers. + +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.BufferPool.` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=BufferPool name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|Size |Gauge |Size, in bytes, of the managed buffer pool + +|Misses |Meter a| +____ +The rate of misses in the pool. The higher this is the more allocations +incurred. +____ + +|=== + +== Client Metrics + +Metrics specifc to client managment. + +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.Client.` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=Client name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|connectedNativeClients |Gauge |Number of clients connected to +this nodes native protocol server + +|connections |Gauge> |List of all connections +and their state information + +|connectedNativeClientsByUser |Gauge |Number of +connnective native clients by username +|=== + +== Batch Metrics + +Metrics specifc to batch statements. + +Reported name format: + +*Metric Name*:: + `org.apache.cassandra.metrics.Batch.` +*JMX MBean*:: + `org.apache.cassandra.metrics:type=Batch name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|PartitionsPerCounterBatch |Histogram |Distribution of the number of +partitions processed per counter batch + +|PartitionsPerLoggedBatch |Histogram |Distribution of the number of +partitions processed per logged batch + +|PartitionsPerUnloggedBatch |Histogram |Distribution of the number of +partitions processed per unlogged batch +|=== + +== JVM Metrics + +JVM metrics such as memory and garbage collection statistics can either +be accessed by connecting to the JVM using JMX or can be exported using +link:#metric-reporters[Metric Reporters]. 
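+
+For a quick look at garbage collection behavior without attaching a JMX
+client, a summary is also available from `nodetool` (shown here as a
+convenience; the JMX metrics below remain the authoritative source):
+
+[source,none]
+----
+nodetool gcstats
+----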
+ +=== BufferPool + +*Metric Name*:: + `jvm.buffers..` +*JMX MBean*:: + `java.nio:type=BufferPool name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|Capacity |Gauge |Estimated total capacity of the buffers in this +pool + +|Count |Gauge |Estimated number of buffers in the pool + +|Used |Gauge |Estimated memory that the Java virtual machine is +using for this buffer pool +|=== + +=== FileDescriptorRatio + +*Metric Name*:: + `jvm.fd.` +*JMX MBean*:: + `java.lang:type=OperatingSystem name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|Usage |Ratio |Ratio of used to total file descriptors +|=== + +=== GarbageCollector + +*Metric Name*:: + `jvm.gc..` +*JMX MBean*:: + `java.lang:type=GarbageCollector name=` + +[cols=",,",options="header",] +|=== +|Name |Type |Description +|Count |Gauge |Total number of collections that have occurred + +|Time |Gauge |Approximate accumulated collection elapsed time in +milliseconds +|=== + +=== Memory + +*Metric Name*:: + `jvm.memory..` +*JMX MBean*:: + `java.lang:type=Memory` + +[cols=",,",] +|=== +|Committed |Gauge |Amount of memory in bytes that is committed for +the JVM to use + +|Init |Gauge |Amount of memory in bytes that the JVM initially +requests from the OS + +|Max |Gauge |Maximum amount of memory in bytes that can be used +for memory management + +|Usage |Ratio |Ratio of used to maximum memory + +|Used |Gauge |Amount of used memory in bytes +|=== + +=== MemoryPool + +*Metric Name*:: + `jvm.memory.pools..` +*JMX MBean*:: + `java.lang:type=MemoryPool name=` + +[cols=",,",] +|=== +|Committed |Gauge |Amount of memory in bytes that is committed for +the JVM to use + +|Init |Gauge |Amount of memory in bytes that the JVM initially +requests from the OS + +|Max |Gauge |Maximum amount of memory in bytes that can be used +for memory management + +|Usage |Ratio |Ratio of used to maximum memory + +|Used |Gauge |Amount of used memory in bytes +|=== + +== JMX + +Any JMX based client can access metrics from cassandra. + +If you wish to access JMX metrics over http it's possible to download +http://mx4j.sourceforge.net/[Mx4jTool] and place `mx4j-tools.jar` into +the classpath. On startup you will see in the log: + +[source,none] +---- +HttpAdaptor version 3.0.2 started on port 8081 +---- + +To choose a different port (8081 is the default) or a different listen +address (0.0.0.0 is not the default) edit `conf/cassandra-env.sh` and +uncomment: + +[source,none] +---- +#MX4J_ADDRESS="-Dmx4jaddress=0.0.0.0" + +#MX4J_PORT="-Dmx4jport=8081" +---- + +== Metric Reporters + +As mentioned at the top of this section on monitoring the Cassandra +metrics can be exported to a number of monitoring system a number of +http://metrics.dropwizard.io/3.1.0/getting-started/#other-reporting[built +in] and http://metrics.dropwizard.io/3.1.0/manual/third-party/[third +party] reporter plugins. + +The configuration of these plugins is managed by the +https://github.com/addthis/metrics-reporter-config[metrics reporter +config project]. There is a sample configuration file located at +`conf/metrics-reporter-config-sample.yaml`. + +Once configured, you simply start cassandra with the flag +`-Dcassandra.metricsReporterConfigFile=metrics-reporter-config.yaml`. +The specified .yaml file plus any 3rd party reporter jars must all be in +Cassandra's classpath. 
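+
+As a very rough sketch of what such a configuration can look like, a
+Graphite reporter might be defined as follows - the reporter type, host,
+period, and metric patterns here are assumptions to adapt, and
+`conf/metrics-reporter-config-sample.yaml` remains the authoritative
+reference for the format:
+
+[source,yaml]
+----
+graphite:
+  - period: 60
+    timeunit: 'SECONDS'
+    prefix: 'cassandra.cluster1'
+    hosts:
+      - host: 'graphite.example.com'
+        port: 2003
+    predicate:
+      color: 'white'
+      useQualifiedName: true
+      patterns:
+        - '^org.apache.cassandra.metrics.ClientRequest.+'
+----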
diff --git a/doc/modules/cassandra/pages/operating/read_repair.adoc b/doc/modules/cassandra/pages/operating/read_repair.adoc new file mode 100644 index 000000000000..763a38e30d72 --- /dev/null +++ b/doc/modules/cassandra/pages/operating/read_repair.adoc @@ -0,0 +1,264 @@ += Read repair + +Read Repair is the process of repairing data replicas during a read +request. If all replicas involved in a read request at the given read +consistency level are consistent the data is returned to the client and +no read repair is needed. But if the replicas involved in a read request +at the given consistency level are not consistent a read repair is +performed to make replicas involved in the read request consistent. The +most up-to-date data is returned to the client. The read repair runs in +the foreground and is blocking in that a response is not returned to the +client until the read repair has completed and up-to-date data is +constructed. + +== Expectation of Monotonic Quorum Reads + +Cassandra uses a blocking read repair to ensure the expectation of +"monotonic quorum reads" i.e. that in 2 successive quorum reads, it’s +guaranteed the 2nd one won't get something older than the 1st one, and +this even if a failed quorum write made a write of the most up to date +value only to a minority of replicas. "Quorum" means majority of nodes +among replicas. + +== Table level configuration of monotonic reads + +Cassandra 4.0 adds support for table level configuration of monotonic +reads +(https://issues.apache.org/jira/browse/CASSANDRA-14635[CASSANDRA-14635]). +The `read_repair` table option has been added to table schema, with the +options `blocking` (default), and `none`. + +The `read_repair` option configures the read repair behavior to allow +tuning for various performance and consistency behaviors. Two +consistency properties are affected by read repair behavior. + +* Monotonic Quorum Reads: Provided by `BLOCKING`. Monotonic quorum reads +prevents reads from appearing to go back in time in some circumstances. +When monotonic quorum reads are not provided and a write fails to reach +a quorum of replicas, it may be visible in one read, and then disappear +in a subsequent read. +* Write Atomicity: Provided by `NONE`. Write atomicity prevents reads +from returning partially applied writes. Cassandra attempts to provide +partition level write atomicity, but since only the data covered by a +`SELECT` statement is repaired by a read repair, read repair can break +write atomicity when data is read at a more granular level than it is +written. For example read repair can break write atomicity if you write +multiple rows to a clustered partition in a batch, but then select a +single row by specifying the clustering column in a `SELECT` statement. + +The available read repair settings are: + +=== Blocking + +The default setting. When `read_repair` is set to `BLOCKING`, and a read +repair is started, the read will block on writes sent to other replicas +until the CL is reached by the writes. Provides monotonic quorum reads, +but not partition level write atomicity. + +=== None + +When `read_repair` is set to `NONE`, the coordinator will reconcile any +differences between replicas, but will not attempt to repair them. +Provides partition level write atomicity, but not monotonic quorum +reads. 
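+
+The default `BLOCKING` behaviour can likewise be set explicitly; a
+minimal sketch (the keyspace and table names are placeholders):
+
+[source,cql]
+----
+ALTER TABLE ks.tbl WITH read_repair = 'BLOCKING';
+----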
+
+An example of using the `NONE` setting for the `read_repair` option is
+as follows:
+
+[source,cql]
+----
+CREATE TABLE ks.tbl (k INT, c INT, v INT, PRIMARY KEY (k, c)) WITH read_repair = 'NONE';
+----
+
+== Read Repair Example
+
+To illustrate read repair with an example, consider that a client sends
+a read request with read consistency level `TWO` to a 5-node cluster as
+illustrated in Figure 1. Read consistency level determines how many
+replica nodes must return a response before the read request is
+considered successful.
+
+image::Figure_1_read_repair.jpg[image]
+
+Figure 1. Client sends read request to a 5-node Cluster
+
+Three nodes host replicas for the requested data as illustrated in
+Figure 2. With a read consistency level of `TWO`, two replica nodes must
+return a response for the read request to be considered successful. If
+the node the client sends the request to hosts a replica of the requested
+data, only one other replica node needs to be sent a read request. But if
+the receiving node does not host a replica for the requested data, the
+node becomes a coordinator node and forwards the read request to a node
+that hosts a replica. A direct read request is forwarded to the fastest
+node (as determined by the dynamic snitch) as shown in Figure 2. A direct
+read request is a full read and returns the requested data.
+
+image::Figure_2_read_repair.jpg[image]
+
+Figure 2. Direct Read Request sent to Fastest Replica Node
+
+Next, the coordinator node sends the requisite number of additional
+requests to satisfy the consistency level, which is `TWO`. The
+coordinator node needs to send one more read request for a total of two.
+All read requests additional to the first direct read request are digest
+read requests. A digest read request is not a full read and only returns
+the hash value of the data. Only a hash value is returned to reduce the
+network data traffic. In the example being discussed, the coordinator
+node sends one digest read request to a node hosting a replica as
+illustrated in Figure 3.
+
+image::Figure_3_read_repair.jpg[image]
+
+Figure 3. Coordinator Sends a Digest Read Request
+
+The coordinator node has received a full copy of data from one node and
+a hash value for the data from another node. To compare the data
+returned, a hash value is calculated for the full copy of data. The two
+hash values are compared. If the hash values are the same, no read repair
+is needed and the full copy of requested data is returned to the client.
+The coordinator node performs a total of only two replica read requests
+because the read consistency level is `TWO` in the example. If the
+consistency level were higher, such as `THREE`, three replica nodes would
+need to respond to a read request, and only if all digest or hash values
+were to match the hash value of the full copy of data would the read
+request be considered successful and the data returned to the client.
+
+But if the hash value(s) from the digest read request(s) are not the same
+as the hash value of the data from the full read request of the first
+replica node, an inconsistency in the replicas exists. To fix the
+inconsistency, a read repair is performed.
+
+For example, consider that the digest read request returns a hash value
+that is not the same as the hash value of the data from the direct full
+read request.
We would need to make the replicas consistent for which the +coordinator node sends a direct (full) read request to the replica node +that it sent a digest read request to earlier as illustrated in Figure +4. + +image::Figure_4_read_repair.jpg[image] + +Figure 4. Coordinator sends Direct Read Request to Replica Node it had +sent Digest Read Request to + +After receiving the data from the second replica node the coordinator +has data from two of the replica nodes. It only needs two replicas as +the read consistency level is `TWO` in the example. Data from the two +replicas is compared and based on the timestamps the most recent replica +is selected. Data may need to be merged to construct an up-to-date copy +of data if one replica has data for only some of the columns. In the +example, if the data from the first direct read request is found to be +outdated and the data from the second full read request to be the latest +read, repair needs to be performed on Replica 2. If a new up-to-date +data is constructed by merging the two replicas a read repair would be +needed on both the replicas involved. For example, a read repair is +performed on Replica 2 as illustrated in Figure 5. + +image::Figure_5_read_repair.jpg[image] + +Figure 5. Coordinator performs Read Repair + +The most up-to-date data is returned to the client as illustrated in +Figure 6. From the three replicas Replica 1 is not even read and thus +not repaired. Replica 2 is repaired. Replica 3 is the most up-to-date +and returned to client. + +image::Figure_6_read_repair.jpg[image] + +Figure 6. Most up-to-date Data returned to Client + +== Read Consistency Level and Read Repair + +The read consistency is most significant in determining if a read repair +needs to be performed. As discussed in Table 1 a read repair is not +needed for all of the consistency levels. + +Table 1. Read Repair based on Read Consistency Level + +[width="93%",cols="35%,65%",] +|=== +|Read Consistency Level |Description + +|ONE |Read repair is not performed as the data from the first direct +read request satisfies the consistency level ONE. No digest read +requests are involved for finding mismatches in data. + +|TWO |Read repair is performed if inconsistencies in data are found as +determined by the direct and digest read requests. + +|THREE |Read repair is performed if inconsistencies in data are found as +determined by the direct and digest read requests. + +|LOCAL_ONE |Read repair is not performed as the data from the direct +read request from the closest replica satisfies the consistency level +LOCAL_ONE.No digest read requests are involved for finding mismatches in +data. + +|LOCAL_QUORUM |Read repair is performed if inconsistencies in data are +found as determined by the direct and digest read requests. + +|QUORUM |Read repair is performed if inconsistencies in data are found +as determined by the direct and digest read requests. +|=== + +If read repair is performed it is made only on the replicas that are not +up-to-date and that are involved in the read request. The number of +replicas involved in a read request would be based on the read +consistency level; in the example it is two. + +== Improved Read Repair Blocking Behavior in Cassandra 4.0 + +Cassandra 4.0 makes two improvements to read repair blocking behavior +(https://issues.apache.org/jira/browse/CASSANDRA-10726[CASSANDRA-10726]). + +[arabic] +. Speculative Retry of Full Data Read Requests. 
Cassandra 4.0 makes use +of speculative retry in sending read requests (full, not digest) to +replicas if a full data response is not received, whether in the initial +full read request or a full data read request during read repair. With +speculative retry if it looks like a response may not be received from +the initial set of replicas Cassandra sent messages to, to satisfy the +consistency level, it speculatively sends additional read request to +un-contacted replica/s. Cassandra 4.0 will also speculatively send a +repair mutation to a minority of nodes not involved in the read repair +data read / write cycle with the combined contents of all +un-acknowledged mutations if it looks like one may not respond. +Cassandra accepts acks from them in lieu of acks from the initial +mutations sent out, so long as it receives the same number of acks as +repair mutations transmitted. +. Only blocks on Full Data Responses to satisfy the Consistency Level. +Cassandra 4.0 only blocks for what is needed for resolving the digest +mismatch and wait for enough full data responses to meet the consistency +level, no matter whether it’s speculative retry or read repair chance. +As an example, if it looks like Cassandra might not receive full data +requests from everyone in time, it sends additional requests to +additional replicas not contacted in the initial full data read. If the +collection of nodes that end up responding in time end up agreeing on +the data, the response from the disagreeing replica that started the +read repair is not considered, and won't be included in the response to +the client, preserving the expectation of monotonic quorum reads. + +== Diagnostic Events for Read Repairs + +Cassandra 4.0 adds diagnostic events for read repair +(https://issues.apache.org/jira/browse/CASSANDRA-14668[CASSANDRA-14668]) +that can be used for exposing information such as: + +* Contacted endpoints +* Digest responses by endpoint +* Affected partition keys +* Speculated reads / writes +* Update oversized + +== Background Read Repair + +Background read repair, which was configured using `read_repair_chance` +and `dclocal_read_repair_chance` settings in `cassandra.yaml` is removed +Cassandra 4.0 +(https://issues.apache.org/jira/browse/CASSANDRA-13910[CASSANDRA-13910]). + +Read repair is not an alternative for other kind of repairs such as full +repairs or replacing a node that keeps failing. The data returned even +after a read repair has been performed may not be the most up-to-date +data if consistency level is other than one requiring response from all +replicas. diff --git a/doc/modules/cassandra/pages/operating/repair.adoc b/doc/modules/cassandra/pages/operating/repair.adoc new file mode 100644 index 000000000000..3c6b20d3a995 --- /dev/null +++ b/doc/modules/cassandra/pages/operating/repair.adoc @@ -0,0 +1,222 @@ += Repair + +Cassandra is designed to remain available if one of it's nodes is down +or unreachable. However, when a node is down or unreachable, it needs to +eventually discover the writes it missed. Hints attempt to inform a node +of missed writes, but are a best effort, and aren't guaranteed to inform +a node of 100% of the writes it missed. These inconsistencies can +eventually result in data loss as nodes are replaced or tombstones +expire. + +These inconsistencies are fixed with the repair process. Repair +synchronizes the data between nodes by comparing their respective +datasets for their common token ranges, and streaming the differences +for any out of sync sections between the nodes. 
It compares the data +with merkle trees, which are a hierarchy of hashes. + +== Incremental and Full Repairs + +There are 2 types of repairs: full repairs, and incremental repairs. +Full repairs operate over all of the data in the token range being +repaired. Incremental repairs only repair data that's been written since +the previous incremental repair. + +Incremental repairs are the default repair type, and if run regularly, +can significantly reduce the time and io cost of performing a repair. +However, it's important to understand that once an incremental repair +marks data as repaired, it won't try to repair it again. This is fine +for syncing up missed writes, but it doesn't protect against things like +disk corruption, data loss by operator error, or bugs in Cassandra. For +this reason, full repairs should still be run occasionally. + +== Usage and Best Practices + +Since repair can result in a lot of disk and network io, it's not run +automatically by Cassandra. It is run by the operator via nodetool. + +Incremental repair is the default and is run with the following command: + +[source,none] +---- +nodetool repair +---- + +A full repair can be run with the following command: + +[source,none] +---- +nodetool repair --full +---- + +Additionally, repair can be run on a single keyspace: + +[source,none] +---- +nodetool repair [options] +---- + +Or even on specific tables: + +[source,none] +---- +nodetool repair [options] +---- + +The repair command only repairs token ranges on the node being repaired, +it doesn't repair the whole cluster. By default, repair will operate on +all token ranges replicated by the node you're running repair on, which +will cause duplicate work if you run it on every node. The `-pr` flag +will only repair the "primary" ranges on a node, so you can repair your +entire cluster by running `nodetool repair -pr` on each node in a single +datacenter. + +The specific frequency of repair that's right for your cluster, of +course, depends on several factors. However, if you're just starting out +and looking for somewhere to start, running an incremental repair every +1-3 days, and a full repair every 1-3 weeks is probably reasonable. If +you don't want to run incremental repairs, a full repair every 5 days is +a good place to start. + +At a minimum, repair should be run often enough that the gc grace period +never expires on unrepaired data. Otherwise, deleted data could +reappear. With a default gc grace period of 10 days, repairing every +node in your cluster at least once every 7 days will prevent this, while +providing enough slack to allow for delays. + +== Other Options + +`-pr, --partitioner-range`:: + Restricts repair to the 'primary' token ranges of the node being + repaired. A primary range is just a token range for which a node is + the first replica in the ring. +`-prv, --preview`:: + Estimates the amount of streaming that would occur for the given + repair command. This builds the merkle trees, and prints the expected + streaming activity, but does not actually do any streaming. By + default, incremental repairs are estimated, add the `--full` flag to + estimate a full repair. +`-vd, --validate`:: + Verifies that the repaired data is the same across all nodes. Similiar + to `--preview`, this builds and compares merkle trees of repaired + data, but doesn't do any streaming. This is useful for + troubleshooting. If this shows that the repaired data is out of sync, + a full repair should be run. 
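These flags combine with the keyspace and table arguments shown above. As a hedged illustration (the keyspace name is just an example), an operator might first preview the streaming a full primary-range repair would perform, and then run it:

[source,none]
----
# estimate the streaming a full repair of this node's primary ranges would do
nodetool repair -prv --full cqlkeyspace

# run the repair itself
nodetool repair -pr --full cqlkeyspace
----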
+ +`nodetool repair docs ` + +== Full Repair Example + +Full repair is typically needed to redistribute data after increasing +the replication factor of a keyspace or after adding a node to the +cluster. Full repair involves streaming SSTables. To demonstrate full +repair start with a three node cluster. + +[source,none] +---- +[ec2-user@ip-10-0-2-238 ~]$ nodetool status +Datacenter: us-east-1 +===================== +Status=Up/Down +|/ State=Normal/Leaving/Joining/Moving +-- Address Load Tokens Owns Host ID Rack +UN 10.0.1.115 547 KiB 256 ? b64cb32a-b32a-46b4-9eeb-e123fa8fc287 us-east-1b +UN 10.0.3.206 617.91 KiB 256 ? 74863177-684b-45f4-99f7-d1006625dc9e us-east-1d +UN 10.0.2.238 670.26 KiB 256 ? 4dcdadd2-41f9-4f34-9892-1f20868b27c7 us-east-1c +---- + +Create a keyspace with replication factor 3: + +[source,none] +---- +cqlsh> DROP KEYSPACE cqlkeyspace; +cqlsh> CREATE KEYSPACE CQLKeyspace + ... WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 3}; +---- + +Add a table to the keyspace: + +[source,none] +---- +cqlsh> use cqlkeyspace; +cqlsh:cqlkeyspace> CREATE TABLE t ( + ... id int, + ... k int, + ... v text, + ... PRIMARY KEY (id) + ... ); +---- + +Add table data: + +[source,none] +---- +cqlsh:cqlkeyspace> INSERT INTO t (id, k, v) VALUES (0, 0, 'val0'); +cqlsh:cqlkeyspace> INSERT INTO t (id, k, v) VALUES (1, 1, 'val1'); +cqlsh:cqlkeyspace> INSERT INTO t (id, k, v) VALUES (2, 2, 'val2'); +---- + +A query lists the data added: + +[source,none] +---- +cqlsh:cqlkeyspace> SELECT * FROM t; + +id | k | v +----+---+------ + 1 | 1 | val1 + 0 | 0 | val0 + 2 | 2 | val2 +(3 rows) +---- + +Make the following changes to a three node cluster: + +[arabic] +. Increase the replication factor from 3 to 4. +. Add a 4th node to the cluster + +When the replication factor is increased the following message gets +output indicating that a full repair is needed as per +(https://issues.apache.org/jira/browse/CASSANDRA-13079[CASSANDRA-13079]): + +[source,none] +---- +cqlsh:cqlkeyspace> ALTER KEYSPACE CQLKeyspace + ... WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 4}; +Warnings : +When increasing replication factor you need to run a full (-full) repair to distribute the +data. +---- + +Perform a full repair on the keyspace `cqlkeyspace` table `t` with +following command: + +[source,none] +---- +nodetool repair -full cqlkeyspace t +---- + +Full repair completes in about a second as indicated by the output: + +[source,none] +---- +[ec2-user@ip-10-0-2-238 ~]$ nodetool repair -full cqlkeyspace t +[2019-08-17 03:06:21,445] Starting repair command #1 (fd576da0-c09b-11e9-b00c-1520e8c38f00), repairing keyspace cqlkeyspace with repair options (parallelism: parallel, primary range: false, incremental: false, job threads: 1, ColumnFamilies: [t], dataCenters: [], hosts: [], previewKind: NONE, # of ranges: 1024, pull repair: false, force repair: false, optimise streams: false) +[2019-08-17 03:06:23,059] Repair session fd8e5c20-c09b-11e9-b00c-1520e8c38f00 for range [(-8792657144775336505,-8786320730900698730], (-5454146041421260303,-5439402053041523135], (4288357893651763201,4324309707046452322], ... 
, (4350676211955643098,4351706629422088296]] finished (progress: 0%) +[2019-08-17 03:06:23,077] Repair completed successfully +[2019-08-17 03:06:23,077] Repair command #1 finished in 1 second +[ec2-user@ip-10-0-2-238 ~]$ +---- + +The `nodetool tpstats` command should list a repair having been +completed as `Repair-Task` > `Completed` column value of 1: + +[source,none] +---- +[ec2-user@ip-10-0-2-238 ~]$ nodetool tpstats +Pool Name Active Pending Completed Blocked All time blocked +ReadStage 0 0 99 0 0 +… +Repair-Task 0 0 1 0 0 +RequestResponseStage 0 0 2078 0 0 +---- diff --git a/doc/modules/cassandra/pages/operating/security.adoc b/doc/modules/cassandra/pages/operating/security.adoc new file mode 100644 index 000000000000..a74c0427b0c5 --- /dev/null +++ b/doc/modules/cassandra/pages/operating/security.adoc @@ -0,0 +1,527 @@ += Security + +There are three main components to the security features provided by +Cassandra: + +* TLS/SSL encryption for client and inter-node communication +* Client authentication +* Authorization + +By default, these features are disabled as Cassandra is configured to +easily find and be found by other members of a cluster. In other words, +an out-of-the-box Cassandra installation presents a large attack surface +for a bad actor. Enabling authentication for clients using the binary +protocol is not sufficient to protect a cluster. Malicious users able to +access internode communication and JMX ports can still: + +* Craft internode messages to insert users into authentication schema +* Craft internode messages to truncate or drop schema +* Use tools such as `sstableloader` to overwrite `system_auth` tables +* Attach to the cluster directly to capture write traffic + +Correct configuration of all three security components should negate +theses vectors. Therefore, understanding Cassandra's security features +is crucial to configuring your cluster to meet your security needs. + +== TLS/SSL Encryption + +Cassandra provides secure communication between a client machine and a +database cluster and between nodes within a cluster. Enabling encryption +ensures that data in flight is not compromised and is transferred +securely. The options for client-to-node and node-to-node encryption are +managed separately and may be configured independently. + +In both cases, the JVM defaults for supported protocols and cipher +suites are used when encryption is enabled. These can be overidden using +the settings in `cassandra.yaml`, but this is not recommended unless +there are policies in place which dictate certain settings or a need to +disable vulnerable ciphers or protocols in cases where the JVM cannot be +updated. + +FIPS compliant settings can be configured at the JVM level and should +not involve changing encryption settings in cassandra.yaml. See +https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html[the +java document on FIPS] for more details. + +For information on generating the keystore and truststore files used in +SSL communications, see the +http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore[java +documentation on creating keystores] + +== SSL Certificate Hot Reloading + +Beginning with Cassandra 4, Cassandra supports hot reloading of SSL +Certificates. If SSL/TLS support is enabled in Cassandra, the node +periodically polls the Trust and Key Stores specified in cassandra.yaml. +When the files are updated, Cassandra will reload them and use them for +subsequent connections. 
Please note that the Trust & Key Store passwords +are part of the yaml so the updated files should also use the same +passwords. The default polling interval is 10 minutes. + +Certificate Hot reloading may also be triggered using the +`nodetool reloadssl` command. Use this if you want to Cassandra to +immediately notice the changed certificates. + +=== Inter-node Encryption + +The settings for managing inter-node encryption are found in +`cassandra.yaml` in the `server_encryption_options` section. To enable +inter-node encryption, change the `internode_encryption` setting from +its default value of `none` to one value from: `rack`, `dc` or `all`. + +=== Client to Node Encryption + +The settings for managing client to node encryption are found in +`cassandra.yaml` in the `client_encryption_options` section. There are +two primary toggles here for enabling encryption, `enabled` and +`optional`. + +* If neither is set to `true`, client connections are entirely +unencrypted. +* If `enabled` is set to `true` and `optional` is set to `false`, all +client connections must be secured. +* If both options are set to `true`, both encrypted and unencrypted +connections are supported using the same port. Client connections using +encryption with this configuration will be automatically detected and +handled by the server. + +As an alternative to the `optional` setting, separate ports can also be +configured for secure and unsecure connections where operational +requirements demand it. To do so, set `optional` to false and use the +`native_transport_port_ssl` setting in `cassandra.yaml` to specify the +port to be used for secure client communication. + +[[operation-roles]] +== Roles + +Cassandra uses database roles, which may represent either a single user +or a group of users, in both authentication and permissions management. +Role management is an extension point in Cassandra and may be configured +using the `role_manager` setting in `cassandra.yaml`. The default +setting uses `CassandraRoleManager`, an implementation which stores role +information in the tables of the `system_auth` keyspace. + +See also the xref:cql/security.adoc#database-roles[`CQL documentation on roles`]. + +== Authentication + +Authentication is pluggable in Cassandra and is configured using the +`authenticator` setting in `cassandra.yaml`. Cassandra ships with two +options included in the default distribution. + +By default, Cassandra is configured with `AllowAllAuthenticator` which +performs no authentication checks and therefore requires no credentials. +It is used to disable authentication completely. Note that +authentication is a necessary condition of Cassandra's permissions +subsystem, so if authentication is disabled, effectively so are +permissions. + +The default distribution also includes `PasswordAuthenticator`, which +stores encrypted credentials in a system table. This can be used to +enable simple username/password authentication. + +[[password-authentication]] +=== Enabling Password Authentication + +Before enabling client authentication on the cluster, client +applications should be pre-configured with their intended credentials. +When a connection is initiated, the server will only ask for credentials +once authentication is enabled, so setting up the client side config in +advance is safe. In contrast, as soon as a server has authentication +enabled, any connection attempt without proper credentials will be +rejected which may cause availability problems for client applications. 
+Once clients are setup and ready for authentication to be enabled, +follow this procedure to enable it on the cluster. + +Pick a single node in the cluster on which to perform the initial +configuration. Ideally, no clients should connect to this node during +the setup process, so you may want to remove it from client config, +block it at the network level or possibly add a new temporary node to +the cluster for this purpose. On that node, perform the following steps: + +[arabic] +. Open a `cqlsh` session and change the replication factor of the +`system_auth` keyspace. By default, this keyspace uses +`SimpleReplicationStrategy` and a `replication_factor` of 1. It is +recommended to change this for any non-trivial deployment to ensure that +should nodes become unavailable, login is still possible. Best practice +is to configure a replication factor of 3 to 5 per-DC. + +[source,cql] +---- +ALTER KEYSPACE system_auth WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': 3, 'DC2': 3}; +---- + +[arabic, start=2] +. Edit `cassandra.yaml` to change the `authenticator` option like so: + +[source,yaml] +---- +authenticator: PasswordAuthenticator +---- + +[arabic, start=3] +. Restart the node. +. Open a new `cqlsh` session using the credentials of the default +superuser: + +[source,bash] +---- +$ cqlsh -u cassandra -p cassandra +---- + +[arabic, start=5] +. During login, the credentials for the default superuser are read with +a consistency level of `QUORUM`, whereas those for all other users +(including superusers) are read at `LOCAL_ONE`. In the interests of +performance and availability, as well as security, operators should +create another superuser and disable the default one. This step is +optional, but highly recommended. While logged in as the default +superuser, create another superuser role which can be used to bootstrap +further configuration. + +[source,cql] +---- +# create a new superuser +CREATE ROLE dba WITH SUPERUSER = true AND LOGIN = true AND PASSWORD = 'super'; +---- + +[arabic, start=6] +. Start a new cqlsh session, this time logging in as the new_superuser +and disable the default superuser. + +[source,cql] +---- +ALTER ROLE cassandra WITH SUPERUSER = false AND LOGIN = false; +---- + +[arabic, start=7] +. Finally, set up the roles and credentials for your application users +with xref:cql/security.adoc#create-role[`CREATE ROLE`] statements. + +At the end of these steps, the one node is configured to use password +authentication. To roll that out across the cluster, repeat steps 2 and +3 on each node in the cluster. Once all nodes have been restarted, +authentication will be fully enabled throughout the cluster. + +Note that using `PasswordAuthenticator` also requires the use of +xref:cql/security.adoc#operation-roles[`CassandraRoleManager`]. + +See also: `setting-credentials-for-internal-authentication`, +xref:cql/security.adoc#create-role[`CREATE ROLE`], +xref:cql/security.adoc#alter-role[`ALTER ROLE`], +xref:xref:cql/security.adoc#alter-keyspace[`ALTER KEYSPACE`] and +xref:cql/security.adoc#grant-permission[`GRANT PERMISSION`]. + +== Authorization + +Authorization is pluggable in Cassandra and is configured using the +`authorizer` setting in `cassandra.yaml`. Cassandra ships with two +options included in the default distribution. + +By default, Cassandra is configured with `AllowAllAuthorizer` which +performs no checking and so effectively grants all permissions to all +roles. This must be used if `AllowAllAuthenticator` is the configured +authenticator. 
+ +The default distribution also includes `CassandraAuthorizer`, which does +implement full permissions management functionality and stores its data +in Cassandra system tables. + +=== Enabling Internal Authorization + +Permissions are modelled as a whitelist, with the default assumption +that a given role has no access to any database resources. The +implication of this is that once authorization is enabled on a node, all +requests will be rejected until the required permissions have been +granted. For this reason, it is strongly recommended to perform the +initial setup on a node which is not processing client requests. + +The following assumes that authentication has already been enabled via +the process outlined in `password-authentication`. Perform these steps +to enable internal authorization across the cluster: + +[arabic] +. On the selected node, edit `cassandra.yaml` to change the `authorizer` +option like so: + +[source,yaml] +---- +authorizer: CassandraAuthorizer +---- + +[arabic, start=2] +. Restart the node. +. Open a new `cqlsh` session using the credentials of a role with +superuser credentials: + +[source,bash] +---- +$ cqlsh -u dba -p super +---- + +[arabic, start=4] +. Configure the appropriate access privileges for your clients using +link:cql.html#grant-permission[GRANT PERMISSION] statements. On the +other nodes, until configuration is updated and the node restarted, this +will have no effect so disruption to clients is avoided. + +[source,cql] +---- +GRANT SELECT ON ks.t1 TO db_user; +---- + +[arabic, start=5] +. Once all the necessary permissions have been granted, repeat steps 1 +and 2 for each node in turn. As each node restarts and clients +reconnect, the enforcement of the granted permissions will begin. + +See also: xref:cql/security.adoc#grant-permission[`GRANT PERMISSION`], +xref:cql/security.adoc#grant-all[`GRANT ALL`] and +xref:cql/security.adoc#revoke-permission[`REVOKE PERMISSION`]. + +[[auth-caching]] +== Caching + +Enabling authentication and authorization places additional load on the +cluster by frequently reading from the `system_auth` tables. +Furthermore, these reads are in the critical paths of many client +operations, and so has the potential to severely impact quality of +service. To mitigate this, auth data such as credentials, permissions +and role details are cached for a configurable period. The caching can +be configured (and even disabled) from `cassandra.yaml` or using a JMX +client. The JMX interface also supports invalidation of the various +caches, but any changes made via JMX are not persistent and will be +re-read from `cassandra.yaml` when the node is restarted. + +Each cache has 3 options which can be set: + +Validity Period:: + Controls the expiration of cache entries. After this period, entries + are invalidated and removed from the cache. +Refresh Rate:: + Controls the rate at which background reads are performed to pick up + any changes to the underlying data. While these async refreshes are + performed, caches will continue to serve (possibly) stale data. + Typically, this will be set to a shorter time than the validity + period. +Max Entries:: + Controls the upper bound on cache size. + +The naming for these options in `cassandra.yaml` follows the convention: + +* `_validity_in_ms` +* `_update_interval_in_ms` +* `_cache_max_entries` + +Where `` is one of `credentials`, `permissions`, or `roles`. + +As mentioned, these are also exposed via JMX in the mbeans under the +`org.apache.cassandra.auth` domain. 
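Following that naming convention, a hedged `cassandra.yaml` sketch that tunes the roles and permissions caches might look like the following; the values are purely illustrative and should be sized for your own workload:

[source,yaml]
----
# entries expire after 10s, are refreshed in the background every 2s,
# and each cache is capped at 1000 entries (illustrative values only)
roles_validity_in_ms: 10000
roles_update_interval_in_ms: 2000
roles_cache_max_entries: 1000
permissions_validity_in_ms: 10000
permissions_update_interval_in_ms: 2000
permissions_cache_max_entries: 1000
----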
+
+== JMX access
+
+Access control for JMX clients is configured separately from that for
+CQL. For both authentication and authorization, two providers are
+available: the first is based on standard JMX security and the second
+integrates more closely with Cassandra's own auth subsystem.
+
+The default settings for Cassandra make JMX accessible only from
+localhost. To enable remote JMX connections, edit `cassandra-env.sh` (or
+`cassandra-env.ps1` on Windows) to change the `LOCAL_JMX` setting to
+`no`. Under the standard configuration, when remote JMX connections are
+enabled, standard JMX authentication (described below) is also
+switched on.
+
+Note that by default, local-only connections are not subject to
+authentication, but this can be enabled.
+
+If enabling remote connections, it is recommended to also use
+xref:operating/security.adoc#jmx-with-ssl[`SSL`] connections.
+
+Finally, after enabling auth and/or SSL, ensure that tools which use
+JMX, such as xref:tools/nodetool/nodetools.adoc[`nodetool`], are
+correctly configured and working as expected.
+
+=== Standard JMX Auth
+
+Users permitted to connect to the JMX server are specified in a simple
+text file. The location of this file is set in `cassandra-env.sh` by the
+line:
+
+[source,bash]
+----
+JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password"
+----
+
+Edit the password file to add username/password pairs:
+
+[source,none]
+----
+jmx_user jmx_password
+----
+
+Secure the credentials file so that only the user running the Cassandra
+process can read it:
+
+[source,bash]
+----
+$ chown cassandra:cassandra /etc/cassandra/jmxremote.password
+$ chmod 400 /etc/cassandra/jmxremote.password
+----
+
+Optionally, enable access control to limit the scope of what defined
+users can do via JMX. Note that this is a fairly blunt instrument in
+this context as most operational tools in Cassandra require full
+read/write access. To configure a simple access file, uncomment this
+line in `cassandra-env.sh`:
+
+[source,bash]
+----
+#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access"
+----
+
+Then edit the access file to grant your JMX user readwrite permission:
+
+[source,none]
+----
+jmx_user readwrite
+----
+
+Cassandra must be restarted to pick up the new settings.
+
+See also:
+http://docs.oracle.com/javase/7/docs/technotes/guides/management/agent.html#gdenv[Using
+File-Based Password Authentication In JMX]
+
+=== Cassandra Integrated Auth
+
+An alternative to the out-of-the-box JMX auth is to use Cassandra's own
+authentication and/or authorization providers for JMX clients. This is
+potentially more flexible and secure, but it comes with one major caveat:
+it is not available until _after_ a node has joined the ring, because the
+auth subsystem is not fully configured until that point. However, it is
+often critical for monitoring purposes to have JMX access, particularly
+during bootstrap. So it is recommended, where possible, to use local-only
+JMX auth during bootstrap and then, if remote connectivity is required,
+to switch to integrated auth once the node has joined the ring and
+initial setup is complete.
+
+With this option, the same database roles used for CQL authentication
+can be used to control access to JMX, so updates can be managed
+centrally using just `cqlsh`.
Furthermore, fine grained control over +exactly which operations are permitted on particular MBeans can be +acheived via xref:cql/security.adoc#grant-permission[`GRANT PERMISSION`]. + +To enable integrated authentication, edit `cassandra-env.sh` to +uncomment these lines: + +[source,bash] +---- +#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.login.config=CassandraLogin" +#JVM_OPTS="$JVM_OPTS -Djava.security.auth.login.config=$CASSANDRA_HOME/conf/cassandra-jaas.config" +---- + +And disable the JMX standard auth by commenting this line: + +[source,bash] +---- +JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password" +---- + +To enable integrated authorization, uncomment this line: + +[source,bash] +---- +#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.authorizer=org.apache.cassandra.auth.jmx.AuthorizationProxy" +---- + +Check standard access control is off by ensuring this line is commented +out: + +[source,bash] +---- +#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access" +---- + +With integrated authentication and authorization enabled, operators can +define specific roles and grant them access to the particular JMX +resources that they need. For example, a role with the necessary +permissions to use tools such as jconsole or jmc in read-only mode would +be defined as: + +[source,cql] +---- +CREATE ROLE jmx WITH LOGIN = false; +GRANT SELECT ON ALL MBEANS TO jmx; +GRANT DESCRIBE ON ALL MBEANS TO jmx; +GRANT EXECUTE ON MBEAN 'java.lang:type=Threading' TO jmx; +GRANT EXECUTE ON MBEAN 'com.sun.management:type=HotSpotDiagnostic' TO jmx; + +# Grant the role with necessary permissions to use nodetool commands (including nodetool status) in read-only mode +GRANT EXECUTE ON MBEAN 'org.apache.cassandra.db:type=EndpointSnitchInfo' TO jmx; +GRANT EXECUTE ON MBEAN 'org.apache.cassandra.db:type=StorageService' TO jmx; + +# Grant the jmx role to one with login permissions so that it can access the JMX tooling +CREATE ROLE ks_user WITH PASSWORD = 'password' AND LOGIN = true AND SUPERUSER = false; +GRANT jmx TO ks_user; +---- + +Fine grained access control to individual MBeans is also supported: + +[source,cql] +---- +GRANT EXECUTE ON MBEAN 'org.apache.cassandra.db:type=Tables,keyspace=test_keyspace,table=t1' TO ks_user; +GRANT EXECUTE ON MBEAN 'org.apache.cassandra.db:type=Tables,keyspace=test_keyspace,table=*' TO ks_owner; +---- + +This permits the `ks_user` role to invoke methods on the MBean +representing a single table in `test_keyspace`, while granting the same +permission for all table level MBeans in that keyspace to the `ks_owner` +role. + +Adding/removing roles and granting/revoking of permissions is handled +dynamically once the initial setup is complete, so no further restarts +are required if permissions are altered. + +See also: xref:cql/security.adoc#permissions[`Permissions`]. + +=== JMX With SSL + +JMX SSL configuration is controlled by a number of system properties, +some of which are optional. 
To turn on SSL, edit the relevant lines in +`cassandra-env.sh` (or `cassandra-env.ps1` on Windows) to uncomment and +set the values of these properties as required: + +`com.sun.management.jmxremote.ssl`:: + set to true to enable SSL +`com.sun.management.jmxremote.ssl.need.client.auth`:: + set to true to enable validation of client certificates +`com.sun.management.jmxremote.registry.ssl`:: + enables SSL sockets for the RMI registry from which clients obtain the + JMX connector stub +`com.sun.management.jmxremote.ssl.enabled.protocols`:: + by default, the protocols supported by the JVM will be used, override + with a comma-separated list. Note that this is not usually necessary + and using the defaults is the preferred option. +`com.sun.management.jmxremote.ssl.enabled.cipher.suites`:: + by default, the cipher suites supported by the JVM will be used, + override with a comma-separated list. Note that this is not usually + necessary and using the defaults is the preferred option. +`javax.net.ssl.keyStore`:: + set the path on the local filesystem of the keystore containing server + private keys and public certificates +`javax.net.ssl.keyStorePassword`:: + set the password of the keystore file +`javax.net.ssl.trustStore`:: + if validation of client certificates is required, use this property to + specify the path of the truststore containing the public certificates + of trusted clients +`javax.net.ssl.trustStorePassword`:: + set the password of the truststore file + +See also: +http://docs.oracle.com/javase/7/docs/technotes/guides/management/agent.html#gdemv[Oracle +Java7 Docs], +https://www.lullabot.com/articles/monitor-java-with-jmx[Monitor Java +with JMX] diff --git a/doc/modules/cassandra/pages/operating/topo_changes.adoc b/doc/modules/cassandra/pages/operating/topo_changes.adoc new file mode 100644 index 000000000000..368056d945e9 --- /dev/null +++ b/doc/modules/cassandra/pages/operating/topo_changes.adoc @@ -0,0 +1,133 @@ += Adding, replacing, moving and removing nodes + +== Bootstrap + +Adding new nodes is called "bootstrapping". The `num_tokens` parameter +will define the amount of virtual nodes (tokens) the joining node will +be assigned during bootstrap. The tokens define the sections of the ring +(token ranges) the node will become responsible for. + +=== Token allocation + +With the default token allocation algorithm the new node will pick +`num_tokens` random tokens to become responsible for. Since tokens are +distributed randomly, load distribution improves with a higher amount of +virtual nodes, but it also increases token management overhead. The +default of 256 virtual nodes should provide a reasonable load balance +with acceptable overhead. + +On 3.0+ a new token allocation algorithm was introduced to allocate +tokens based on the load of existing virtual nodes for a given keyspace, +and thus yield an improved load distribution with a lower number of +tokens. To use this approach, the new node must be started with the JVM +option `-Dcassandra.allocate_tokens_for_keyspace=`, where +`` is the keyspace from which the algorithm can find the load +information to optimize token assignment for. + +==== Manual token assignment + +You may specify a comma-separated list of tokens manually with the +`initial_token` `cassandra.yaml` parameter, and if that is specified +Cassandra will skip the token allocation process. This may be useful +when doing token assignment with an external tool or when restoring a +node with its previous tokens. 
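As a hedged sketch (the token values are arbitrary Murmur3 examples), a node being restored with three previously owned tokens would set the following in `cassandra.yaml`, causing the allocation step to be skipped:

[source,yaml]
----
# the number of tokens should match the initial_token list (illustrative values)
num_tokens: 3
initial_token: -9223372036854775808,-3074457345618258603,3074457345618258602
----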
+ +=== Range streaming + +After the tokens are allocated, the joining node will pick current +replicas of the token ranges it will become responsible for to stream +data from. By default it will stream from the primary replica of each +token range in order to guarantee data in the new node will be +consistent with the current state. + +In the case of any unavailable replica, the consistent bootstrap process +will fail. To override this behavior and potentially miss data from an +unavailable replica, set the JVM flag +`-Dcassandra.consistent.rangemovement=false`. + +=== Resuming failed/hanged bootstrap + +On 2.2+, if the bootstrap process fails, it's possible to resume +bootstrap from the previous saved state by calling +`nodetool bootstrap resume`. If for some reason the bootstrap hangs or +stalls, it may also be resumed by simply restarting the node. In order +to cleanup bootstrap state and start fresh, you may set the JVM startup +flag `-Dcassandra.reset_bootstrap_progress=true`. + +On lower versions, when the bootstrap proces fails it is recommended to +wipe the node (remove all the data), and restart the bootstrap process +again. + +=== Manual bootstrapping + +It's possible to skip the bootstrapping process entirely and join the +ring straight away by setting the hidden parameter +`auto_bootstrap: false`. This may be useful when restoring a node from a +backup or creating a new data-center. + +== Removing nodes + +You can take a node out of the cluster with `nodetool decommission` to a +live node, or `nodetool removenode` (to any other machine) to remove a +dead one. This will assign the ranges the old node was responsible for +to other nodes, and replicate the appropriate data there. If +decommission is used, the data will stream from the decommissioned node. +If removenode is used, the data will stream from the remaining replicas. + +No data is removed automatically from the node being decommissioned, so +if you want to put the node back into service at a different token on +the ring, it should be removed manually. + +== Moving nodes + +When `num_tokens: 1` it's possible to move the node position in the ring +with `nodetool move`. Moving is both a convenience over and more +efficient than decommission + bootstrap. After moving a node, +`nodetool cleanup` should be run to remove any unnecessary data. + +== Replacing a dead node + +In order to replace a dead node, start cassandra with the JVM startup +flag `-Dcassandra.replace_address_first_boot=`. Once this +property is enabled the node starts in a hibernate state, during which +all the other nodes will see this node to be DOWN (DN), however this +node will see itself as UP (UN). Accurate replacement state can be found +in `nodetool netstats`. + +The replacing node will now start to bootstrap the data from the rest of +the nodes in the cluster. A replacing node will only receive writes +during the bootstrapping phase if it has a different ip address to the +node that is being replaced. (See CASSANDRA-8523 and CASSANDRA-12344) + +Once the bootstrapping is complete the node will be marked "UP". + +[NOTE] +.Note +==== +If any of the following cases apply, you *MUST* run repair to make the +replaced node consistent again, since it missed ongoing writes +during/prior to bootstrapping. The _replacement_ timeframe refers to the +period from when the node initially dies to when a new node completes +the replacement process. + +[arabic] +. The node is down for longer than `max_hint_window_in_ms` before being +replaced. +. 
You are replacing using the same IP address as the dead node *and* +replacement takes longer than `max_hint_window_in_ms`. +==== + +== Monitoring progress + +Bootstrap, replace, move and remove progress can be monitored using +`nodetool netstats` which will show the progress of the streaming +operations. + +== Cleanup data after range movements + +As a safety measure, Cassandra does not automatically remove data from +nodes that "lose" part of their token range due to a range movement +operation (bootstrap, move, replace). Run `nodetool cleanup` on the +nodes that lost ranges to the joining node when you are satisfied the +new node is up and working. If you do not do this the old data will +still be counted against the load on that node. diff --git a/doc/modules/cassandra/pages/plugins/index.adoc b/doc/modules/cassandra/pages/plugins/index.adoc new file mode 100644 index 000000000000..dbb048af3dd9 --- /dev/null +++ b/doc/modules/cassandra/pages/plugins/index.adoc @@ -0,0 +1,36 @@ += Third-Party Plugins + +Available third-party plugins for Apache Cassandra + +== CAPI-Rowcache + +The Coherent Accelerator Process Interface (CAPI) is a general term for +the infrastructure of attaching a Coherent accelerator to an IBM POWER +system. A key innovation in IBM POWER8’s open architecture is the CAPI. +It provides a high bandwidth, low latency path between external devices, +the POWER8 core, and the system’s open memory architecture. IBM Data +Engine for NoSQL is an integrated platform for large and fast growing +NoSQL data stores. It builds on the CAPI capability of POWER8 systems +and provides super-fast access to large flash storage capacity and +addresses the challenges associated with typical x86 server based +scale-out deployments. + +The official page for the +https://github.com/ppc64le/capi-rowcache[CAPI-Rowcache plugin] contains +further details how to build/run/download the plugin. + +== Stratio’s Cassandra Lucene Index + +Stratio’s Lucene index is a Cassandra secondary index implementation +based on http://lucene.apache.org/[Apache Lucene]. It extends +Cassandra’s functionality to provide near real-time distributed search +engine capabilities such as with ElasticSearch or +http://lucene.apache.org/solr/[Apache Solr], including full text search +capabilities, free multivariable, geospatial and bitemporal search, +relevance queries and sorting based on column value, relevance or +distance. Each node indexes its own data, so high availability and +scalability is guaranteed. + +The official Github repository +http://www.github.com/stratio/cassandra-lucene-index[Cassandra Lucene +Index] contains everything you need to build/run/configure the plugin. diff --git a/doc/modules/cassandra/pages/tools/cassandra_stress.adoc b/doc/modules/cassandra/pages/tools/cassandra_stress.adoc new file mode 100644 index 000000000000..bcef193c373d --- /dev/null +++ b/doc/modules/cassandra/pages/tools/cassandra_stress.adoc @@ -0,0 +1,326 @@ += Cassandra Stress + +The `cassandra-stress` tool is used to benchmark and load-test a Cassandra +cluster. +`cassandra-stress` supports testing arbitrary CQL tables and queries, allowing users to benchmark their own data model. + +This documentation focuses on user mode to test personal schema. 
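Before looking at user mode, a quick smoke test with the built-in write workload can confirm that the tool can reach the cluster. This is a hedged example; the row count, thread count and node address are illustrative:

[source,none]
----
cassandra-stress write n=10k -rate threads=8 -node 127.0.0.1
----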
+ +== Usage + +There are several operation types: + +* write-only, read-only, and mixed workloads of standard data +* write-only and read-only workloads for counter columns +* user configured workloads, running custom queries on custom schemas + +The syntax is `cassandra-stress [options]`. +For more information on a given command or options, run `cassandra-stress help `. + +Commands::: + read:;; + Multiple concurrent reads - the cluster must first be populated by a + write test + write:;; + Multiple concurrent writes against the cluster + mixed:;; + Interleaving of any basic commands, with configurable ratio and + distribution - the cluster must first be populated by a write test + counter_write:;; + Multiple concurrent updates of counters. + counter_read:;; + Multiple concurrent reads of counters. The cluster must first be + populated by a counterwrite test. + user:;; + Interleaving of user provided queries, with configurable ratio and + distribution. + help:;; + Print help for a command or option + print:;; + Inspect the output of a distribution definition + legacy:;; + Legacy support mode +Primary Options::: + -pop:;; + Population distribution and intra-partition visit order + -insert:;; + Insert specific options relating to various methods for batching and + splitting partition updates + -col:;; + Column details such as size and count distribution, data generator, + names, comparator and if super columns should be used + -rate:;; + Thread count, rate limit or automatic mode (default is auto) + -mode:;; + Thrift or CQL with options + -errors:;; + How to handle errors when encountered during stress + -sample:;; + Specify the number of samples to collect for measuring latency + -schema:;; + Replication settings, compression, compaction, etc. + -node:;; + Nodes to connect to + -log:;; + Where to log progress to, and the interval at which to do it + -transport:;; + Custom transport factories + -port:;; + The port to connect to cassandra nodes on + -sendto:;; + Specify a stress server to send this command to + -graph:;; + Graph recorded metrics + -tokenrange:;; + Token range settings +Suboptions::: + Every command and primary option has its own collection of suboptions. + These are too numerous to list here. For information on the suboptions + for each command or option, please use the help command, + `cassandra-stress help `. + +== User mode + +User mode allows you to stress your own schemas, to save you time +in the long run. Find out if your application can scale using stress test with your schema. + +=== Profile + +User mode defines a profile using YAML. +Multiple YAML files may be specified, in which case operations in the ops argument are referenced as +specname.opname. + +An identifier for the profile: + +[source,yaml] +---- +specname: staff_activities +---- + +The keyspace for the test: + +[source,yaml] +---- +keyspace: staff +---- + +CQL for the keyspace. Optional if the keyspace already exists: + +[source,yaml] +---- +keyspace_definition: | + CREATE KEYSPACE stresscql WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3}; +---- + +The table to be stressed: + +[source,yaml] +---- +table: staff_activities +---- + +CQL for the table. Optional if the table already exists: + +[source,yaml] +---- +table_definition: | + CREATE TABLE staff_activities ( + name text, + when timeuuid, + what text, + PRIMARY KEY(name, when, what) + ) +---- + +Optional meta-information on the generated columns in the above table. +The min and max only apply to text and blob types. 
The distribution +field represents the total unique population distribution of that column +across rows: + +[source,yaml] +---- +columnspec: + - name: name + size: uniform(5..10) # The names of the staff members are between 5-10 characters + population: uniform(1..10) # 10 possible staff members to pick from + - name: when + cluster: uniform(20..500) # Staff members do between 20 and 500 events + - name: what + size: normal(10..100,50) +---- + +Supported types are: + +An exponential distribution over the range [min..max]: + +[source,yaml] +---- +EXP(min..max) +---- + +An extreme value (Weibull) distribution over the range [min..max]: + +[source,yaml] +---- +EXTREME(min..max,shape) +---- + +A gaussian/normal distribution, where mean=(min+max)/2, and stdev is +(mean-min)/stdvrng: + +[source,yaml] +---- +GAUSSIAN(min..max,stdvrng) +---- + +A gaussian/normal distribution, with explicitly defined mean and stdev: + +[source,yaml] +---- +GAUSSIAN(min..max,mean,stdev) +---- + +A uniform distribution over the range [min, max]: + +[source,yaml] +---- +UNIFORM(min..max) +---- + +A fixed distribution, always returning the same value: + +[source,yaml] +---- +FIXED(val) +---- + +If preceded by ~, the distribution is inverted + +Defaults for all columns are size: uniform(4..8), population: +uniform(1..100B), cluster: fixed(1) + +Insert distributions: + +[source,yaml] +---- +insert: + # How many partition to insert per batch + partitions: fixed(1) + # How many rows to update per partition + select: fixed(1)/500 + # UNLOGGED or LOGGED batch for insert + batchtype: UNLOGGED +---- + +Currently all inserts are done inside batches. + +Read statements to use during the test: + +[source,yaml] +---- +queries: + events: + cql: select * from staff_activities where name = ? + fields: samerow + latest_event: + cql: select * from staff_activities where name = ? LIMIT 1 + fields: samerow +---- + +Running a user mode test: + +[source,yaml] +---- +cassandra-stress user profile=./example.yaml duration=1m "ops(insert=1,latest_event=1,events=1)" truncate=once +---- + +This will create the schema then run tests for 1 minute with an equal +number of inserts, latest_event queries and events queries. Additionally +the table will be truncated once before the test. + +The full example can be found here: +[source, yaml] +---- +include::example$YAML/stress-example.yaml[] +---- + +Running a user mode test with multiple yaml files:::: + cassandra-stress user profile=./example.yaml,./example2.yaml + duration=1m "ops(ex1.insert=1,ex1.latest_event=1,ex2.insert=2)" + truncate=once +This will run operations as specified in both the example.yaml and +example2.yaml files. example.yaml and example2.yaml can reference the +same table, although care must be taken that the table definition is identical + (data generation specs can be different). + +=== Lightweight transaction support + +cassandra-stress supports lightweight transactions. +To use this feature, the command will first read current data from Cassandra, and then uses read values to +fulfill lightweight transaction conditions. + +Lightweight transaction update query: + +[source,yaml] +---- +queries: + regularupdate: + cql: update blogposts set author = ? where domain = ? and published_date = ? + fields: samerow + updatewithlwt: + cql: update blogposts set author = ? where domain = ? and published_date = ? IF body = ? AND url = ? 
+ fields: samerow +---- + +The full example can be found here: +[source, yaml] +---- +include::example$YAML/stress-lwt-example.yaml[] +---- + +== Graphing + +Graphs can be generated for each run of stress. + +image::example-stress-graph.png[example cassandra-stress graph] + +To create a new graph: + +[source,yaml] +---- +cassandra-stress user profile=./stress-example.yaml "ops(insert=1,latest_event=1,events=1)" -graph file=graph.html title="Awesome graph" +---- + +To add a new run to an existing graph point to an existing file and add +a revision name: + +[source,yaml] +---- +cassandra-stress user profile=./stress-example.yaml duration=1m "ops(insert=1,latest_event=1,events=1)" -graph file=graph.html title="Awesome graph" revision="Second run" +---- + +== FAQ + +*How do you use NetworkTopologyStrategy for the keyspace?* + +Use the schema option making sure to either escape the parenthesis or +enclose in quotes: + +[source,yaml] +---- +cassandra-stress write -schema "replication(strategy=NetworkTopologyStrategy,datacenter1=3)" +---- + +*How do you use SSL?* + +Use the transport option: + +[source,yaml] +---- +cassandra-stress "write n=100k cl=ONE no-warmup" -transport "truststore=$HOME/jks/truststore.jks truststore-password=cassandra" +---- + +*Is Cassandra Stress a secured tool?* + +Cassandra stress is not a secured tool. Serialization and other aspects +of the tool offer no security guarantees. diff --git a/doc/modules/cassandra/pages/tools/cqlsh.adoc b/doc/modules/cassandra/pages/tools/cqlsh.adoc new file mode 100644 index 000000000000..162259a337b6 --- /dev/null +++ b/doc/modules/cassandra/pages/tools/cqlsh.adoc @@ -0,0 +1,482 @@ += cqlsh: the CQL shell + +`cqlsh` is a command-line interface for interacting with Cassandra using CQL (the Cassandra Query Language). +It is shipped with every Cassandra package, and can be found in the bin/ directory alongside the cassandra +executable. +`cqlsh` is implemented with the Python native protocol driver, and connects to the single specified node. + +== Compatibility + +`cqlsh` is compatible with Python 2.7. + +In general, a given version of `cqlsh` is only guaranteed to work with the +version of Cassandra that it was released with. +In some cases, `cqlsh` may work with older or newer versions of Cassandra, but this is not +officially supported. + +== Optional Dependencies + +`cqlsh` ships with all essential dependencies. However, there are some +optional dependencies that can be installed to improve the capabilities +of `cqlsh`. + +=== pytz + +By default, `cqlsh` displays all timestamps with a UTC timezone. +To support display of timestamps with another timezone, install +the http://pytz.sourceforge.net/[pytz] library. +See the `timezone` option in xref:cql/tools/cqlsh.adoc#cqlshrc[cqlshrc] for specifying a timezone to +use. + +=== cython + +The performance of cqlsh's `COPY` operations can be improved by +installing http://cython.org/[cython]. This will compile the python +modules that are central to the performance of `COPY`. + +[[cqlshrc]] +== cqlshrc + +The `cqlshrc` file holds configuration options for `cqlsh`. +By default, the file is locagted the user's home directory at `~/.cassandra/cqlsh`, but a +custom location can be specified with the `--cqlshrc` option. + +Example config values and documentation can be found in the +`conf/cqlshrc.sample` file of a tarball installation. +You can also view the latest version of the +https://github.com/apache/cassandra/blob/trunk/conf/cqlshrc.sample[cqlshrc file online]. 
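As a hedged sketch, a small `~/.cassandra/cqlshrc` might look like the following; the section and option names follow `conf/cqlshrc.sample`, while the host and credentials are placeholders:

[source,none]
----
[authentication]
username = app_user
password = app_password

[connection]
hostname = 127.0.0.1
port = 9042

[ui]
color = on
timezone = Etc/UTC
----

Note that the `timezone` option only takes effect when the optional `pytz` dependency described above is installed.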
+ +== Command Line Options + +Usage: + +`cqlsh [options] [host [port]]` + +Options: + +`-C` `--color`:: + Force color output +`--no-color`:: + Disable color output +`--browser`:: + Specify the browser to use for displaying cqlsh help. This can be one + of the https://docs.python.org/2/library/webbrowser.html[supported + browser names] (e.g. `firefox`) or a browser path followed by `%s` + (e.g. `/usr/bin/google-chrome-stable %s`). +`--ssl`:: + Use SSL when connecting to Cassandra +`-u` `--user`:: + Username to authenticate against Cassandra with +`-p` `--password`:: + Password to authenticate against Cassandra with, should be used in + conjunction with `--user` +`-k` `--keyspace`:: + Keyspace to authenticate to, should be used in conjunction with + `--user` +`-f` `--file`:: + Execute commands from the given file, then exit +`--debug`:: + Print additional debugging information +`--encoding`:: + Specify a non-default encoding for output (defaults to UTF-8) +`--cqlshrc`:: + Specify a non-default location for the `cqlshrc` file +`-e` `--execute`:: + Execute the given statement, then exit +`--connect-timeout`:: + Specify the connection timeout in seconds (defaults to 2s) +`--python /path/to/python`:: + Specify the full path to Python interpreter to override default on + systems with multiple interpreters installed +`--request-timeout`:: + Specify the request timeout in seconds (defaults to 10s) +`-t` `--tty`:: + Force tty mode (command prompt) + +== Special Commands + +In addition to supporting regular CQL statements, `cqlsh` also supports a +number of special commands that are not part of CQL. These are detailed +below. + +=== `CONSISTENCY` + +`Usage`: `CONSISTENCY ` + +Sets the consistency level for operations to follow. Valid arguments +include: + +* `ANY` +* `ONE` +* `TWO` +* `THREE` +* `QUORUM` +* `ALL` +* `LOCAL_QUORUM` +* `LOCAL_ONE` +* `SERIAL` +* `LOCAL_SERIAL` + +=== `SERIAL CONSISTENCY` + +`Usage`: `SERIAL CONSISTENCY ` + +Sets the serial consistency level for operations to follow. Valid +arguments include: + +* `SERIAL` +* `LOCAL_SERIAL` + +The serial consistency level is only used by conditional updates +(`INSERT`, `UPDATE` and `DELETE` with an `IF` condition). For those, the +serial consistency level defines the consistency level of the serial +phase (or “paxos” phase) while the normal consistency level defines the +consistency for the “learn” phase, i.e. what type of reads will be +guaranteed to see the update right away. For example, if a conditional +write has a consistency level of `QUORUM` (and is successful), then a +`QUORUM` read is guaranteed to see that write. But if the regular +consistency level of that write is `ANY`, then only a read with a +consistency level of `SERIAL` is guaranteed to see it (even a read with +consistency `ALL` is not guaranteed to be enough). + +=== `SHOW VERSION` + +Prints the `cqlsh`, Cassandra, CQL, and native protocol versions in use. +Example: + +[source,none] +---- +cqlsh> SHOW VERSION +[cqlsh 5.0.1 | Cassandra 3.8 | CQL spec 3.4.2 | Native protocol v4] +---- + +=== `SHOW HOST` + +Prints the IP address and port of the Cassandra node that `cqlsh` is +connected to in addition to the cluster name. Example: + +[source,none] +---- +cqlsh> SHOW HOST +Connected to Prod_Cluster at 192.0.0.1:9042. +---- + +=== `SHOW SESSION` + +Pretty prints a specific tracing session. 
+ +`Usage`: `SHOW SESSION ` + +Example usage: + +[source,none] +---- +cqlsh> SHOW SESSION 95ac6470-327e-11e6-beca-dfb660d92ad8 + +Tracing session: 95ac6470-327e-11e6-beca-dfb660d92ad8 + + activity | timestamp | source | source_elapsed | client +-----------------------------------------------------------+----------------------------+-----------+----------------+----------- + Execute CQL3 query | 2016-06-14 17:23:13.979000 | 127.0.0.1 | 0 | 127.0.0.1 + Parsing SELECT * FROM system.local; [SharedPool-Worker-1] | 2016-06-14 17:23:13.982000 | 127.0.0.1 | 3843 | 127.0.0.1 +... +---- + +=== `SOURCE` + +Reads the contents of a file and executes each line as a CQL statement +or special cqlsh command. + +`Usage`: `SOURCE ` + +Example usage: + +[source,none] +---- +cqlsh> SOURCE '/home/calvinhobbs/commands.cql' +---- + +=== `CAPTURE` + +Begins capturing command output and appending it to a specified file. +Output will not be shown at the console while it is captured. + +`Usage`: + +[source,none] +---- +CAPTURE ''; +CAPTURE OFF; +CAPTURE; +---- + +That is, the path to the file to be appended to must be given inside a +string literal. The path is interpreted relative to the current working +directory. The tilde shorthand notation (`'~/mydir'`) is supported for +referring to `$HOME`. + +Only query result output is captured. Errors and output from cqlsh-only +commands will still be shown in the cqlsh session. + +To stop capturing output and show it in the cqlsh session again, use +`CAPTURE OFF`. + +To inspect the current capture configuration, use `CAPTURE` with no +arguments. + +=== `HELP` + +Gives information about cqlsh commands. To see available topics, enter +`HELP` without any arguments. To see help on a topic, use +`HELP `. Also see the `--browser` argument for controlling what +browser is used to display help. + +=== `TRACING` + +Enables or disables tracing for queries. When tracing is enabled, once a +query completes, a trace of the events during the query will be printed. + +`Usage`: + +[source,none] +---- +TRACING ON +TRACING OFF +---- + +=== `PAGING` + +Enables paging, disables paging, or sets the page size for read queries. +When paging is enabled, only one page of data will be fetched at a time +and a prompt will appear to fetch the next page. Generally, it's a good +idea to leave paging enabled in an interactive session to avoid fetching +and printing large amounts of data at once. + +`Usage`: + +[source,none] +---- +PAGING ON +PAGING OFF +PAGING +---- + +=== `EXPAND` + +Enables or disables vertical printing of rows. Enabling `EXPAND` is +useful when many columns are fetched, or the contents of a single column +are large. + +`Usage`: + +[source,none] +---- +EXPAND ON +EXPAND OFF +---- + +=== `LOGIN` + +Authenticate as a specified Cassandra user for the current session. + +`Usage`: + +[source,none] +---- +LOGIN [] +---- + +=== `EXIT` + +Ends the current session and terminates the cqlsh process. + +`Usage`: + +[source,none] +---- +EXIT +QUIT +---- + +=== `CLEAR` + +Clears the console. + +`Usage`: + +[source,none] +---- +CLEAR +CLS +---- + +=== `DESCRIBE` + +Prints a description (typically a series of DDL statements) of a schema +element or the cluster. This is useful for dumping all or portions of +the schema. + +`Usage`: + +[source,none] +---- +DESCRIBE CLUSTER +DESCRIBE SCHEMA +DESCRIBE KEYSPACES +DESCRIBE KEYSPACE +DESCRIBE TABLES +DESCRIBE TABLE
+DESCRIBE INDEX +DESCRIBE MATERIALIZED VIEW +DESCRIBE TYPES +DESCRIBE TYPE +DESCRIBE FUNCTIONS +DESCRIBE FUNCTION +DESCRIBE AGGREGATES +DESCRIBE AGGREGATE +---- + +In any of the commands, `DESC` may be used in place of `DESCRIBE`. + +The `DESCRIBE CLUSTER` command prints the cluster name and partitioner: + +[source,none] +---- +cqlsh> DESCRIBE CLUSTER + +Cluster: Test Cluster +Partitioner: Murmur3Partitioner +---- + +The `DESCRIBE SCHEMA` command prints the DDL statements needed to +recreate the entire schema. This is especially useful for dumping the +schema in order to clone a cluster or restore from a backup. + +=== `COPY TO` + +Copies data from a table to a CSV file. + +`Usage`: + +[source,none] +---- +COPY
<table name> [(<column>, ...)] TO <file name> WITH <copy option> [AND <copy option> ...]
+----
+
+If no columns are specified, all columns from the table will be copied
+to the CSV file. A subset of columns to copy may be specified by adding
+a comma-separated list of column names surrounded by parentheses after
+the table name.
+
+The `<file name>` should be a string literal (with single quotes)
+representing a path to the destination file. This can also be the special
+value `STDOUT` (without single quotes) to print the CSV to stdout.
+
+See _Shared COPY Options_ below for options that apply to both `COPY TO` and
+`COPY FROM`.
+
+==== Options for `COPY TO`
+
+`MAXREQUESTS`::
+  The maximum number of token ranges to fetch simultaneously. Defaults to
+  6.
+`PAGESIZE`::
+  The number of rows to fetch in a single page. Defaults to 1000.
+`PAGETIMEOUT`::
+  By default, the page timeout is 10 seconds per 1000 entries in the page
+  size, or 10 seconds if pagesize is smaller.
+`BEGINTOKEN`, `ENDTOKEN`::
+  Token range to export. Defaults to exporting the full ring.
+`MAXOUTPUTSIZE`::
+  The maximum size of the output file measured in number of lines;
+  beyond this maximum the output file will be split into segments. -1
+  means unlimited, and is the default.
+`ENCODING`::
+  The encoding used for characters. Defaults to `utf8`.
+
+=== `COPY FROM`
+
+Copies data from a CSV file to a table.
+
+`Usage`:
+
+[source,none]
+----
+COPY <table name> [(<column>, ...)] FROM <file name> WITH <copy option> [AND <copy option> ...]
+----
+
+If no columns are specified, all columns from the CSV file will be
+copied to the table. A subset of columns to copy may be specified by
+adding a comma-separated list of column names surrounded by parentheses
+after the table name.
+
+The `<file name>` should be a string literal (with single quotes)
+representing a path to the source file. This can also be the special value
+`STDIN` (without single quotes) to read the CSV data from stdin.
+
+See _Shared COPY Options_ below for options that apply to both `COPY TO` and
+`COPY FROM`.
+
+==== Options for `COPY FROM`
+
+`INGESTRATE`::
+  The maximum number of rows to process per second. Defaults to 100000.
+`MAXROWS`::
+  The maximum number of rows to import. -1 means unlimited, and is the
+  default.
+`SKIPROWS`::
+  A number of initial rows to skip. Defaults to 0.
+`SKIPCOLS`::
+  A comma-separated list of column names to ignore. By default, no
+  columns are skipped.
+`MAXPARSEERRORS`::
+  The maximum global number of parsing errors to ignore. -1 means
+  unlimited, and is the default.
+`MAXINSERTERRORS`::
+  The maximum global number of insert errors to ignore. -1 means
+  unlimited. The default is 1000.
+`ERRFILE`::
+  A file to store all rows that could not be imported; by default this
+  is `import_<ks>_<table>.err` where `<ks>` is your keyspace and `<table>
` is your table name. +`MAXBATCHSIZE`:: + The max number of rows inserted in a single batch. Defaults to 20. +`MINBATCHSIZE`:: + The min number of rows inserted in a single batch. Defaults to 2. +`CHUNKSIZE`:: + The number of rows that are passed to child worker processes from the + main process at a time. Defaults to 1000. + +==== Shared COPY Options + +Options that are common to both `COPY TO` and `COPY FROM`. + +`NULLVAL`:: + The string placeholder for null values. Defaults to `null`. +`HEADER`:: + For `COPY TO`, controls whether the first line in the CSV output file + will contain the column names. For COPY FROM, specifies whether the + first line in the CSV input file contains column names. Defaults to + `false`. +`DECIMALSEP`:: + The character that is used as the decimal point separator. Defaults to + `.`. +`THOUSANDSSEP`:: + The character that is used to separate thousands. Defaults to the + empty string. +`BOOLSTYlE`:: + The string literal format for boolean values. Defaults to + `True,False`. +`NUMPROCESSES`:: + The number of child worker processes to create for `COPY` tasks. + Defaults to a max of 4 for `COPY FROM` and 16 for `COPY TO`. However, + at most (num_cores - 1) processes will be created. +`MAXATTEMPTS`:: + The maximum number of failed attempts to fetch a range of data (when + using `COPY TO`) or insert a chunk of data (when using `COPY FROM`) + before giving up. Defaults to 5. +`REPORTFREQUENCY`:: + How often status updates are refreshed, in seconds. Defaults to 0.25. +`RATEFILE`:: + An optional file to output rate statistics to. By default, statistics + are not output to a file. diff --git a/doc/modules/cassandra/pages/tools/index.adoc b/doc/modules/cassandra/pages/tools/index.adoc new file mode 100644 index 000000000000..a25af555cb46 --- /dev/null +++ b/doc/modules/cassandra/pages/tools/index.adoc @@ -0,0 +1,9 @@ += Cassandra Tools + +This section describes the command line tools provided with Apache +Cassandra. + +* xref:tools/cqlsh.adoc[CQL shell] +* xref:tools/nodetool/nodetool.adoc[nodetool] +* xref:tools/sstable/index.adoc[SSTable tools] +* xref:tools/cassandra_stress.adoc[cassandra-stress tool] diff --git a/doc/modules/cassandra/pages/tools/sstable/index.adoc b/doc/modules/cassandra/pages/tools/sstable/index.adoc new file mode 100644 index 000000000000..cb787ece2176 --- /dev/null +++ b/doc/modules/cassandra/pages/tools/sstable/index.adoc @@ -0,0 +1,20 @@ += SSTable Tools + +This section describes the functionality of the various sstable tools. + +Cassandra must be stopped before these tools are executed, or unexpected +results will occur. Note: the scripts do not verify that Cassandra is +stopped. 
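+
+Since none of the tools below check this for you, it is worth verifying that
+the node is really down before running them. A minimal sketch, assuming a
+typical package installation managed by systemd (adapt the service name and
+commands to your environment):
+
+....
+# flush memtables and stop the node (service name may differ per install)
+nodetool drain
+sudo systemctl stop cassandra
+
+# confirm no Cassandra JVM is still running
+pgrep -f CassandraDaemon || echo "Cassandra is stopped"
+....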
+ +* xref:tools/sstable/sstabledump.adoc[sstabledump] +* xref:tools/sstable/sstableexpiredblockers.adoc[sstableexpiredblockers] +* xref:tools/sstable/sstablelevelreset.adoc[sstablelevelreset] +* xref:tools/sstable/sstableloader.adoc[sstableloader] +* xref:tools/sstable/sstablemetadata.adoc[sstablemetadata] +* xref:tools/sstable/sstableofflinerelevel.adoc[sstableofflinerelevel] +* xref:tools/sstable/sstablerepairedset.adoc[sstablerepairdset] +* xref:tools/sstable/sstablescrub.adoc[sstablescrub] +* xref:tools/sstable/sstablesplit.adoc[sstablesplit] +* xref:tools/sstable/sstableupgrade.adoc[sstableupgrade] +* xref:tools/sstable/sstableutil.adoc[sstableutil] +* xref:tools/sstable/sstableverify.adoc[sstableverify] diff --git a/doc/modules/cassandra/pages/tools/sstable/sstabledump.adoc b/doc/modules/cassandra/pages/tools/sstable/sstabledump.adoc new file mode 100644 index 000000000000..90f66b885485 --- /dev/null +++ b/doc/modules/cassandra/pages/tools/sstable/sstabledump.adoc @@ -0,0 +1,286 @@ += sstabledump + +Dump contents of a given SSTable to standard output in JSON format. + +You must supply exactly one sstable. + +Cassandra must be stopped before this tool is executed, or unexpected +results will occur. Note: the script does not verify that Cassandra is +stopped. + +== Usage + +sstabledump + +[cols=",",] +|=== +|-d |CQL row per line internal representation +|-e |Enumerate partition keys only +|-k |Partition key +|-x |Excluded partition key(s) +|-t |Print raw timestamps instead of iso8601 date strings +|-l |Output each row as a separate JSON object +|=== + +If necessary, use sstableutil first to find out the sstables used by a +table. + +== Dump entire table + +Dump the entire table without any options. + +Example: + +.... +sstabledump /var/lib/cassandra/data/keyspace/eventlog-65c429e08c5a11e8939edf4f403979ef/mc-1-big-Data.db > eventlog_dump_2018Jul26 + +cat eventlog_dump_2018Jul26 +[ + { + "partition" : { + "key" : [ "3578d7de-c60d-4599-aefb-3f22a07b2bc6" ], + "position" : 0 + }, + "rows" : [ + { + "type" : "row", + "position" : 61, + "liveness_info" : { "tstamp" : "2018-07-20T20:23:08.378711Z" }, + "cells" : [ + { "name" : "event", "value" : "party" }, + { "name" : "insertedtimestamp", "value" : "2018-07-20 20:23:08.384Z" }, + { "name" : "source", "value" : "asdf" } + ] + } + ] + }, + { + "partition" : { + "key" : [ "d18250c0-84fc-4d40-b957-4248dc9d790e" ], + "position" : 62 + }, + "rows" : [ + { + "type" : "row", + "position" : 123, + "liveness_info" : { "tstamp" : "2018-07-20T20:23:07.783522Z" }, + "cells" : [ + { "name" : "event", "value" : "party" }, + { "name" : "insertedtimestamp", "value" : "2018-07-20 20:23:07.789Z" }, + { "name" : "source", "value" : "asdf" } + ] + } + ] + }, + { + "partition" : { + "key" : [ "cf188983-d85b-48d6-9365-25005289beb2" ], + "position" : 124 + }, + "rows" : [ + { + "type" : "row", + "position" : 182, + "liveness_info" : { "tstamp" : "2018-07-20T20:22:27.028809Z" }, + "cells" : [ + { "name" : "event", "value" : "party" }, + { "name" : "insertedtimestamp", "value" : "2018-07-20 20:22:27.055Z" }, + { "name" : "source", "value" : "asdf" } + ] + } + ] + } +] +.... + +== Dump table in a more manageable format + +Use the -l option to dump each row as a separate JSON object. This will +make the output easier to manipulate for large data sets. ref: +https://issues.apache.org/jira/browse/CASSANDRA-13848 + +Example: + +.... 
+sstabledump /var/lib/cassandra/data/keyspace/eventlog-65c429e08c5a11e8939edf4f403979ef/mc-1-big-Data.db -l > eventlog_dump_2018Jul26_justlines + +cat eventlog_dump_2018Jul26_justlines +[ + { + "partition" : { + "key" : [ "3578d7de-c60d-4599-aefb-3f22a07b2bc6" ], + "position" : 0 + }, + "rows" : [ + { + "type" : "row", + "position" : 61, + "liveness_info" : { "tstamp" : "2018-07-20T20:23:08.378711Z" }, + "cells" : [ + { "name" : "event", "value" : "party" }, + { "name" : "insertedtimestamp", "value" : "2018-07-20 20:23:08.384Z" }, + { "name" : "source", "value" : "asdf" } + ] + } + ] + }, + { + "partition" : { + "key" : [ "d18250c0-84fc-4d40-b957-4248dc9d790e" ], + "position" : 62 + }, + "rows" : [ + { + "type" : "row", + "position" : 123, + "liveness_info" : { "tstamp" : "2018-07-20T20:23:07.783522Z" }, + "cells" : [ + { "name" : "event", "value" : "party" }, + { "name" : "insertedtimestamp", "value" : "2018-07-20 20:23:07.789Z" }, + { "name" : "source", "value" : "asdf" } + ] + } + ] + }, + { + "partition" : { + "key" : [ "cf188983-d85b-48d6-9365-25005289beb2" ], + "position" : 124 + }, + "rows" : [ + { + "type" : "row", + "position" : 182, + "liveness_info" : { "tstamp" : "2018-07-20T20:22:27.028809Z" }, + "cells" : [ + { "name" : "event", "value" : "party" }, + { "name" : "insertedtimestamp", "value" : "2018-07-20 20:22:27.055Z" }, + { "name" : "source", "value" : "asdf" } + ] + } + ] + } +.... + +== Dump only keys + +Dump only the keys by using the -e option. + +Example: + +.... +sstabledump /var/lib/cassandra/data/keyspace/eventlog-65c429e08c5a11e8939edf4f403979ef/mc-1-big-Data.db -e > eventlog_dump_2018Jul26_justkeys + +cat eventlog_dump_2018Jul26b +[ [ "3578d7de-c60d-4599-aefb-3f22a07b2bc6" ], [ "d18250c0-84fc-4d40-b957-4248dc9d790e" ], [ "cf188983-d85b-48d6-9365-25005289beb2" ] +.... + +== Dump row for a single key + +Dump a single key using the -k option. + +Example: + +.... +sstabledump /var/lib/cassandra/data/keyspace/eventlog-65c429e08c5a11e8939edf4f403979ef/mc-1-big-Data.db -k 3578d7de-c60d-4599-aefb-3f22a07b2bc6 > eventlog_dump_2018Jul26_singlekey + +cat eventlog_dump_2018Jul26_singlekey +[ + { + "partition" : { + "key" : [ "3578d7de-c60d-4599-aefb-3f22a07b2bc6" ], + "position" : 0 + }, + "rows" : [ + { + "type" : "row", + "position" : 61, + "liveness_info" : { "tstamp" : "2018-07-20T20:23:08.378711Z" }, + "cells" : [ + { "name" : "event", "value" : "party" }, + { "name" : "insertedtimestamp", "value" : "2018-07-20 20:23:08.384Z" }, + { "name" : "source", "value" : "asdf" } + ] + } + ] + } +.... + +== Exclude a key or keys in dump of rows + +Dump a table except for the rows excluded with the -x option. Multiple +keys can be used. + +Example: + +.... +sstabledump /var/lib/cassandra/data/keyspace/eventlog-65c429e08c5a11e8939edf4f403979ef/mc-1-big-Data.db -x 3578d7de-c60d-4599-aefb-3f22a07b2bc6 d18250c0-84fc-4d40-b957-4248dc9d790e > eventlog_dump_2018Jul26_excludekeys + +cat eventlog_dump_2018Jul26_excludekeys +[ + { + "partition" : { + "key" : [ "cf188983-d85b-48d6-9365-25005289beb2" ], + "position" : 0 + }, + "rows" : [ + { + "type" : "row", + "position" : 182, + "liveness_info" : { "tstamp" : "2018-07-20T20:22:27.028809Z" }, + "cells" : [ + { "name" : "event", "value" : "party" }, + { "name" : "insertedtimestamp", "value" : "2018-07-20 20:22:27.055Z" }, + { "name" : "source", "value" : "asdf" } + ] + } + ] + } +.... + +== Display raw timestamps + +By default, dates are displayed in iso8601 date format. Using the -t +option will dump the data with the raw timestamp. 
+ +Example: + +.... +sstabledump /var/lib/cassandra/data/keyspace/eventlog-65c429e08c5a11e8939edf4f403979ef/mc-1-big-Data.db -t -k cf188983-d85b-48d6-9365-25005289beb2 > eventlog_dump_2018Jul26_times + +cat eventlog_dump_2018Jul26_times +[ + { + "partition" : { + "key" : [ "cf188983-d85b-48d6-9365-25005289beb2" ], + "position" : 124 + }, + "rows" : [ + { + "type" : "row", + "position" : 182, + "liveness_info" : { "tstamp" : "1532118147028809" }, + "cells" : [ + { "name" : "event", "value" : "party" }, + { "name" : "insertedtimestamp", "value" : "2018-07-20 20:22:27.055Z" }, + { "name" : "source", "value" : "asdf" } + ] + } + ] + } +.... + +== Display internal structure in output + +Dump the table in a format that reflects the internal structure. + +Example: + +.... +sstabledump /var/lib/cassandra/data/keyspace/eventlog-65c429e08c5a11e8939edf4f403979ef/mc-1-big-Data.db -d > eventlog_dump_2018Jul26_d + +cat eventlog_dump_2018Jul26_d +[3578d7de-c60d-4599-aefb-3f22a07b2bc6]@0 Row[info=[ts=1532118188378711] ]: | [event=party ts=1532118188378711], [insertedtimestamp=2018-07-20 20:23Z ts=1532118188378711], [source=asdf ts=1532118188378711] +[d18250c0-84fc-4d40-b957-4248dc9d790e]@62 Row[info=[ts=1532118187783522] ]: | [event=party ts=1532118187783522], [insertedtimestamp=2018-07-20 20:23Z ts=1532118187783522], [source=asdf ts=1532118187783522] +[cf188983-d85b-48d6-9365-25005289beb2]@124 Row[info=[ts=1532118147028809] ]: | [event=party ts=1532118147028809], [insertedtimestamp=2018-07-20 20:22Z ts=1532118147028809], [source=asdf ts=1532118147028809] +.... diff --git a/doc/modules/cassandra/pages/tools/sstable/sstableexpiredblockers.adoc b/doc/modules/cassandra/pages/tools/sstable/sstableexpiredblockers.adoc new file mode 100644 index 000000000000..2090ad5fec18 --- /dev/null +++ b/doc/modules/cassandra/pages/tools/sstable/sstableexpiredblockers.adoc @@ -0,0 +1,42 @@ += sstableexpiredblockers + +During compaction, entire sstables can be dropped if they contain only +expired tombstones, and if it is guaranteed that the data is not newer +than the data in other sstables. An expired sstable can be blocked from +getting dropped if its newest timestamp is newer than the oldest data in +another sstable. + +This tool is used to list all sstables that are blocking other sstables +from getting dropped (by having older data than the newest tombstone in +an expired sstable) so a user can figure out why certain sstables are +still on disk. + +ref: https://issues.apache.org/jira/browse/CASSANDRA-10015 + +Cassandra must be stopped before this tool is executed, or unexpected +results will occur. Note: the script does not verify that Cassandra is +stopped. + +== Usage + +sstableexpiredblockers
+
+== Output blocked sstables
+
+If the sstables exist for the table, but no sstables hold data older than
+the newest tombstone in an expired sstable, the script will return
+nothing.
+
+Otherwise, the script will return output of the form
+`<blocking sstable> blocks <#> expired sstables from getting dropped`,
+followed by a list of the blocked sstables.
+
+Example:
+
+....
+sstableexpiredblockers keyspace1 standard1
+
+[BigTableReader(path='/var/lib/cassandra/data/keyspace1/standard1-0665ae80b2d711e886c66d2c86545d91/mc-2-big-Data.db') (minTS = 5, maxTS = 5, maxLDT = 2147483647)], blocks 1 expired sstables from getting dropped: [BigTableReader(path='/var/lib/cassandra/data/keyspace1/standard1-0665ae80b2d711e886c66d2c86545d91/mc-3-big-Data.db') (minTS = 1536349775157606, maxTS = 1536349780311159, maxLDT = 1536349780)],
+
+[BigTableReader(path='/var/lib/cassandra/data/keyspace1/standard1-0665ae80b2d711e886c66d2c86545d91/mc-1-big-Data.db') (minTS = 1, maxTS = 10, maxLDT = 2147483647)], blocks 1 expired sstables from getting dropped: [BigTableReader(path='/var/lib/cassandra/data/keyspace1/standard1-0665ae80b2d711e886c66d2c86545d91/mc-3-big-Data.db') (minTS = 1536349775157606, maxTS = 1536349780311159, maxLDT = 1536349780)],
+....
diff --git a/doc/modules/cassandra/pages/tools/sstable/sstablelevelreset.adoc b/doc/modules/cassandra/pages/tools/sstable/sstablelevelreset.adoc
new file mode 100644
index 000000000000..65dc02e25c58
--- /dev/null
+++ b/doc/modules/cassandra/pages/tools/sstable/sstablelevelreset.adoc
@@ -0,0 +1,69 @@
+= sstablelevelreset
+
+If LeveledCompactionStrategy is set, this script can be used to reset
+the level to 0 on a given set of sstables. This is useful if you want to,
+for example, change the minimum sstable size, and therefore restart the
+compaction process using this new configuration.
+
+See
+http://cassandra.apache.org/doc/latest/operating/compaction.html#leveled-compaction-strategy
+for information on how levels are used in this compaction strategy.
+
+Cassandra must be stopped before this tool is executed, or unexpected
+results will occur. Note: the script does not verify that Cassandra is
+stopped.
+
+ref: https://issues.apache.org/jira/browse/CASSANDRA-5271
+
+== Usage
+
+sstablelevelreset --really-reset <keyspace> <table>
+ +The really-reset flag is required, to ensure this intrusive command is +not run accidentally. + +== Table not found + +If the keyspace and/or table is not in the schema (e.g., if you +misspelled the table name), the script will return an error. + +Example: + +.... +ColumnFamily not found: keyspace/evenlog. +.... + +== Table has no sstables + +Example: + +.... +Found no sstables, did you give the correct keyspace/table? +.... + +== Table already at level 0 + +The script will not set the level if it is already set to 0. + +Example: + +.... +Skipped /var/lib/cassandra/data/keyspace/eventlog-65c429e08c5a11e8939edf4f403979ef/mc-1-big-Data.db since it is already on level 0 +.... + +== Table levels reduced to 0 + +If the level is not already 0, then this will reset it to 0. + +Example: + +.... +sstablemetadata /var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-8-big-Data.db | grep -i level +SSTable Level: 1 + +sstablelevelreset --really-reset keyspace eventlog +Changing level from 1 to 0 on /var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-8-big-Data.db + +sstablemetadata /var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-8-big-Data.db | grep -i level +SSTable Level: 0 +.... diff --git a/doc/modules/cassandra/pages/tools/sstable/sstableloader.adoc b/doc/modules/cassandra/pages/tools/sstable/sstableloader.adoc new file mode 100644 index 000000000000..4234a0baa4fc --- /dev/null +++ b/doc/modules/cassandra/pages/tools/sstable/sstableloader.adoc @@ -0,0 +1,316 @@ += sstableloader + +Bulk-load the sstables found in the directory to the +configured cluster. The parent directories of are used as the +target keyspace/table name. For example, to load an sstable named +ma-1-big-Data.db into keyspace1/standard1, you will need to have the +files ma-1-big-Data.db and ma-1-big-Index.db in a directory +/path/to/keyspace1/standard1/. The tool will create new sstables, and +does not clean up your copied files. + +Several of the options listed below don't work quite as intended, and in +those cases, workarounds are mentioned for specific use cases. + +To avoid having the sstable files to be loaded compacted while reading +them, place the files in an alternate keyspace/table path than the data +directory. + +ref: https://issues.apache.org/jira/browse/CASSANDRA-1278 + +Cassandra must be stopped before this tool is executed, or unexpected +results will occur. Note: the script does not verify that Cassandra is +stopped. + +== Usage + +sstableloader + +[cols=",",] +|=== +|-d, --nodes |Required. 
Try to connect to these hosts +(comma-separated) initially for ring information + +|-u, --username |username for Cassandra authentication + +|-pw, --password |password for Cassandra authentication + +|-p, --port |port used for native connection +(default 9042) + +|-sp, --storage-port |port used for internode +communication (default 7000) + +|-ssp, --ssl-storage-port |port used for TLS +internode communication (default 7001) + +|--no-progress |don't display progress + +|-t, --throttle |throttle speed in Mbits (default unlimited) + +|-idct, --inter-dc-throttle |inter-datacenter +throttle speed in Mbits (default unlimited) + +|-cph, --connections-per-host |number of concurrent +connections-per-host + +|-i, --ignore |don't stream to this (comma separated) list of +nodes + +|-alg, --ssl-alg |Client SSL: algorithm (default: SunX509) + +|-ciphers, --ssl-ciphers |Client SSL: comma-separated +list of encryption suites to use + +|-ks, --keystore |Client SSL: full path to keystore + +|-kspw, --keystore-password |Client SSL: password of +the keystore + +|-st, --store-type |Client SSL: type of store + +|-ts, --truststore |Client SSL: full path to truststore + +|-tspw, --truststore-password |Client SSL: +password of the truststore + +|-prtcl, --ssl-protocol |Client SSL: connections protocol to +use (default: TLS) + +|-ap, --auth-provider |custom AuthProvider class name +for cassandra authentication + +|-f, --conf-path |cassandra.yaml file path for +streaming throughput and client/server SSL + +|-v, --verbose |verbose output + +|-h, --help |display this help message +|=== + +You can provide a cassandra.yaml file with the -f command line option to +set up streaming throughput, and client and server encryption options. +Only stream_throughput_outbound_megabits_per_sec, +server_encryption_options, and client_encryption_options are read from +yaml. You can override options read from cassandra.yaml with +corresponding command line options. + +== Load sstables from a Snapshot + +Copy the snapshot sstables into an accessible directory and use +sstableloader to restore them. + +Example: + +.... +cp snapshots/1535397029191/* /path/to/keyspace1/standard1/ + +sstableloader --nodes 172.17.0.2 /var/lib/cassandra/loadme/keyspace1/standard1-f8a4fa30aa2a11e8af27091830ac5256/ +Established connection to initial hosts +Opening sstables and calculating sections to stream +Streaming relevant part of /var/lib/cassandra/loadme/keyspace1/standard1-f8a4fa30aa2a11e8af27091830ac5256/ma-3-big-Data.db to [/172.17.0.2] +progress: [/172.17.0.2]0:1/1 100% total: 100% 0 MB/s(avg: 1 MB/s) +Summary statistics: + Connections per host: : 1 + Total files transferred: : 1 + Total bytes transferred: : 4700000 + Total duration (ms): : 4390 + Average transfer rate (MB/s): : 1 + Peak transfer rate (MB/s): : 1 +.... + +The -d or --nodes option is required, or the script will not run. + +Example: + +.... +sstableloader /var/lib/cassandra/loadme/keyspace1/standard1-f8a4fa30aa2a11e8af27091830ac5256/ +Initial hosts must be specified (-d) +.... + +== Use a Config File for SSL Clusters + +If SSL encryption is enabled in the cluster, use the --conf-path option +with sstableloader to point the tool to the cassandra.yaml with the +relevant server_encryption_options (e.g., truststore location, +algorithm). This will work better than passing individual ssl options +shown above to sstableloader on the command line. + +Example: + +.... 
+sstableloader --nodes 172.17.0.2 --conf-path /etc/cassandra/cassandra.yaml /var/lib/cassandra/loadme/keyspace1/standard1-0974e5a0aa5811e8a0a06d2c86545d91/snapshots/ +Established connection to initial hosts +Opening sstables and calculating sections to stream +Streaming relevant part of /var/lib/cassandra/loadme/keyspace1/standard1-0974e5a0aa5811e8a0a06d2c86545d91/mc-1-big-Data.db to [/172.17.0.2] +progress: [/172.17.0.2]0:0/1 1 % total: 1% 9.165KiB/s (avg: 9.165KiB/s) +progress: [/172.17.0.2]0:0/1 2 % total: 2% 5.147MiB/s (avg: 18.299KiB/s) +progress: [/172.17.0.2]0:0/1 4 % total: 4% 9.751MiB/s (avg: 27.423KiB/s) +progress: [/172.17.0.2]0:0/1 5 % total: 5% 8.203MiB/s (avg: 36.524KiB/s) +... +progress: [/172.17.0.2]0:1/1 100% total: 100% 0.000KiB/s (avg: 480.513KiB/s) + +Summary statistics: + Connections per host : 1 + Total files transferred : 1 + Total bytes transferred : 4.387MiB + Total duration : 9356 ms + Average transfer rate : 480.105KiB/s + Peak transfer rate : 586.410KiB/s +.... + +== Hide Progress Output + +To hide the output of progress and the summary statistics (e.g., if you +wanted to use this tool in a script), use the --no-progress option. + +Example: + +.... +sstableloader --nodes 172.17.0.2 --no-progress /var/lib/cassandra/loadme/keyspace1/standard1-f8a4fa30aa2a11e8af27091830ac5256/ +Established connection to initial hosts +Opening sstables and calculating sections to stream +Streaming relevant part of /var/lib/cassandra/loadme/keyspace1/standard1-f8a4fa30aa2a11e8af27091830ac5256/ma-4-big-Data.db to [/172.17.0.2] +.... + +== Get More Detail + +Using the --verbose option will provide much more progress output. + +Example: + +.... +sstableloader --nodes 172.17.0.2 --verbose /var/lib/cassandra/loadme/keyspace1/standard1-0974e5a0aa5811e8a0a06d2c86545d91/ +Established connection to initial hosts +Opening sstables and calculating sections to stream +Streaming relevant part of /var/lib/cassandra/loadme/keyspace1/standard1-0974e5a0aa5811e8a0a06d2c86545d91/mc-1-big-Data.db to [/172.17.0.2] +progress: [/172.17.0.2]0:0/1 1 % total: 1% 12.056KiB/s (avg: 12.056KiB/s) +progress: [/172.17.0.2]0:0/1 2 % total: 2% 9.092MiB/s (avg: 24.081KiB/s) +progress: [/172.17.0.2]0:0/1 4 % total: 4% 18.832MiB/s (avg: 36.099KiB/s) +progress: [/172.17.0.2]0:0/1 5 % total: 5% 2.253MiB/s (avg: 47.882KiB/s) +progress: [/172.17.0.2]0:0/1 7 % total: 7% 6.388MiB/s (avg: 59.743KiB/s) +progress: [/172.17.0.2]0:0/1 8 % total: 8% 14.606MiB/s (avg: 71.635KiB/s) +progress: [/172.17.0.2]0:0/1 9 % total: 9% 8.880MiB/s (avg: 83.465KiB/s) +progress: [/172.17.0.2]0:0/1 11 % total: 11% 5.217MiB/s (avg: 95.176KiB/s) +progress: [/172.17.0.2]0:0/1 12 % total: 12% 12.563MiB/s (avg: 106.975KiB/s) +progress: [/172.17.0.2]0:0/1 14 % total: 14% 2.550MiB/s (avg: 118.322KiB/s) +progress: [/172.17.0.2]0:0/1 15 % total: 15% 16.638MiB/s (avg: 130.063KiB/s) +progress: [/172.17.0.2]0:0/1 17 % total: 17% 17.270MiB/s (avg: 141.793KiB/s) +progress: [/172.17.0.2]0:0/1 18 % total: 18% 11.280MiB/s (avg: 153.452KiB/s) +progress: [/172.17.0.2]0:0/1 19 % total: 19% 2.903MiB/s (avg: 164.603KiB/s) +progress: [/172.17.0.2]0:0/1 21 % total: 21% 6.744MiB/s (avg: 176.061KiB/s) +progress: [/172.17.0.2]0:0/1 22 % total: 22% 6.011MiB/s (avg: 187.440KiB/s) +progress: [/172.17.0.2]0:0/1 24 % total: 24% 9.690MiB/s (avg: 198.920KiB/s) +progress: [/172.17.0.2]0:0/1 25 % total: 25% 11.481MiB/s (avg: 210.412KiB/s) +progress: [/172.17.0.2]0:0/1 27 % total: 27% 9.957MiB/s (avg: 221.848KiB/s) +progress: [/172.17.0.2]0:0/1 28 % total: 28% 10.270MiB/s (avg: 
233.265KiB/s) +progress: [/172.17.0.2]0:0/1 29 % total: 29% 7.812MiB/s (avg: 244.571KiB/s) +progress: [/172.17.0.2]0:0/1 31 % total: 31% 14.843MiB/s (avg: 256.021KiB/s) +progress: [/172.17.0.2]0:0/1 32 % total: 32% 11.457MiB/s (avg: 267.394KiB/s) +progress: [/172.17.0.2]0:0/1 34 % total: 34% 6.550MiB/s (avg: 278.536KiB/s) +progress: [/172.17.0.2]0:0/1 35 % total: 35% 9.115MiB/s (avg: 289.782KiB/s) +progress: [/172.17.0.2]0:0/1 37 % total: 37% 11.054MiB/s (avg: 301.064KiB/s) +progress: [/172.17.0.2]0:0/1 38 % total: 38% 10.449MiB/s (avg: 312.307KiB/s) +progress: [/172.17.0.2]0:0/1 39 % total: 39% 1.646MiB/s (avg: 321.665KiB/s) +progress: [/172.17.0.2]0:0/1 41 % total: 41% 13.300MiB/s (avg: 332.872KiB/s) +progress: [/172.17.0.2]0:0/1 42 % total: 42% 14.370MiB/s (avg: 344.082KiB/s) +progress: [/172.17.0.2]0:0/1 44 % total: 44% 16.734MiB/s (avg: 355.314KiB/s) +progress: [/172.17.0.2]0:0/1 45 % total: 45% 22.245MiB/s (avg: 366.592KiB/s) +progress: [/172.17.0.2]0:0/1 47 % total: 47% 25.561MiB/s (avg: 377.882KiB/s) +progress: [/172.17.0.2]0:0/1 48 % total: 48% 24.543MiB/s (avg: 389.155KiB/s) +progress: [/172.17.0.2]0:0/1 49 % total: 49% 4.894MiB/s (avg: 399.688KiB/s) +progress: [/172.17.0.2]0:0/1 51 % total: 51% 8.331MiB/s (avg: 410.559KiB/s) +progress: [/172.17.0.2]0:0/1 52 % total: 52% 5.771MiB/s (avg: 421.150KiB/s) +progress: [/172.17.0.2]0:0/1 54 % total: 54% 8.738MiB/s (avg: 431.983KiB/s) +progress: [/172.17.0.2]0:0/1 55 % total: 55% 3.406MiB/s (avg: 441.911KiB/s) +progress: [/172.17.0.2]0:0/1 56 % total: 56% 9.791MiB/s (avg: 452.730KiB/s) +progress: [/172.17.0.2]0:0/1 58 % total: 58% 3.401MiB/s (avg: 462.545KiB/s) +progress: [/172.17.0.2]0:0/1 59 % total: 59% 5.280MiB/s (avg: 472.840KiB/s) +progress: [/172.17.0.2]0:0/1 61 % total: 61% 12.232MiB/s (avg: 483.663KiB/s) +progress: [/172.17.0.2]0:0/1 62 % total: 62% 9.258MiB/s (avg: 494.325KiB/s) +progress: [/172.17.0.2]0:0/1 64 % total: 64% 2.877MiB/s (avg: 503.640KiB/s) +progress: [/172.17.0.2]0:0/1 65 % total: 65% 7.461MiB/s (avg: 514.078KiB/s) +progress: [/172.17.0.2]0:0/1 66 % total: 66% 24.247MiB/s (avg: 525.018KiB/s) +progress: [/172.17.0.2]0:0/1 68 % total: 68% 9.348MiB/s (avg: 535.563KiB/s) +progress: [/172.17.0.2]0:0/1 69 % total: 69% 5.130MiB/s (avg: 545.563KiB/s) +progress: [/172.17.0.2]0:0/1 71 % total: 71% 19.861MiB/s (avg: 556.392KiB/s) +progress: [/172.17.0.2]0:0/1 72 % total: 72% 15.501MiB/s (avg: 567.122KiB/s) +progress: [/172.17.0.2]0:0/1 74 % total: 74% 5.031MiB/s (avg: 576.996KiB/s) +progress: [/172.17.0.2]0:0/1 75 % total: 75% 22.771MiB/s (avg: 587.813KiB/s) +progress: [/172.17.0.2]0:0/1 76 % total: 76% 22.780MiB/s (avg: 598.619KiB/s) +progress: [/172.17.0.2]0:0/1 78 % total: 78% 20.684MiB/s (avg: 609.386KiB/s) +progress: [/172.17.0.2]0:0/1 79 % total: 79% 22.920MiB/s (avg: 620.173KiB/s) +progress: [/172.17.0.2]0:0/1 81 % total: 81% 7.458MiB/s (avg: 630.333KiB/s) +progress: [/172.17.0.2]0:0/1 82 % total: 82% 22.993MiB/s (avg: 641.090KiB/s) +progress: [/172.17.0.2]0:0/1 84 % total: 84% 21.392MiB/s (avg: 651.814KiB/s) +progress: [/172.17.0.2]0:0/1 85 % total: 85% 7.732MiB/s (avg: 661.938KiB/s) +progress: [/172.17.0.2]0:0/1 86 % total: 86% 3.476MiB/s (avg: 670.892KiB/s) +progress: [/172.17.0.2]0:0/1 88 % total: 88% 19.889MiB/s (avg: 681.521KiB/s) +progress: [/172.17.0.2]0:0/1 89 % total: 89% 21.077MiB/s (avg: 692.162KiB/s) +progress: [/172.17.0.2]0:0/1 91 % total: 91% 24.062MiB/s (avg: 702.835KiB/s) +progress: [/172.17.0.2]0:0/1 92 % total: 92% 19.798MiB/s (avg: 713.431KiB/s) +progress: [/172.17.0.2]0:0/1 94 % total: 94% 
17.591MiB/s (avg: 723.965KiB/s) +progress: [/172.17.0.2]0:0/1 95 % total: 95% 13.725MiB/s (avg: 734.361KiB/s) +progress: [/172.17.0.2]0:0/1 96 % total: 96% 16.737MiB/s (avg: 744.846KiB/s) +progress: [/172.17.0.2]0:0/1 98 % total: 98% 22.701MiB/s (avg: 755.443KiB/s) +progress: [/172.17.0.2]0:0/1 99 % total: 99% 18.718MiB/s (avg: 765.954KiB/s) +progress: [/172.17.0.2]0:1/1 100% total: 100% 6.613MiB/s (avg: 767.802KiB/s) +progress: [/172.17.0.2]0:1/1 100% total: 100% 0.000KiB/s (avg: 670.295KiB/s) + +Summary statistics: + Connections per host : 1 + Total files transferred : 1 + Total bytes transferred : 4.387MiB + Total duration : 6706 ms + Average transfer rate : 669.835KiB/s + Peak transfer rate : 767.802KiB/s +.... + +== Throttling Load + +To prevent the table loader from overloading the system resources, you +can throttle the process with the --throttle option. The default is +unlimited (no throttling). Throttle units are in megabits. Note that the +total duration is increased in the example below. + +Example: + +.... +sstableloader --nodes 172.17.0.2 --throttle 1 /var/lib/cassandra/loadme/keyspace1/standard1-f8a4fa30aa2a11e8af27091830ac5256/ +Established connection to initial hosts +Opening sstables and calculating sections to stream +Streaming relevant part of /var/lib/cassandra/loadme/keyspace1/standard1-f8a4fa30aa2a11e8af27091830ac5256/ma-6-big-Data.db to [/172.17.0.2] +progress: [/172.17.0.2]0:1/1 100% total: 100% 0 MB/s(avg: 0 MB/s) +Summary statistics: + Connections per host: : 1 + Total files transferred: : 1 + Total bytes transferred: : 4595705 + Total duration (ms): : 37634 + Average transfer rate (MB/s): : 0 + Peak transfer rate (MB/s): : 0 +.... + +== Speeding up Load + +To speed up the load process, the number of connections per host can be +increased. + +Example: + +.... +sstableloader --nodes 172.17.0.2 --connections-per-host 100 /var/lib/cassandra/loadme/keyspace1/standard1-f8a4fa30aa2a11e8af27091830ac5256/ +Established connection to initial hosts +Opening sstables and calculating sections to stream +Streaming relevant part of /var/lib/cassandra/loadme/keyspace1/standard1-f8a4fa30aa2a11e8af27091830ac5256/ma-9-big-Data.db to [/172.17.0.2] +progress: [/172.17.0.2]0:1/1 100% total: 100% 0 MB/s(avg: 1 MB/s) +Summary statistics: + Connections per host: : 100 + Total files transferred: : 1 + Total bytes transferred: : 4595705 + Total duration (ms): : 3486 + Average transfer rate (MB/s): : 1 + Peak transfer rate (MB/s): : 1 +.... + +This small data set doesn't benefit much from the increase in +connections per host, but note that the total duration has decreased in +this example. diff --git a/doc/modules/cassandra/pages/tools/sstable/sstablemetadata.adoc b/doc/modules/cassandra/pages/tools/sstable/sstablemetadata.adoc new file mode 100644 index 000000000000..0516bef3e732 --- /dev/null +++ b/doc/modules/cassandra/pages/tools/sstable/sstablemetadata.adoc @@ -0,0 +1,320 @@ += sstablemetadata + +Print information about an sstable from the related Statistics.db and +Summary.db files to standard output. + +ref: https://issues.apache.org/jira/browse/CASSANDRA-7159 and +https://issues.apache.org/jira/browse/CASSANDRA-10838 + +Cassandra must be stopped before this tool is executed, or unexpected +results will occur. Note: the script does not verify that Cassandra is +stopped. 
+ +== Usage + +sstablemetadata + +[cols=",",] +|=== +|--gc_grace_seconds |The gc_grace_seconds to use when calculating +droppable tombstones +|=== + +== Print all the metadata + +Run sstablemetadata against the __Data.db file(s) related to a table. If +necessary, find the__Data.db file(s) using sstableutil. + +Example: + +.... +sstableutil keyspace1 standard1 | grep Data +/var/lib/cassandra/data/keyspace1/standard1-f6845640a6cb11e8b6836d2c86545d91/mc-1-big-Data.db + +sstablemetadata /var/lib/cassandra/data/keyspace1/standard1-f6845640a6cb11e8b6836d2c86545d91/mc-1-big-Data.db + +SSTable: /var/lib/cassandra/data/keyspace1/standard1-f6845640a6cb11e8b6836d2c86545d91/mc-1-big +Partitioner: org.apache.cassandra.dht.Murmur3Partitioner +Bloom Filter FP chance: 0.010000 +Minimum timestamp: 1535025576141000 +Maximum timestamp: 1535025604309000 +SSTable min local deletion time: 2147483647 +SSTable max local deletion time: 2147483647 +Compressor: org.apache.cassandra.io.compress.LZ4Compressor +TTL min: 86400 +TTL max: 86400 +First token: -9223004712949498654 (key=39373333373831303130) +Last token: 9222554117157811897 (key=4f3438394e39374d3730) +Estimated droppable tombstones: 0.9188263888888889 +SSTable Level: 0 +Repaired at: 0 +Replay positions covered: {CommitLogPosition(segmentId=1535025390651, position=226400)=CommitLogPosition(segmentId=1535025390651, position=6849139)} +totalColumnsSet: 100000 +totalRows: 20000 +Estimated tombstone drop times: +1535039100: 80390 +1535039160: 5645 +1535039220: 13965 +Count Row Size Cell Count +1 0 0 +2 0 0 +3 0 0 +4 0 0 +5 0 20000 +6 0 0 +7 0 0 +8 0 0 +10 0 0 +12 0 0 +14 0 0 +17 0 0 +20 0 0 +24 0 0 +29 0 0 +35 0 0 +42 0 0 +50 0 0 +60 0 0 +72 0 0 +86 0 0 +103 0 0 +124 0 0 +149 0 0 +179 0 0 +215 0 0 +258 20000 0 +310 0 0 +372 0 0 +446 0 0 +535 0 0 +642 0 0 +770 0 0 +924 0 0 +1109 0 0 +1331 0 0 +1597 0 0 +1916 0 0 +2299 0 0 +2759 0 0 +3311 0 0 +3973 0 0 +4768 0 0 +5722 0 0 +6866 0 0 +8239 0 0 +9887 0 0 +11864 0 0 +14237 0 0 +17084 0 0 +20501 0 0 +24601 0 0 +29521 0 0 +35425 0 0 +42510 0 0 +51012 0 0 +61214 0 0 +73457 0 0 +88148 0 0 +105778 0 0 +126934 0 0 +152321 0 0 +182785 0 0 +219342 0 0 +263210 0 0 +315852 0 0 +379022 0 0 +454826 0 0 +545791 0 0 +654949 0 0 +785939 0 0 +943127 0 0 +1131752 0 0 +1358102 0 0 +1629722 0 0 +1955666 0 0 +2346799 0 0 +2816159 0 0 +3379391 0 0 +4055269 0 0 +4866323 0 0 +5839588 0 0 +7007506 0 0 +8409007 0 0 +10090808 0 0 +12108970 0 0 +14530764 0 0 +17436917 0 0 +20924300 0 0 +25109160 0 0 +30130992 0 0 +36157190 0 0 +43388628 0 0 +52066354 0 0 +62479625 0 0 +74975550 0 0 +89970660 0 0 +107964792 0 0 +129557750 0 0 +155469300 0 0 +186563160 0 0 +223875792 0 0 +268650950 0 0 +322381140 0 0 +386857368 0 0 +464228842 0 0 +557074610 0 0 +668489532 0 0 +802187438 0 0 +962624926 0 0 +1155149911 0 0 +1386179893 0 0 +1663415872 0 0 +1996099046 0 0 +2395318855 0 0 +2874382626 0 +3449259151 0 +4139110981 0 +4966933177 0 +5960319812 0 +7152383774 0 +8582860529 0 +10299432635 0 +12359319162 0 +14831182994 0 +17797419593 0 +21356903512 0 +25628284214 0 +30753941057 0 +36904729268 0 +44285675122 0 +53142810146 0 +63771372175 0 +76525646610 0 +91830775932 0 +110196931118 0 +132236317342 0 +158683580810 0 +190420296972 0 +228504356366 0 +274205227639 0 +329046273167 0 +394855527800 0 +473826633360 0 +568591960032 0 +682310352038 0 +818772422446 0 +982526906935 0 +1179032288322 0 +1414838745986 0 +Estimated cardinality: 20196 +EncodingStats minTTL: 0 +EncodingStats minLocalDeletionTime: 1442880000 +EncodingStats minTimestamp: 1535025565275000 +KeyType: 
org.apache.cassandra.db.marshal.BytesType
+ClusteringTypes: [org.apache.cassandra.db.marshal.UTF8Type]
+StaticColumns: {C3:org.apache.cassandra.db.marshal.BytesType, C4:org.apache.cassandra.db.marshal.BytesType, C0:org.apache.cassandra.db.marshal.BytesType, C1:org.apache.cassandra.db.marshal.BytesType, C2:org.apache.cassandra.db.marshal.BytesType}
+RegularColumns: {}
+....
+
+== Specify gc grace seconds
+
+To see the ratio of droppable tombstones given a configured gc grace
+seconds, use the gc_grace_seconds option. Because the sstablemetadata
+tool doesn't access the schema directly, passing in the gc_grace_seconds
+configured in the schema gives a more accurate estimate of droppable
+tombstones. The gc_grace_seconds value provided is subtracted from the
+current machine time (in seconds).
+
+ref: https://issues.apache.org/jira/browse/CASSANDRA-12208
+
+Example:
+
+....
+sstablemetadata /var/lib/cassandra/data/keyspace1/standard1-41b52700b4ed11e896476d2c86545d91/mc-12-big-Data.db | grep "Estimated tombstone drop times" -A4
+Estimated tombstone drop times:
+1536599100: 1
+1536599640: 1
+1536599700: 2
+
+echo $(date +%s)
+1536602005
+
+# if gc_grace_seconds was configured at 100, all of the tombstones would be currently droppable
+sstablemetadata --gc_grace_seconds 100 /var/lib/cassandra/data/keyspace1/standard1-41b52700b4ed11e896476d2c86545d91/mc-12-big-Data.db | grep "Estimated droppable tombstones"
+Estimated droppable tombstones: 4.0E-5
+
+# if gc_grace_seconds was configured at 4700, some of the tombstones would be currently droppable
+sstablemetadata --gc_grace_seconds 4700 /var/lib/cassandra/data/keyspace1/standard1-41b52700b4ed11e896476d2c86545d91/mc-12-big-Data.db | grep "Estimated droppable tombstones"
+Estimated droppable tombstones: 9.61111111111111E-6
+
+# if gc_grace_seconds was configured at 5000, none of the tombstones would be currently droppable
+sstablemetadata --gc_grace_seconds 5000 /var/lib/cassandra/data/keyspace1/standard1-41b52700b4ed11e896476d2c86545d91/mc-12-big-Data.db | grep "Estimated droppable tombstones"
+Estimated droppable tombstones: 0.0
+....
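+
+Conceptually, a tombstone counts as droppable when its local deletion time is
+older than the current time minus the supplied gc_grace_seconds. A rough
+sketch of the cutoff calculation behind the numbers above (illustrative only):
+
+....
+now=$(date +%s)              # e.g. 1536602005 in the example above
+gc_grace=4700
+cutoff=$((now - gc_grace))   # tombstones with a local deletion time older
+echo $cutoff                 # than this cutoff are counted as droppable
+....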
+
+== Explanation of each value printed above
+
+|===
+|Value |Explanation
+
+|SSTable |prefix of the sstable filenames related to this sstable
+|Partitioner |partitioner type used to distribute data across nodes; defined in cassandra.yaml
+|Bloom Filter FP chance |precision of the Bloom filter used in reads; defined in the table definition
+|Minimum timestamp |minimum timestamp of any entry in this sstable, in epoch microseconds
+|Maximum timestamp |maximum timestamp of any entry in this sstable, in epoch microseconds
+|SSTable min local deletion time |minimum timestamp of deletion date, based on TTL, in epoch seconds
+|SSTable max local deletion time |maximum timestamp of deletion date, based on TTL, in epoch seconds
+|Compressor |blank (-) by default; if not blank, indicates type of compression enabled on the table
+|TTL min |time-to-live in seconds; default 0 unless defined in the table definition
+|TTL max |time-to-live in seconds; default 0 unless defined in the table definition
+|First token |lowest token and related key found in the sstable summary
+|Last token |highest token and related key found in the sstable summary
+|Estimated droppable tombstones |ratio of tombstones to columns, using configured gc grace seconds if relevant
+|SSTable level |compaction level of this sstable, if leveled compaction (LCS) is used
+|Repaired at |the timestamp this sstable was marked as repaired via sstablerepairedset, in epoch milliseconds
+|Replay positions covered |the interval of time and commitlog positions related to this sstable
+|totalColumnsSet |number of cells in the table
+|totalRows |number of rows in the table
+|Estimated tombstone drop times |approximate number of rows that will expire, ordered by epoch seconds
+|Count Row Size Cell Count |two histograms in two columns; one represents distribution of Row Size and the other represents distribution of Cell Count
+|Estimated cardinality |an estimate of unique values, used for compaction
+|EncodingStats* minTTL |in epoch milliseconds
+|EncodingStats* minLocalDeletionTime |in epoch seconds
+|EncodingStats* minTimestamp |in epoch microseconds
+|KeyType |the type of partition key, useful in reading and writing data from/to storage; defined in the table definition
+|ClusteringTypes |the type of clustering key, useful in reading and writing data from/to storage; defined in the table definition
+|StaticColumns |a list of the shared columns in the table
+|RegularColumns |a list of non-static, non-key columns in the table
+|===
+
+`*` For the encoding stats values, the delta between this value and the current epoch time is used when encoding and storing data in the most optimal way.
diff --git a/doc/modules/cassandra/pages/tools/sstable/sstableofflinerelevel.adoc b/doc/modules/cassandra/pages/tools/sstable/sstableofflinerelevel.adoc
new file mode 100644
index 000000000000..71bffbf2335b
--- /dev/null
+++ b/doc/modules/cassandra/pages/tools/sstable/sstableofflinerelevel.adoc
@@ -0,0 +1,94 @@
+= sstableofflinerelevel
+
+When using LeveledCompactionStrategy, sstables can get stuck at L0 on a
+recently bootstrapped node, and compactions may never catch up. This
+tool is used to bump sstables into the highest level possible.
+
+ref: https://issues.apache.org/jira/browse/CASSANDRA-8301
+
+The way this is done is: sstables are sorted by their last token. Given
+an original leveling like this (note that [ ] indicates token
+boundaries, not sstable size on disk; all sstables are the same size):
+
+....
+L3 [][][][][][][][][][][] +L2 [ ][ ][ ][ ] +L1 [ ][ ] +L0 [ ] +.... + +Will look like this after being dropped to L0 and sorted by last token +(and, to illustrate overlap, the overlapping ones are put on a new +line): + +.... +[][][] +[ ][][][] + [ ] +[ ] +... +.... + +Then, we start iterating from the smallest last-token and adding all +sstables that do not cause an overlap to a level. We will reconstruct +the original leveling top-down. Whenever we add an sstable to the level, +we remove it from the sorted list. Once we reach the end of the sorted +list, we have a full level, and can start over with the level below. + +If we end up with more levels than expected, we put all levels exceeding +the expected in L0, for example, original L0 files will most likely be +put in a level of its own since they most often overlap many other +sstables. + +Cassandra must be stopped before this tool is executed, or unexpected +results will occur. Note: the script does not verify that Cassandra is +stopped. + +== Usage + +sstableofflinerelevel [--dry-run]
+ +== Doing a dry run + +Use the --dry-run option to see the current level distribution and +predicted level after the change. + +Example: + +.... +sstableofflinerelevel --dry-run keyspace eventlog +For sstables in /var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753: +Current leveling: +L0=2 +Potential leveling: +L0=1 +L1=1 +.... + +== Running a relevel + +Example: + +.... +sstableofflinerelevel keyspace eventlog +For sstables in /var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753: +Current leveling: +L0=2 +New leveling: +L0=1 +L1=1 +.... + +== Keyspace or table not found + +If an invalid keyspace and/or table is provided, an exception will be +thrown. + +Example: + +.... +sstableofflinerelevel --dry-run keyspace evenlog + +Exception in thread "main" java.lang.IllegalArgumentException: Unknown keyspace/columnFamily keyspace1.evenlog + at org.apache.cassandra.tools.SSTableOfflineRelevel.main(SSTableOfflineRelevel.java:96) +.... diff --git a/doc/modules/cassandra/pages/tools/sstable/sstablerepairedset.adoc b/doc/modules/cassandra/pages/tools/sstable/sstablerepairedset.adoc new file mode 100644 index 000000000000..e18859b8bc88 --- /dev/null +++ b/doc/modules/cassandra/pages/tools/sstable/sstablerepairedset.adoc @@ -0,0 +1,83 @@ += sstablerepairedset + +Repairs can take a very long time in some environments, for large sizes +of data. Use this tool to set the repairedAt status on a given set of +sstables, so that repairs can be run on only un-repaired sstables if +desired. + +Note that running a repair (e.g., via nodetool repair) doesn't set the +status of this metadata. Only setting the status of this metadata via +this tool does. + +ref: https://issues.apache.org/jira/browse/CASSANDRA-5351 + +Cassandra must be stopped before this tool is executed, or unexpected +results will occur. Note: the script does not verify that Cassandra is +stopped. + +== Usage + +sstablerepairedset --really-set [-f | +] + +[cols=",",] +|=== +|--really-set |required if you want to really set the status +|--is-repaired |set the repairedAt status to the last modified time +|--is-unrepaired |set the repairedAt status to 0 +|-f |use a file containing a list of sstables as the input +|=== + +== Set a lot of sstables to unrepaired status + +There are many ways to do this programmatically. This way would likely +include variables for the keyspace and table. + +Example: + +.... +find /var/lib/cassandra/data/keyspace1/standard1-d936bd20a17c11e8bc92a55ed562cd82/* -name "*Data.db" -print0 | xargs -0 -I % sstablerepairedset --really-set --is-unrepaired % +.... + +== Set one to many sstables to repaired status + +Set the repairedAt status after a repair to mark the sstables as +repaired. Again, using variables for the keyspace and table names is a +good choice. + +Example: + +.... +nodetool repair keyspace1 standard1 +find /var/lib/cassandra/data/keyspace1/standard1-d936bd20a17c11e8bc92a55ed562cd82/* -name "*Data.db" -print0 | xargs -0 -I % sstablerepairedset --really-set --is-repaired % +.... + +== Print metadata showing repaired status + +sstablemetadata can be used to view the status set or unset using this +command. 
+ +Example: + +____ +sstablerepairedset --really-set --is-repaired +/var/lib/cassandra/data/keyspace1/standard1-d936bd20a17c11e8bc92a55ed562cd82/mc-1-big-Data.db +sstablemetadata +/var/lib/cassandra/data/keyspace1/standard1-d936bd20a17c11e8bc92a55ed562cd82/mc-1-big-Data.db +| grep "Repaired at" Repaired at: 1534443974000 + +sstablerepairedset --really-set --is-unrepaired +/var/lib/cassandra/data/keyspace1/standard1-d936bd20a17c11e8bc92a55ed562cd82/mc-1-big-Data.db +sstablemetadata +/var/lib/cassandra/data/keyspace1/standard1-d936bd20a17c11e8bc92a55ed562cd82/mc-1-big-Data.db +| grep "Repaired at" Repaired at: 0 +____ + +== Using command in a script + +If you know you ran repair 2 weeks ago, you can do something like the +following: + +.... +sstablerepairset --is-repaired -f <(find /var/lib/cassandra/data/.../ -iname "*Data.db*" -mtime +14) +.... diff --git a/doc/modules/cassandra/pages/tools/sstable/sstablescrub.adoc b/doc/modules/cassandra/pages/tools/sstable/sstablescrub.adoc new file mode 100644 index 000000000000..1826e9ea8f7d --- /dev/null +++ b/doc/modules/cassandra/pages/tools/sstable/sstablescrub.adoc @@ -0,0 +1,102 @@ += sstablescrub + +Fix a broken sstable. The scrub process rewrites the sstable, skipping +any corrupted rows. Because these rows are lost, follow this process +with a repair. + +ref: https://issues.apache.org/jira/browse/CASSANDRA-4321 + +Cassandra must be stopped before this tool is executed, or unexpected +results will occur. Note: the script does not verify that Cassandra is +stopped. + +== Usage + +sstablescrub
+ +[cols=",",] +|=== +|--debug |display stack traces + +|-h,--help |display this help message + +|-m,--manifest-check |only check and repair the leveled manifest, +without actually scrubbing the sstables + +|-n,--no-validate |do not validate columns using column validator + +|-r,--reinsert-overflowed-ttl |Rewrites rows with overflowed expiration +date affected by CASSANDRA-14092 with the maximum supported expiration +date of 2038-01-19T03:14:06+00:00. The rows are rewritten with the +original timestamp incremented by one millisecond to override/supersede +any potential tombstone that may have been generated during compaction +of the affected rows. + +|-s,--skip-corrupted |skip corrupt rows in counter tables + +|-v,--verbose |verbose output +|=== + +== Basic Scrub + +The scrub without options will do a snapshot first, then write all +non-corrupted files to a new sstable. + +Example: + +.... +sstablescrub keyspace1 standard1 +Pre-scrub sstables snapshotted into snapshot pre-scrub-1534424070883 +Scrubbing BigTableReader(path='/var/lib/cassandra/data/keyspace1/standard1-6365332094dd11e88f324f9c503e4753/mc-5-big-Data.db') (17.142MiB) +Scrub of BigTableReader(path='/var/lib/cassandra/data/keyspace1/standard1-6365332094dd11e88f324f9c503e4753/mc-5-big-Data.db') complete: 73367 rows in new sstable and 0 empty (tombstoned) rows dropped +Checking leveled manifest +.... + +== Scrub without Validation + +ref: https://issues.apache.org/jira/browse/CASSANDRA-9406 + +Use the --no-validate option to retain data that may be misrepresented +(e.g., an integer stored in a long field) but not corrupt. This data +usually doesn not present any errors to the client. + +Example: + +.... +sstablescrub --no-validate keyspace1 standard1 +Pre-scrub sstables snapshotted into snapshot pre-scrub-1536243158517 +Scrubbing BigTableReader(path='/var/lib/cassandra/data/keyspace1/standard1-bc9cf530b1da11e886c66d2c86545d91/mc-2-big-Data.db') (4.482MiB) +Scrub of BigTableReader(path='/var/lib/cassandra/data/keyspace1/standard1-bc9cf530b1da11e886c66d2c86545d91/mc-2-big-Data.db') complete; looks like all 0 rows were tombstoned +.... + +== Skip Corrupted Counter Tables + +ref: https://issues.apache.org/jira/browse/CASSANDRA-5930 + +If counter tables are corrupted in a way that prevents sstablescrub from +completing, you can use the --skip-corrupted option to skip scrubbing +those counter tables. This workaround is not necessary in versions 2.0+. + +Example: + +.... +sstablescrub --skip-corrupted keyspace1 counter1 +.... + +== Dealing with Overflow Dates + +ref: https://issues.apache.org/jira/browse/CASSANDRA-14092 + +Using the option --reinsert-overflowed-ttl allows a rewriting of rows +that had a max TTL going over the maximum (causing an overflow). + +Example: + +.... +sstablescrub --reinsert-overflowed-ttl keyspace1 counter1 +.... + +== Manifest Check + +As of Cassandra version 2.0, this option is no longer relevant, since +level data was moved from a separate manifest into the sstable metadata. diff --git a/doc/modules/cassandra/pages/tools/sstable/sstablesplit.adoc b/doc/modules/cassandra/pages/tools/sstable/sstablesplit.adoc new file mode 100644 index 000000000000..f62b86896f48 --- /dev/null +++ b/doc/modules/cassandra/pages/tools/sstable/sstablesplit.adoc @@ -0,0 +1,96 @@ += sstablesplit + +Big sstable files can take up a lot of disk space. The sstablesplit tool +can be used to split those large files into smaller files. It can be +thought of as a type of anticompaction. 
+ +ref: https://issues.apache.org/jira/browse/CASSANDRA-4766 + +Cassandra must be stopped before this tool is executed, or unexpected +results will occur. Note: the script does not verify that Cassandra is +stopped. + +== Usage + +sstablesplit + +[cols=",",] +|=== +|--debug |display stack traces + +|-h, --help |display this help message + +|--no-snapshot |don't snapshot the sstables before splitting + +|-s, --size |maximum size in MB for the output sstables (default: +50) +|=== + +This command should be run with Cassandra stopped. Note: the script does +not verify that Cassandra is stopped. + +== Split a File + +Split a large sstable into smaller sstables. By default, unless the +option --no-snapshot is added, a snapshot will be done of the original +sstable and placed in the snapshots folder. + +Example: + +.... +sstablesplit /var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-8-big-Data.db + +Pre-split sstables snapshotted into snapshot pre-split-1533144514795 +.... + +== Split Multiple Files + +Wildcards can be used in the filename portion of the command to split +multiple files. + +Example: + +.... +sstablesplit --size 1 /var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-1* +.... + +== Attempt to Split a Small File + +If the file is already smaller than the split size provided, the sstable +will not be split. + +Example: + +.... +sstablesplit /var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-8-big-Data.db +Skipping /var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-8-big-Data.db: it's size (1.442 MB) is less than the split size (50 MB) +No sstables needed splitting. +.... + +== Split a File into Specified Size + +The default size used for splitting is 50MB. Specify another size with +the --size option. The size is in megabytes (MB). Specify only the +number, not the units. For example --size 50 is correct, but --size 50MB +is not. + +Example: + +.... +sstablesplit --size 1 /var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-9-big-Data.db +Pre-split sstables snapshotted into snapshot pre-split-1533144996008 +.... + +== Split Without Snapshot + +By default, sstablesplit will create a snapshot before splitting. If a +snapshot is not needed, use the --no-snapshot option to skip it. + +Example: + +.... +sstablesplit --size 1 --no-snapshot /var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-11-big-Data.db +.... + +Note: There is no output, but you can see the results in your file +system. diff --git a/doc/modules/cassandra/pages/tools/sstable/sstableupgrade.adoc b/doc/modules/cassandra/pages/tools/sstable/sstableupgrade.adoc new file mode 100644 index 000000000000..ad193e2f5a56 --- /dev/null +++ b/doc/modules/cassandra/pages/tools/sstable/sstableupgrade.adoc @@ -0,0 +1,136 @@ += sstableupgrade + +Upgrade the sstables in the given table (or snapshot) to the current +version of Cassandra. This process is typically done after a Cassandra +version upgrade. This operation will rewrite the sstables in the +specified table to match the currently installed version of Cassandra. +The sstableupgrade command can also be used to downgrade sstables to a +previous version. + +The snapshot option will only upgrade the specified snapshot. Upgrading +snapshots is required before attempting to restore a snapshot taken in a +major version older than the major version Cassandra is currently +running. 
This will replace the files in the given snapshot as well as +break any hard links to live sstables. + +Cassandra must be stopped before this tool is executed, or unexpected +results will occur. Note: the script does not verify that Cassandra is +stopped. + +== Usage + +sstableupgrade
[snapshot_name] + +[cols=",",] +|=== +|--debug |display stack traces +|-h,--help |display this help message +|-k,--keep-source |do not delete the source sstables +|=== + +== Rewrite tables to the current Cassandra version + +Start with a set of sstables in one version of Cassandra: + +.... +ls -al /tmp/cassandra/data/keyspace1/standard1-9695b790a63211e8a6fb091830ac5256/ +... +-rw-r--r-- 1 user wheel 348 Aug 22 13:45 keyspace1-standard1-ka-1-CRC.db +-rw-r--r-- 1 user wheel 5620000 Aug 22 13:45 keyspace1-standard1-ka-1-Data.db +-rw-r--r-- 1 user wheel 10 Aug 22 13:45 keyspace1-standard1-ka-1-Digest.sha1 +-rw-r--r-- 1 user wheel 25016 Aug 22 13:45 keyspace1-standard1-ka-1-Filter.db +-rw-r--r-- 1 user wheel 480000 Aug 22 13:45 keyspace1-standard1-ka-1-Index.db +-rw-r--r-- 1 user wheel 9895 Aug 22 13:45 keyspace1-standard1-ka-1-Statistics.db +-rw-r--r-- 1 user wheel 3562 Aug 22 13:45 keyspace1-standard1-ka-1-Summary.db +-rw-r--r-- 1 user wheel 79 Aug 22 13:45 keyspace1-standard1-ka-1-TOC.txt +.... + +After upgrading the Cassandra version, upgrade the sstables: + +.... +sstableupgrade keyspace1 standard1 +Found 1 sstables that need upgrading. +Upgrading BigTableReader(path='/var/lib/cassandra/data/keyspace1/standard1-9695b790a63211e8a6fb091830ac5256/keyspace1-standard1-ka-1-Data.db') +Upgrade of BigTableReader(path='/var/lib/cassandra/data/keyspace1/standard1-9695b790a63211e8a6fb091830ac5256/keyspace1-standard1-ka-1-Data.db') complete. + +ls -al /tmp/cassandra/data/keyspace1/standard1-9695b790a63211e8a6fb091830ac5256/ +... +drwxr-xr-x 2 user wheel 64 Aug 22 13:48 backups +-rw-r--r-- 1 user wheel 292 Aug 22 13:48 mc-2-big-CRC.db +-rw-r--r-- 1 user wheel 4599475 Aug 22 13:48 mc-2-big-Data.db +-rw-r--r-- 1 user wheel 10 Aug 22 13:48 mc-2-big-Digest.crc32 +-rw-r--r-- 1 user wheel 25256 Aug 22 13:48 mc-2-big-Filter.db +-rw-r--r-- 1 user wheel 330807 Aug 22 13:48 mc-2-big-Index.db +-rw-r--r-- 1 user wheel 10312 Aug 22 13:48 mc-2-big-Statistics.db +-rw-r--r-- 1 user wheel 3506 Aug 22 13:48 mc-2-big-Summary.db +-rw-r--r-- 1 user wheel 80 Aug 22 13:48 mc-2-big-TOC.txt +.... + +== Rewrite tables to the current Cassandra version, and keep tables in old version + +Again, starting with a set of sstables in one version: + +.... +ls -al /tmp/cassandra/data/keyspace1/standard1-db532690a63411e8b4ae091830ac5256/ +... +-rw-r--r-- 1 user wheel 348 Aug 22 13:58 keyspace1-standard1-ka-1-CRC.db +-rw-r--r-- 1 user wheel 5620000 Aug 22 13:58 keyspace1-standard1-ka-1-Data.db +-rw-r--r-- 1 user wheel 10 Aug 22 13:58 keyspace1-standard1-ka-1-Digest.sha1 +-rw-r--r-- 1 user wheel 25016 Aug 22 13:58 keyspace1-standard1-ka-1-Filter.db +-rw-r--r-- 1 user wheel 480000 Aug 22 13:58 keyspace1-standard1-ka-1-Index.db +-rw-r--r-- 1 user wheel 9895 Aug 22 13:58 keyspace1-standard1-ka-1-Statistics.db +-rw-r--r-- 1 user wheel 3562 Aug 22 13:58 keyspace1-standard1-ka-1-Summary.db +-rw-r--r-- 1 user wheel 79 Aug 22 13:58 keyspace1-standard1-ka-1-TOC.txt +.... + +After upgrading the Cassandra version, upgrade the sstables, retaining +the original sstables: + +.... +sstableupgrade keyspace1 standard1 -k +Found 1 sstables that need upgrading. +Upgrading BigTableReader(path='/var/lib/cassandra/data/keyspace1/standard1-db532690a63411e8b4ae091830ac5256/keyspace1-standard1-ka-1-Data.db') +Upgrade of BigTableReader(path='/var/lib/cassandra/data/keyspace1/standard1-db532690a63411e8b4ae091830ac5256/keyspace1-standard1-ka-1-Data.db') complete. + +ls -al /tmp/cassandra/data/keyspace1/standard1-db532690a63411e8b4ae091830ac5256/ +... 
+drwxr-xr-x 2 user wheel 64 Aug 22 14:00 backups +-rw-r--r--@ 1 user wheel 348 Aug 22 13:58 keyspace1-standard1-ka-1-CRC.db +-rw-r--r--@ 1 user wheel 5620000 Aug 22 13:58 keyspace1-standard1-ka-1-Data.db +-rw-r--r--@ 1 user wheel 10 Aug 22 13:58 keyspace1-standard1-ka-1-Digest.sha1 +-rw-r--r--@ 1 user wheel 25016 Aug 22 13:58 keyspace1-standard1-ka-1-Filter.db +-rw-r--r--@ 1 user wheel 480000 Aug 22 13:58 keyspace1-standard1-ka-1-Index.db +-rw-r--r--@ 1 user wheel 9895 Aug 22 13:58 keyspace1-standard1-ka-1-Statistics.db +-rw-r--r--@ 1 user wheel 3562 Aug 22 13:58 keyspace1-standard1-ka-1-Summary.db +-rw-r--r--@ 1 user wheel 79 Aug 22 13:58 keyspace1-standard1-ka-1-TOC.txt +-rw-r--r-- 1 user wheel 292 Aug 22 14:01 mc-2-big-CRC.db +-rw-r--r-- 1 user wheel 4596370 Aug 22 14:01 mc-2-big-Data.db +-rw-r--r-- 1 user wheel 10 Aug 22 14:01 mc-2-big-Digest.crc32 +-rw-r--r-- 1 user wheel 25256 Aug 22 14:01 mc-2-big-Filter.db +-rw-r--r-- 1 user wheel 330801 Aug 22 14:01 mc-2-big-Index.db +-rw-r--r-- 1 user wheel 10312 Aug 22 14:01 mc-2-big-Statistics.db +-rw-r--r-- 1 user wheel 3506 Aug 22 14:01 mc-2-big-Summary.db +-rw-r--r-- 1 user wheel 80 Aug 22 14:01 mc-2-big-TOC.txt +.... + +== Rewrite a snapshot to the current Cassandra version + +Find the snapshot name: + +.... +nodetool listsnapshots + +Snapshot Details: +Snapshot name Keyspace name Column family name True size Size on disk +... +1534962986979 keyspace1 standard1 5.85 MB 5.85 MB +.... + +Then rewrite the snapshot: + +.... +sstableupgrade keyspace1 standard1 1534962986979 +Found 1 sstables that need upgrading. +Upgrading BigTableReader(path='/var/lib/cassandra/data/keyspace1/standard1-5850e9f0a63711e8a5c5091830ac5256/snapshots/1534962986979/keyspace1-standard1-ka-1-Data.db') +Upgrade of BigTableReader(path='/var/lib/cassandra/data/keyspace1/standard1-5850e9f0a63711e8a5c5091830ac5256/snapshots/1534962986979/keyspace1-standard1-ka-1-Data.db') complete. +.... diff --git a/doc/modules/cassandra/pages/tools/sstable/sstableutil.adoc b/doc/modules/cassandra/pages/tools/sstable/sstableutil.adoc new file mode 100644 index 000000000000..9a718f12957f --- /dev/null +++ b/doc/modules/cassandra/pages/tools/sstable/sstableutil.adoc @@ -0,0 +1,102 @@ += sstableutil + +List sstable files for the provided table. + +ref: https://issues.apache.org/jira/browse/CASSANDRA-7066 + +Cassandra must be stopped before this tool is executed, or unexpected +results will occur. Note: the script does not verify that Cassandra is +stopped. + +== Usage + +sstableutil
+ +[cols=",",] +|=== +|-c, --cleanup |clean up any outstanding transactions + +|-d, --debug |display stack traces + +|-h, --help |display this help message + +|-o, --oplog |include operation logs + +|-t, --type |all (list all files, final or temporary), tmp (list +temporary files only), final (list final files only), + +|-v, --verbose |verbose output +|=== + +== List all sstables + +The basic command lists the sstables associated with a given +keyspace/table. + +Example: + +.... +sstableutil keyspace eventlog +Listing files... +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-32-big-CRC.db +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-32-big-Data.db +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-32-big-Digest.crc32 +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-32-big-Filter.db +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-32-big-Index.db +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-32-big-Statistics.db +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-32-big-Summary.db +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-32-big-TOC.txt +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-37-big-CRC.db +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-37-big-Data.db +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-37-big-Digest.crc32 +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-37-big-Filter.db +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-37-big-Index.db +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-37-big-Statistics.db +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-37-big-Summary.db +/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-37-big-TOC.txt +.... + +== List only temporary sstables + +Using the -t option followed by [.title-ref]#tmp# will list all +temporary sstables, in the format above. Temporary sstables were used in +pre-3.0 versions of Cassandra. + +== List only final sstables + +Using the -t option followed by [.title-ref]#final# will list all final +sstables, in the format above. In recent versions of Cassandra, this is +the same output as not using the -t option. + +== Include transaction logs + +Using the -o option will include transaction logs in the listing, in the +format above. + +== Clean up sstables + +Using the -c option removes any transactions left over from incomplete +writes or compactions. + +From the 3.0 upgrade notes: + +New transaction log files have been introduced to replace the +compactions_in_progress system table, temporary file markers (tmp and +tmplink) and sstable ancestors. Therefore, compaction metadata no longer +contains ancestors. Transaction log files list sstable descriptors +involved in compactions and other operations such as flushing and +streaming. Use the sstableutil tool to list any sstable files currently +involved in operations not yet completed, which previously would have +been marked as temporary. A transaction log file contains one sstable +per line, with the prefix "add:" or "remove:". They also contain a +special line "commit", only inserted at the end when the transaction is +committed. 
On startup we use these files to cleanup any partial +transactions that were in progress when the process exited. If the +commit line is found, we keep new sstables (those with the "add" prefix) +and delete the old sstables (those with the "remove" prefix), vice-versa +if the commit line is missing. Should you lose or delete these log +files, both old and new sstable files will be kept as live files, which +will result in duplicated sstables. These files are protected by +incremental checksums so you should not manually edit them. When +restoring a full backup or moving sstable files, you should clean-up any +left over transactions and their temporary files first. diff --git a/doc/modules/cassandra/pages/tools/sstable/sstableverify.adoc b/doc/modules/cassandra/pages/tools/sstable/sstableverify.adoc new file mode 100644 index 000000000000..0af2f150db89 --- /dev/null +++ b/doc/modules/cassandra/pages/tools/sstable/sstableverify.adoc @@ -0,0 +1,82 @@ += sstableverify + +Check sstable(s) for errors or corruption, for the provided table. + +ref: https://issues.apache.org/jira/browse/CASSANDRA-5791 + +Cassandra must be stopped before this tool is executed, or unexpected +results will occur. Note: the script does not verify that Cassandra is +stopped. + +== Usage + +sstableverify
+ +[cols=",",] +|=== +|--debug |display stack traces +|-e, --extended |extended verification +|-h, --help |display this help message +|-v, --verbose |verbose output +|=== + +== Basic Verification + +This is the basic verification. It is not a very quick process, and uses +memory. You might need to increase your memory settings if you have many +sstables. + +Example: + +.... +sstableverify keyspace eventlog +Verifying BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-32-big-Data.db') (7.353MiB) +Deserializing sstable metadata for BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-32-big-Data.db') +Checking computed hash of BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-32-big-Data.db') +Verifying BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-37-big-Data.db') (3.775MiB) +Deserializing sstable metadata for BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-37-big-Data.db') +Checking computed hash of BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-37-big-Data.db') +.... + +== Extended Verification + +During an extended verification, the individual values will be validated +for errors or corruption. This of course takes more time. + +Example: + +.... +root@DC1C1:/# sstableverify -e keyspace eventlog +WARN 14:08:06,255 Only 33.096GiB free across all data volumes. Consider adding more capacity to your cluster or removing obsolete snapshots +Verifying BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-32-big-Data.db') (7.353MiB) +Deserializing sstable metadata for BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-32-big-Data.db') +Checking computed hash of BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-32-big-Data.db') +Extended Verify requested, proceeding to inspect values +Verify of BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-32-big-Data.db') succeeded. All 33211 rows read successfully +Verifying BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-37-big-Data.db') (3.775MiB) +Deserializing sstable metadata for BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-37-big-Data.db') +Checking computed hash of BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-37-big-Data.db') +Extended Verify requested, proceeding to inspect values +Verify of BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-37-big-Data.db') succeeded. All 17068 rows read successfully +.... + +== Corrupted File + +Corrupted files are listed if they are detected by the script. + +Example: + +.... 
+sstableverify keyspace eventlog
+Verifying BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-40-big-Data.db') (7.416MiB)
+Deserializing sstable metadata for BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-40-big-Data.db')
+Checking computed hash of BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-40-big-Data.db')
+Error verifying BigTableReader(path='/var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-40-big-Data.db'): Corrupted: /var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-40-big-Data.db
+....
+
+A similar (but less verbose) tool will show the suggested actions:
+
+....
+nodetool verify keyspace eventlog
+error: Invalid SSTable /var/lib/cassandra/data/keyspace/eventlog-6365332094dd11e88f324f9c503e4753/mc-40-big-Data.db, please force repair
+....
diff --git a/doc/modules/cassandra/pages/troubleshooting/finding_nodes.adoc b/doc/modules/cassandra/pages/troubleshooting/finding_nodes.adoc
new file mode 100644
index 000000000000..522bbb92af35
--- /dev/null
+++ b/doc/modules/cassandra/pages/troubleshooting/finding_nodes.adoc
@@ -0,0 +1,133 @@
+= Find The Misbehaving Nodes
+
+The first step to troubleshooting a Cassandra issue is to use error
+messages, metrics and monitoring information to identify whether the
+issue lies with the clients or the server and, if it lies with the
+server, to find the problematic nodes in the Cassandra cluster. The goal
+is to determine if this is a systemic issue (e.g. a query pattern that
+affects the entire cluster) or isolated to a subset of nodes (e.g.
+neighbors holding a shared token range or even a single node with bad
+hardware).
+
+There are many sources of information that help determine where the
+problem lies. Some of the most common are mentioned below.
+
+== Client Logs and Errors
+
+Clients of the cluster often leave the best breadcrumbs to follow.
+Perhaps client latencies or error rates have increased in a particular
+datacenter (likely eliminating other datacenters' nodes), or clients are
+receiving a particular kind of error code indicating a particular kind
+of problem. Troubleshooters can often rule out many failure modes just
+by reading the error messages. In fact, many Cassandra error messages
+include the last coordinator contacted to help operators find nodes to
+start with.
+
+Some common errors (likely culprit in parentheses), assuming the client
+has error names similar to those of the DataStax drivers:
+
+* `SyntaxError` (*client*). This and other `QueryValidationException`
+errors indicate that the client sent a malformed request. These are
+rarely server issues and usually indicate bad queries.
+* `UnavailableException` (*server*): This means that the Cassandra
+coordinator node has rejected the query as it believes that insufficient
+replica nodes are available. If many coordinators are throwing this
+error it likely means that there really are (typically) multiple nodes
+down in the cluster and you can identify them using `nodetool status`.
+If only a single coordinator is throwing this error it may mean that
+node has been partitioned from the rest.
+* `OperationTimedOutException` (*server*): This is the most frequent
+timeout message raised when clients set timeouts and means that the
+query took longer than the supplied timeout. This is a _client side_
+timeout, enforced by the driver rather than by the server.
+The error message will include the coordinator node that was last tried,
+which is usually a good starting point. This error usually indicates
+either aggressive client timeout values or latent server
+coordinators/replicas.
+* `ReadTimeoutException` or `WriteTimeoutException` (*server*): These
+are raised when clients do not specify lower timeouts; they are
+_coordinator_ timeouts based on the values supplied in the
+`cassandra.yaml` configuration file. They usually indicate a serious
+server side problem as the default values are usually multiple seconds.
+
+== Metrics
+
+If you have Cassandra xref:operating/metrics.adoc[`metrics`] reporting to a
+centralized location such as https://graphiteapp.org/[Graphite] or
+https://grafana.com/[Grafana] you can typically use those to narrow down
+the problem. At this stage narrowing down the issue to a particular
+datacenter, rack, or even group of nodes is the main goal. Some helpful
+metrics to look at are:
+
+=== Errors
+
+Cassandra refers to internode messaging errors as "drops", and provides
+a number of xref:operating/metrics.adoc#droppedmessage-metrics[`Dropped Message Metrics`] to help narrow
+down errors. If particular nodes are actively dropping messages, they
+are likely related to the issue.
+
+=== Latency
+
+For timeouts or latency related issues you can start with xref:operating/metrics.adoc#table-metrics[`table metrics`]
+by comparing Coordinator level metrics e.g.
+`CoordinatorReadLatency` or `CoordinatorWriteLatency` with their
+associated replica metrics e.g. `ReadLatency` or `WriteLatency`. Issues
+usually show up on the `99th` percentile before they show up on the
+`50th` percentile or the `mean`. While `maximum` coordinator latencies
+are not typically very helpful due to the exponentially decaying
+reservoir used internally to produce metrics, `maximum` replica
+latencies that correlate with increased `99th` percentiles on
+coordinators can help narrow down the problem.
+
+There are usually three main possibilities:
+
+[arabic]
+. Coordinator latencies are high on all nodes, but only a few nodes'
+local read latencies are high. This points to slow replica nodes and the
+coordinators are just side effects. This usually happens when clients
+are not token aware.
+. Coordinator latencies and replica latencies increase at the same time
+on a few nodes. If clients are token aware this is almost always
+what happens and points to slow replicas of a subset of token ranges
+(only part of the ring).
+. Coordinator and local latencies are high on many nodes. This usually
+indicates either a tipping point in the cluster capacity (too many
+writes or reads per second), or a new query pattern.
+
+It's important to remember that depending on the client's load balancing
+behavior and consistency levels, coordinator and replica metrics may or
+may not correlate. In particular if you use `TokenAware` policies the
+same node's coordinator and replica latencies will often increase
+together, but if you just use normal `DCAwareRoundRobin` coordinator
+latencies can increase with unrelated replica nodes' latencies. For
+example:
+
+* `TokenAware` + `LOCAL_ONE`: should always have coordinator and replica
+latencies on the same node rise together
+* `TokenAware` + `LOCAL_QUORUM`: should always have coordinator and
+multiple replica latencies rise together in the same datacenter.
+* `TokenAware` + `QUORUM`: replica latencies in other datacenters can
+affect coordinator latencies.
+* `DCAwareRoundRobin` + `LOCAL_ONE`: coordinator latencies and unrelated
+replica nodes' latencies will rise together.
+* `DCAwareRoundRobin` + `LOCAL_QUORUM`: different coordinator and
+replica latencies will rise together with little correlation.
+
+=== Query Rates
+
+Sometimes the xref:operating/metrics.adoc#table-metrics[`table metric`] query rate metrics can help narrow
+down load issues, as a "small" increase in coordinator queries per second
+(QPS) may correlate with a very large increase in replica level QPS.
+This most often happens with `BATCH` writes, where a client may send a
+single `BATCH` query that might contain 50 statements in it, which if
+you have 9 copies (RF=3, three datacenters) means that every coordinator
+`BATCH` write turns into 450 replica writes! This is why keeping
+`BATCH` statements to the same partition is so critical, otherwise you
+can exhaust significant CPU capacity with a "single" query.
+
+== Next Step: Investigate the Node(s)
+
+Once you have narrowed down the problem as much as possible (datacenter,
+rack, node), log in to one of the nodes using SSH and proceed to debug
+using xref:reading_logs.adoc[`logs`], xref:use_nodetool.adoc[`nodetool`], and
+xref:use_tools.adoc[`os tools`].
+If you are not able to log in you may still have access to `logs` and `nodetool` remotely.
diff --git a/doc/modules/cassandra/pages/troubleshooting/index.adoc b/doc/modules/cassandra/pages/troubleshooting/index.adoc
new file mode 100644
index 000000000000..f52796587e78
--- /dev/null
+++ b/doc/modules/cassandra/pages/troubleshooting/index.adoc
@@ -0,0 +1,19 @@
+= Troubleshooting
+
+As with any distributed database, sometimes Cassandra breaks and you
+will have to troubleshoot what is going on. Generally speaking you can
+debug Cassandra like any other distributed Java program, meaning that
+you have to find which machines in your cluster are misbehaving and then
+isolate the problem using logs and tools. Luckily Cassandra has a great
+set of introspection tools to help you.
+
+These pages include a number of command examples demonstrating various
+debugging and analysis techniques, mostly for Linux/Unix systems. If you
+don't have access to the machines running Cassandra, or are running on
+Windows or another operating system, you may not be able to use the
+exact commands but there are likely equivalent tools you can use.
+
+* xref:troubleshooting/finding_nodes.adoc[Finding nodes]
+* xref:troubleshooting/reading_logs.adoc[Reading logs]
+* xref:troubleshooting/use_nodetool.adoc[Using nodetool]
+* xref:troubleshooting/use_tools.adoc[Using tools]
diff --git a/doc/modules/cassandra/pages/troubleshooting/reading_logs.adoc b/doc/modules/cassandra/pages/troubleshooting/reading_logs.adoc
new file mode 100644
index 000000000000..1736896f4f7a
--- /dev/null
+++ b/doc/modules/cassandra/pages/troubleshooting/reading_logs.adoc
@@ -0,0 +1,247 @@
+= Cassandra Logs
+
+Cassandra has rich support for logging and attempts to give operators
+maximum insight into the database while at the same time limiting noise
+to the logs.
+
+== Common Log Files
+
+Cassandra has three main logs, the `system.log`, `debug.log` and
+`gc.log`, which hold general logging messages, debugging logging
+messages, and Java garbage collection logs respectively.
+
+These logs by default live in `$CASSANDRA_HOME/logs`, but most Linux
+distributions relocate logs to `/var/log/cassandra`. Operators can tune
+this location as well as what levels are logged using the provided
+`logback.xml` file.
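+
+If you are not sure which location a particular node uses, checking both
+default locations usually settles it quickly. The commands below are
+only a sketch; paths vary by installation and by any `logback.xml`
+customization:
+
+[source, bash]
+----
+$ # Package installs usually log here
+$ ls -lh /var/log/cassandra/
+
+$ # Tarball installs usually log under the install directory instead
+$ ls -lh "$CASSANDRA_HOME"/logs/
+
+$ # Follow the main log while reproducing an issue
+$ tail -f /var/log/cassandra/system.log
+----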
+
+=== `system.log`
+
+This log is the default Cassandra log and is a good place to start any
+investigation. Some examples of activities logged to this log:
+
+* Uncaught exceptions. These can be very useful for debugging errors.
+* `GCInspector` messages indicating long garbage collector pauses. When
+long pauses happen Cassandra will print how long and also what was the
+state of the system (thread state) at the time of that pause. This can
+help narrow down a capacity issue (either not enough heap or not enough
+spare CPU).
+* Information about nodes joining and leaving the cluster as well as
+token metadata (data ownership) changes. This is useful for debugging
+network partitions, data movements, and more.
+* Keyspace/Table creation, modification, deletion.
+* `StartupChecks` that ensure optimal configuration of the operating
+system to run Cassandra.
+* Information about some background operational tasks (e.g. Index
+Redistribution).
+
+As with any application, looking for `ERROR` or `WARN` lines can be a
+great first step:
+
+[source, bash]
+----
+$ # Search for warnings or errors in the latest system.log
+$ grep 'WARN\|ERROR' system.log | tail
+...
+
+$ # Search for warnings or errors in all rotated system.log
+$ zgrep 'WARN\|ERROR' system.log.* | less
+...
+----
+
+=== `debug.log`
+
+This log contains additional debugging information that may be useful
+when troubleshooting but may be much noisier than the normal
+`system.log`. Some examples of activities logged to this log:
+
+* Information about compactions, including when they start, which
+sstables they contain, and when they finish.
+* Information about memtable flushes to disk, including when they
+happened, how large the flushes were, and which commitlog segments the
+flush impacted.
+
+This log can be _very_ noisy, so it is highly recommended to use `grep`
+and other log analysis tools to dive deep. For example:
+
+[source, bash]
+----
+# Search for messages involving a CompactionTask with 5 lines of context
+$ grep CompactionTask debug.log -C 5
+
+# Look at the distribution of flush tasks per keyspace
+$ grep "Enqueuing flush" debug.log | cut -f 10 -d ' ' | sort | uniq -c
+      6 compaction_history:
+      1 test_keyspace:
+      2 local:
+     17 size_estimates:
+     17 sstable_activity:
+----
+
+=== `gc.log`
+
+The gc log is a standard Java GC log. With the default `jvm.options`
+settings you get a lot of valuable information in this log such as
+application pause times and why pauses happened. This may help narrow
+down throughput or latency issues to a mistuned JVM.
For example you can +view the last few pauses: + +[source, bash] +---- +$ grep stopped gc.log.0.current | tail +2018-08-29T00:19:39.522+0000: 3022663.591: Total time for which application threads were stopped: 0.0332813 seconds, Stopping threads took: 0.0008189 seconds +2018-08-29T00:19:44.369+0000: 3022668.438: Total time for which application threads were stopped: 0.0312507 seconds, Stopping threads took: 0.0007025 seconds +2018-08-29T00:19:49.796+0000: 3022673.865: Total time for which application threads were stopped: 0.0307071 seconds, Stopping threads took: 0.0006662 seconds +2018-08-29T00:19:55.452+0000: 3022679.521: Total time for which application threads were stopped: 0.0309578 seconds, Stopping threads took: 0.0006832 seconds +2018-08-29T00:20:00.127+0000: 3022684.197: Total time for which application threads were stopped: 0.0310082 seconds, Stopping threads took: 0.0007090 seconds +2018-08-29T00:20:06.583+0000: 3022690.653: Total time for which application threads were stopped: 0.0317346 seconds, Stopping threads took: 0.0007106 seconds +2018-08-29T00:20:10.079+0000: 3022694.148: Total time for which application threads were stopped: 0.0299036 seconds, Stopping threads took: 0.0006889 seconds +2018-08-29T00:20:15.739+0000: 3022699.809: Total time for which application threads were stopped: 0.0078283 seconds, Stopping threads took: 0.0006012 seconds +2018-08-29T00:20:15.770+0000: 3022699.839: Total time for which application threads were stopped: 0.0301285 seconds, Stopping threads took: 0.0003789 seconds +2018-08-29T00:20:15.798+0000: 3022699.867: Total time for which application threads were stopped: 0.0279407 seconds, Stopping threads took: 0.0003627 seconds +---- + +This shows a lot of valuable information including how long the +application was paused (meaning zero user queries were being serviced +during the e.g. 33ms JVM pause) as well as how long it took to enter the +safepoint. You can use this raw data to e.g. 
get the longest pauses: + +[source, bash] +---- +$ grep stopped gc.log.0.current | cut -f 11 -d ' ' | sort -n | tail | xargs -IX grep X gc.log.0.current | sort -k 1 +2018-08-28T17:13:40.520-0700: 1.193: Total time for which application threads were stopped: 0.0157914 seconds, Stopping threads took: 0.0000355 seconds +2018-08-28T17:13:41.206-0700: 1.879: Total time for which application threads were stopped: 0.0249811 seconds, Stopping threads took: 0.0000318 seconds +2018-08-28T17:13:41.638-0700: 2.311: Total time for which application threads were stopped: 0.0561130 seconds, Stopping threads took: 0.0000328 seconds +2018-08-28T17:13:41.677-0700: 2.350: Total time for which application threads were stopped: 0.0362129 seconds, Stopping threads took: 0.0000597 seconds +2018-08-28T17:13:41.781-0700: 2.454: Total time for which application threads were stopped: 0.0442846 seconds, Stopping threads took: 0.0000238 seconds +2018-08-28T17:13:41.976-0700: 2.649: Total time for which application threads were stopped: 0.0377115 seconds, Stopping threads took: 0.0000250 seconds +2018-08-28T17:13:42.172-0700: 2.845: Total time for which application threads were stopped: 0.0475415 seconds, Stopping threads took: 0.0001018 seconds +2018-08-28T17:13:42.825-0700: 3.498: Total time for which application threads were stopped: 0.0379155 seconds, Stopping threads took: 0.0000571 seconds +2018-08-28T17:13:43.574-0700: 4.247: Total time for which application threads were stopped: 0.0323812 seconds, Stopping threads took: 0.0000574 seconds +2018-08-28T17:13:44.602-0700: 5.275: Total time for which application threads were stopped: 0.0238975 seconds, Stopping threads took: 0.0000788 seconds +---- + +In this case any client waiting on a query would have experienced a +56ms latency at 17:13:41. + +Note that GC pauses are not _link:[only] garbage collection, although +generally speaking high pauses with fast safepoints indicate a lack of +JVM heap or mistuned JVM GC algorithm. High pauses with slow safepoints +typically indicate that the JVM is having trouble entering a safepoint +which usually indicates slow disk drives (Cassandra makes heavy use of +memory mapped reads which the JVM doesn't know could have disk latency, +so the JVM safepoint logic doesn't handle a blocking memory mapped read +particularly well). + +Using these logs you can even get a pause distribution with something +like +https://github.com/bitly/data_hacks/blob/master/data_hacks/histogram.py[histogram.py]: + +[source, bash] +---- +$ grep stopped gc.log.0.current | cut -f 11 -d ' ' | sort -n | histogram.py +# NumSamples = 410293; Min = 0.00; Max = 11.49 +# Mean = 0.035346; Variance = 0.002216; SD = 0.047078; Median 0.036498 +# each ∎ represents a count of 5470 + 0.0001 - 1.1496 [410255]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ + 1.1496 - 2.2991 [ 15]: + 2.2991 - 3.4486 [ 5]: + 3.4486 - 4.5981 [ 1]: + 4.5981 - 5.7475 [ 5]: + 5.7475 - 6.8970 [ 9]: + 6.8970 - 8.0465 [ 1]: + 8.0465 - 9.1960 [ 0]: + 9.1960 - 10.3455 [ 0]: + 10.3455 - 11.4949 [ 2]: +---- + +We can see in this case while we have very good average performance +something is causing multi second JVM pauses ... 
In this case it was +mostly safepoint pauses caused by slow disks: + +[source, bash] +---- +$ grep stopped gc.log.0.current | cut -f 11 -d ' ' | sort -n | tail | xargs -IX grep X gc.log.0.current| sort -k 1 +2018-07-27T04:52:27.413+0000: 187831.482: Total time for which application threads were stopped: 6.5037022 seconds, Stopping threads took: 0.0005212 seconds +2018-07-30T23:38:18.354+0000: 514582.423: Total time for which application threads were stopped: 6.3262938 seconds, Stopping threads took: 0.0004882 seconds +2018-08-01T02:37:48.380+0000: 611752.450: Total time for which application threads were stopped: 10.3879659 seconds, Stopping threads took: 0.0004475 seconds +2018-08-06T22:04:14.990+0000: 1113739.059: Total time for which application threads were stopped: 6.0917409 seconds, Stopping threads took: 0.0005553 seconds +2018-08-14T00:04:06.091+0000: 1725730.160: Total time for which application threads were stopped: 6.0141054 seconds, Stopping threads took: 0.0004976 seconds +2018-08-17T06:23:06.755+0000: 2007670.824: Total time for which application threads were stopped: 6.0133694 seconds, Stopping threads took: 0.0006011 seconds +2018-08-23T06:35:46.068+0000: 2526830.137: Total time for which application threads were stopped: 6.4767751 seconds, Stopping threads took: 6.4426849 seconds +2018-08-23T06:36:29.018+0000: 2526873.087: Total time for which application threads were stopped: 11.4949489 seconds, Stopping threads took: 11.4638297 seconds +2018-08-23T06:37:12.671+0000: 2526916.741: Total time for which application threads were stopped: 6.3867003 seconds, Stopping threads took: 6.3507166 seconds +2018-08-23T06:37:47.156+0000: 2526951.225: Total time for which application threads were stopped: 7.9528200 seconds, Stopping threads took: 7.9197756 seconds +---- + +Sometimes reading and understanding java GC logs is hard, but you can +take the raw GC files and visualize them using tools such as +https://github.com/chewiebug/GCViewer[GCViewer] which take the Cassandra +GC log as input and show you detailed visual information on your garbage +collection performance. This includes pause analysis as well as +throughput information. For a stable Cassandra JVM you probably want to +aim for pauses less than 200ms and GC throughput greater +than 99%. + +Java GC pauses are one of the leading causes of tail latency in +Cassandra (along with drive latency) so sometimes this information can +be crucial while debugging tail latency issues. + +== Getting More Information + +If the default logging levels are insuficient, `nodetool` can set higher +or lower logging levels for various packages and classes using the +`nodetool setlogginglevel` command. Start by viewing the current levels: + +[source, bash] +---- +$ nodetool getlogginglevels + +Logger Name Log Level +ROOT INFO +org.apache.cassandra DEBUG +---- + +Perhaps the `Gossiper` is acting up and we wish to enable it at `TRACE` +level for even more insight: + +[source, bash] +---- +$ nodetool setlogginglevel org.apache.cassandra.gms.Gossiper TRACE + +$ nodetool getlogginglevels + +Logger Name Log Level +ROOT INFO +org.apache.cassandra DEBUG +org.apache.cassandra.gms.Gossiper TRACE + +$ grep TRACE debug.log | tail -2 +TRACE [GossipStage:1] 2018-07-04 17:07:47,879 Gossiper.java:1234 - Updating +heartbeat state version to 2344 from 2343 for 127.0.0.2:7000 ... 
+TRACE [GossipStage:1] 2018-07-04 17:07:47,879 Gossiper.java:923 - local
+heartbeat version 2341 greater than 2340 for 127.0.0.1:7000
+----
+
+Note that any changes made this way are reverted on the next Cassandra
+process restart. To make the changes permanent add the appropriate rule
+to `logback.xml`.
+
+[source,diff]
+----
+diff --git a/conf/logback.xml b/conf/logback.xml
+index b2c5b10..71b0a49 100644
+--- a/conf/logback.xml
++++ b/conf/logback.xml
+@@ -98,4 +98,5 @@ appender reference in the root level section below.
+   </root>
+
+   <logger name="org.apache.cassandra" level="DEBUG"/>
++  <logger name="org.apache.cassandra.gms.Gossiper" level="TRACE"/>
+ </configuration>
+----
+
+Note that if you want more information than this tool provides, there
+are other live capture options available such as
+xref:use_tools.adoc#packet-capture[`packet-capture`].
diff --git a/doc/modules/cassandra/pages/troubleshooting/use_nodetool.adoc b/doc/modules/cassandra/pages/troubleshooting/use_nodetool.adoc
new file mode 100644
index 000000000000..f80d03969565
--- /dev/null
+++ b/doc/modules/cassandra/pages/troubleshooting/use_nodetool.adoc
@@ -0,0 +1,242 @@
+= Use Nodetool
+
+Cassandra's `nodetool` allows you to narrow problems from the cluster
+down to a particular node and gives a lot of insight into the state of
+the Cassandra process itself. There are dozens of useful commands (see
+`nodetool help` for all the commands), but briefly some of the most
+useful for troubleshooting:
+
+[[nodetool-status]]
+== Cluster Status
+
+You can use `nodetool status` to assess the status of the cluster:
+
+[source, bash]
+----
+$ nodetool status
+
+Datacenter: dc1
+=======================
+Status=Up/Down
+|/ State=Normal/Leaving/Joining/Moving
+--  Address    Load      Tokens  Owns (effective)  Host ID                               Rack
+UN  127.0.1.1  4.69 GiB  1       100.0%            35ea8c9f-b7a2-40a7-b9c5-0ee8b91fdd0e  r1
+UN  127.0.1.2  4.71 GiB  1       100.0%            752e278f-b7c5-4f58-974b-9328455af73f  r2
+UN  127.0.1.3  4.69 GiB  1       100.0%            9dc1a293-2cc0-40fa-a6fd-9e6054da04a7  r3
+----
+
+In this case we can see that we have three nodes in one datacenter with
+about 4.6GB of data each and they are all "up". The up/down status of a
+node is independently determined by every node in the cluster, so you
+may have to run `nodetool status` on multiple nodes in a cluster to see
+the full view.
+
+You can use `nodetool status` plus a little grep to see which nodes are
+down:
+
+[source, bash]
+----
+$ nodetool status | grep -v '^UN'
+Datacenter: dc1
+===============
+Status=Up/Down
+|/ State=Normal/Leaving/Joining/Moving
+--  Address    Load        Tokens  Owns (effective)  Host ID                               Rack
+Datacenter: dc2
+===============
+Status=Up/Down
+|/ State=Normal/Leaving/Joining/Moving
+--  Address    Load        Tokens  Owns (effective)  Host ID                               Rack
+DN  127.0.0.5  105.73 KiB  1       33.3%             df303ac7-61de-46e9-ac79-6e630115fd75  r1
+----
+
+In this case there are two datacenters and there is one node down in
+datacenter `dc2` and rack `r1`. This may indicate an issue on
+`127.0.0.5` warranting investigation.
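+
+Because each node determines up/down status independently, it can help
+to compare the view from several nodes before drawing conclusions. The
+following is only a sketch, assuming SSH access to the nodes and
+hypothetical host names:
+
+[source, bash]
+----
+$ # Hypothetical host names; compare each node's view, showing only non-UN lines
+$ for host in cassandra-1 cassandra-2 cassandra-3; do
+      echo "== $host =="
+      ssh "$host" nodetool status | grep -v '^UN'
+  done
+----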
+ +[[nodetool-proxyhistograms]] +== Coordinator Query Latency + +You can view latency distributions of coordinator read and write latency +to help narrow down latency issues using `nodetool proxyhistograms`: + +[source, bash] +---- +$ nodetool proxyhistograms +Percentile Read Latency Write Latency Range Latency CAS Read Latency CAS Write Latency View Write Latency + (micros) (micros) (micros) (micros) (micros) (micros) +50% 454.83 219.34 0.00 0.00 0.00 0.00 +75% 545.79 263.21 0.00 0.00 0.00 0.00 +95% 654.95 315.85 0.00 0.00 0.00 0.00 +98% 785.94 379.02 0.00 0.00 0.00 0.00 +99% 3379.39 2346.80 0.00 0.00 0.00 0.00 +Min 42.51 105.78 0.00 0.00 0.00 0.00 +Max 25109.16 43388.63 0.00 0.00 0.00 0.00 +---- + +Here you can see the full latency distribution of reads, writes, range +requests (e.g. `select * from keyspace.table`), CAS read (compare phase +of CAS) and CAS write (set phase of compare and set). These can be +useful for narrowing down high level latency problems, for example in +this case if a client had a 20 millisecond timeout on their reads they +might experience the occasional timeout from this node but less than 1% +(since the 99% read latency is 3.3 milliseconds < 20 milliseconds). + +[[nodetool-tablehistograms]] +== Local Query Latency + +If you know which table is having latency/error issues, you can use +`nodetool tablehistograms` to get a better idea of what is happening +locally on a node: + +[source, bash] +---- +$ nodetool tablehistograms keyspace table +Percentile SSTables Write Latency Read Latency Partition Size Cell Count + (micros) (micros) (bytes) +50% 0.00 73.46 182.79 17084 103 +75% 1.00 88.15 315.85 17084 103 +95% 2.00 126.93 545.79 17084 103 +98% 2.00 152.32 654.95 17084 103 +99% 2.00 182.79 785.94 17084 103 +Min 0.00 42.51 24.60 14238 87 +Max 2.00 12108.97 17436.92 17084 103 +---- + +This shows you percentile breakdowns particularly critical metrics. + +The first column contains how many sstables were read per logical read. +A very high number here indicates that you may have chosen the wrong +compaction strategy, e.g. `SizeTieredCompactionStrategy` typically has +many more reads per read than `LeveledCompactionStrategy` does for +update heavy workloads. + +The second column shows you a latency breakdown of _local_ write +latency. In this case we see that while the p50 is quite good at 73 +microseconds, the maximum latency is quite slow at 12 milliseconds. High +write max latencies often indicate a slow commitlog volume (slow to +fsync) or large writes that quickly saturate commitlog segments. + +The third column shows you a latency breakdown of _local_ read latency. +We can see that local Cassandra reads are (as expected) slower than +local writes, and the read speed correlates highly with the number of +sstables read per read. + +The fourth and fifth columns show distributions of partition size and +column count per partition. These are useful for determining if the +table has on average skinny or wide partitions and can help you isolate +bad data patterns. For example if you have a single cell that is 2 +megabytes, that is probably going to cause some heap pressure when it's +read. + +[[nodetool-tpstats]] +== Threadpool State + +You can use `nodetool tpstats` to view the current outstanding requests +on a particular node. This is useful for trying to find out which +resource (read threads, write threads, compaction, request response +threads) the Cassandra process lacks. 
For example: + +[source, bash] +---- +$ nodetool tpstats +Pool Name Active Pending Completed Blocked All time blocked +ReadStage 2 0 12 0 0 +MiscStage 0 0 0 0 0 +CompactionExecutor 0 0 1940 0 0 +MutationStage 0 0 0 0 0 +GossipStage 0 0 10293 0 0 +Repair-Task 0 0 0 0 0 +RequestResponseStage 0 0 16 0 0 +ReadRepairStage 0 0 0 0 0 +CounterMutationStage 0 0 0 0 0 +MemtablePostFlush 0 0 83 0 0 +ValidationExecutor 0 0 0 0 0 +MemtableFlushWriter 0 0 30 0 0 +ViewMutationStage 0 0 0 0 0 +CacheCleanupExecutor 0 0 0 0 0 +MemtableReclaimMemory 0 0 30 0 0 +PendingRangeCalculator 0 0 11 0 0 +SecondaryIndexManagement 0 0 0 0 0 +HintsDispatcher 0 0 0 0 0 +Native-Transport-Requests 0 0 192 0 0 +MigrationStage 0 0 14 0 0 +PerDiskMemtableFlushWriter_0 0 0 30 0 0 +Sampler 0 0 0 0 0 +ViewBuildExecutor 0 0 0 0 0 +InternalResponseStage 0 0 0 0 0 +AntiEntropyStage 0 0 0 0 0 + +Message type Dropped Latency waiting in queue (micros) + 50% 95% 99% Max +READ 0 N/A N/A N/A N/A +RANGE_SLICE 0 0.00 0.00 0.00 0.00 +_TRACE 0 N/A N/A N/A N/A +HINT 0 N/A N/A N/A N/A +MUTATION 0 N/A N/A N/A N/A +COUNTER_MUTATION 0 N/A N/A N/A N/A +BATCH_STORE 0 N/A N/A N/A N/A +BATCH_REMOVE 0 N/A N/A N/A N/A +REQUEST_RESPONSE 0 0.00 0.00 0.00 0.00 +PAGED_RANGE 0 N/A N/A N/A N/A +READ_REPAIR 0 N/A N/A N/A N/A +---- + +This command shows you all kinds of interesting statistics. The first +section shows a detailed breakdown of threadpools for each Cassandra +stage, including how many threads are current executing (Active) and how +many are waiting to run (Pending). Typically if you see pending +executions in a particular threadpool that indicates a problem localized +to that type of operation. For example if the `RequestResponseState` +queue is backing up, that means that the coordinators are waiting on a +lot of downstream replica requests and may indicate a lack of token +awareness, or very high consistency levels being used on read requests +(for example reading at `ALL` ties up RF `RequestResponseState` threads +whereas `LOCAL_ONE` only uses a single thread in the `ReadStage` +threadpool). On the other hand if you see a lot of pending compactions +that may indicate that your compaction threads cannot keep up with the +volume of writes and you may need to tune either the compaction strategy +or the `concurrent_compactors` or `compaction_throughput` options. + +The second section shows drops (errors) and latency distributions for +all the major request types. Drops are cumulative since process start, +but if you have any that indicate a serious problem as the default +timeouts to qualify as a drop are quite high (~5-10 seconds). Dropped +messages often warrants further investigation. + +[[nodetool-compactionstats]] +== Compaction State + +As Cassandra is a LSM datastore, Cassandra sometimes has to compact +sstables together, which can have adverse effects on performance. In +particular, compaction uses a reasonable quantity of CPU resources, +invalidates large quantities of the OS +https://en.wikipedia.org/wiki/Page_cache[page cache], and can put a lot +of load on your disk drives. 
There are great `os tools ` to +determine if this is the case, but often it's a good idea to check if +compactions are even running using `nodetool compactionstats`: + +[source, bash] +---- +$ nodetool compactionstats +pending tasks: 2 +- keyspace.table: 2 + +id compaction type keyspace table completed total unit progress +2062b290-7f3a-11e8-9358-cd941b956e60 Compaction keyspace table 21848273 97867583 bytes 22.32% +Active compaction remaining time : 0h00m04s +---- + +In this case there is a single compaction running on the +`keyspace.table` table, has completed 21.8 megabytes of 97 and Cassandra +estimates (based on the configured compaction throughput) that this will +take 4 seconds. You can also pass `-H` to get the units in a human +readable format. + +Generally each running compaction can consume a single core, but the +more you do in parallel the faster data compacts. Compaction is crucial +to ensuring good read performance so having the right balance of +concurrent compactions such that compactions complete quickly but don't +take too many resources away from query threads is very important for +performance. If you notice compaction unable to keep up, try tuning +Cassandra's `concurrent_compactors` or `compaction_throughput` options. diff --git a/doc/modules/cassandra/pages/troubleshooting/use_tools.adoc b/doc/modules/cassandra/pages/troubleshooting/use_tools.adoc new file mode 100644 index 000000000000..b9ec42acd3ef --- /dev/null +++ b/doc/modules/cassandra/pages/troubleshooting/use_tools.adoc @@ -0,0 +1,578 @@ += Diving Deep, Use External Tools + +Machine access allows operators to dive even deeper than logs and +`nodetool` allow. While every Cassandra operator may have their personal +favorite toolsets for troubleshooting issues, this page contains some of +the most common operator techniques and examples of those tools. Many of +these commands work only on Linux, but if you are deploying on a +different operating system you may have access to other substantially +similar tools that assess similar OS level metrics and processes. + +== JVM Tooling + +The JVM ships with a number of useful tools. Some of them are useful for +debugging Cassandra issues, especially related to heap and execution +stacks. + +*NOTE*: There are two common gotchas with JVM tooling and Cassandra: + +[arabic] +. By default Cassandra ships with `-XX:+PerfDisableSharedMem` set to +prevent long pauses (see `CASSANDRA-9242` and `CASSANDRA-9483` for +details). If you want to use JVM tooling you can instead have `/tmp` +mounted on an in memory `tmpfs` which also effectively works around +`CASSANDRA-9242`. +. Make sure you run the tools as the same user as Cassandra is running +as, e.g. if the database is running as `cassandra` the tool also has to +be run as `cassandra`, e.g. via `sudo -u cassandra `. + +=== Garbage Collection State (jstat) + +If you suspect heap pressure you can use `jstat` to dive deep into the +garbage collection state of a Cassandra process. 
This command is always +safe to run and yields detailed heap information including eden heap +usage (E), old generation heap usage (O), count of eden collections +(YGC), time spend in eden collections (YGCT), old/mixed generation +collections (FGC) and time spent in old/mixed generation collections +(FGCT): + +[source, bash] +---- +jstat -gcutil 500ms + S0 S1 E O M CCS YGC YGCT FGC FGCT GCT + 0.00 0.00 81.53 31.16 93.07 88.20 12 0.151 3 0.257 0.408 + 0.00 0.00 82.36 31.16 93.07 88.20 12 0.151 3 0.257 0.408 + 0.00 0.00 82.36 31.16 93.07 88.20 12 0.151 3 0.257 0.408 + 0.00 0.00 83.19 31.16 93.07 88.20 12 0.151 3 0.257 0.408 + 0.00 0.00 83.19 31.16 93.07 88.20 12 0.151 3 0.257 0.408 + 0.00 0.00 84.19 31.16 93.07 88.20 12 0.151 3 0.257 0.408 + 0.00 0.00 84.19 31.16 93.07 88.20 12 0.151 3 0.257 0.408 + 0.00 0.00 85.03 31.16 93.07 88.20 12 0.151 3 0.257 0.408 + 0.00 0.00 85.03 31.16 93.07 88.20 12 0.151 3 0.257 0.408 + 0.00 0.00 85.94 31.16 93.07 88.20 12 0.151 3 0.257 0.408 +---- + +In this case we see we have a relatively healthy heap profile, with +31.16% old generation heap usage and 83% eden. If the old generation +routinely is above 75% then you probably need more heap (assuming CMS +with a 75% occupancy threshold). If you do have such persistently high +old gen that often means you either have under-provisioned the old +generation heap, or that there is too much live data on heap for +Cassandra to collect (e.g. because of memtables). Another thing to watch +for is time between young garbage collections (YGC), which indicate how +frequently the eden heap is collected. Each young gc pause is about +20-50ms, so if you have a lot of them your clients will notice in their +high percentile latencies. + +=== Thread Information (jstack) + +To get a point in time snapshot of exactly what Cassandra is doing, run +`jstack` against the Cassandra PID. *Note* that this does pause the JVM +for a very brief period (<20ms).: + +[source, bash] +---- +$ jstack > threaddump + +# display the threaddump +$ cat threaddump + +# look at runnable threads +$grep RUNNABLE threaddump -B 1 +"Attach Listener" #15 daemon prio=9 os_prio=0 tid=0x00007f829c001000 nid=0x3a74 waiting on condition [0x0000000000000000] + java.lang.Thread.State: RUNNABLE +-- +"DestroyJavaVM" #13 prio=5 os_prio=0 tid=0x00007f82e800e000 nid=0x2a19 waiting on condition [0x0000000000000000] + java.lang.Thread.State: RUNNABLE +-- +"JPS thread pool" #10 prio=5 os_prio=0 tid=0x00007f82e84d0800 nid=0x2a2c runnable [0x00007f82d0856000] + java.lang.Thread.State: RUNNABLE +-- +"Service Thread" #9 daemon prio=9 os_prio=0 tid=0x00007f82e80d7000 nid=0x2a2a runnable [0x0000000000000000] + java.lang.Thread.State: RUNNABLE +-- +"C1 CompilerThread3" #8 daemon prio=9 os_prio=0 tid=0x00007f82e80cc000 nid=0x2a29 waiting on condition [0x0000000000000000] + java.lang.Thread.State: RUNNABLE +-- + +# Note that the nid is the Linux thread id +---- + +Some of the most important information in the threaddumps are +waiting/blocking threads, including what locks or monitors the thread is +blocking/waiting on. + +== Basic OS Tooling + +A great place to start when debugging a Cassandra issue is understanding +how Cassandra is interacting with system resources. The following are +all resources that Cassandra makes heavy uses of: + +* CPU cores. For executing concurrent user queries +* CPU processing time. For query activity (data decompression, row +merging, etc.) +* CPU processing time (low priority). For background tasks (compaction, +streaming, etc ...) 
+* RAM for Java Heap. Used to hold internal data-structures and by +default the Cassandra memtables. Heap space is a crucial component of +write performance as well as generally. +* RAM for OS disk cache. Used to cache frequently accessed SSTable +blocks. OS disk cache is a crucial component of read performance. +* Disks. Cassandra cares a lot about disk read latency, disk write +throughput, and of course disk space. +* Network latency. Cassandra makes many internode requests, so network +latency between nodes can directly impact performance. +* Network throughput. Cassandra (as other databases) frequently have the +so called "incast" problem where a small request (e.g. +`SELECT * from foo.bar`) returns a massively large result set (e.g. the +entire dataset). In such situations outgoing bandwidth is crucial. + +Often troubleshooting Cassandra comes down to troubleshooting what +resource the machine or cluster is running out of. Then you create more +of that resource or change the query pattern to make less use of that +resource. + +=== High Level Resource Usage (top/htop) + +Cassandra makes signifiant use of system resources, and often the very +first useful action is to run `top` or `htop` +(https://hisham.hm/htop/[website])to see the state of the machine. + +Useful things to look at: + +* System load levels. While these numbers can be confusing, generally +speaking if the load average is greater than the number of CPU cores, +Cassandra probably won't have very good (sub 100 millisecond) latencies. +See +http://www.brendangregg.com/blog/2017-08-08/linux-load-averages.html[Linux +Load Averages] for more information. +* CPU utilization. `htop` in particular can help break down CPU +utilization into `user` (low and normal priority), `system` (kernel), +and `io-wait` . Cassandra query threads execute as normal priority +`user` threads, while compaction threads execute as low priority `user` +threads. High `system` time could indicate problems like thread +contention, and high `io-wait` may indicate slow disk drives. This can +help you understand what Cassandra is spending processing resources +doing. +* Memory usage. Look for which programs have the most resident memory, +it is probably Cassandra. The number for Cassandra is likely +inaccurately high due to how Linux (as of 2018) accounts for memory +mapped file memory. + +[[os-iostat]] +=== IO Usage (iostat) + +Use iostat to determine how data drives are faring, including latency +distributions, throughput, and utilization: + +[source, bash] +---- +$ sudo iostat -xdm 2 +Linux 4.13.0-13-generic (hostname) 07/03/2018 _x86_64_ (8 CPU) + +Device: rrqm/s wrqm/s r/s w/s rMB/s wMB/s avgrq-sz avgqu-sz await r_await w_await svctm %util +sda 0.00 0.28 0.32 5.42 0.01 0.13 48.55 0.01 2.21 0.26 2.32 0.64 0.37 +sdb 0.00 0.00 0.00 0.00 0.00 0.00 79.34 0.00 0.20 0.20 0.00 0.16 0.00 +sdc 0.34 0.27 0.76 0.36 0.01 0.02 47.56 0.03 26.90 2.98 77.73 9.21 1.03 + +Device: rrqm/s wrqm/s r/s w/s rMB/s wMB/s avgrq-sz avgqu-sz await r_await w_await svctm %util +sda 0.00 0.00 2.00 32.00 0.01 4.04 244.24 0.54 16.00 0.00 17.00 1.06 3.60 +sdb 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 +sdc 0.00 24.50 0.00 114.00 0.00 11.62 208.70 5.56 48.79 0.00 48.79 1.12 12.80 +---- + +In this case we can see that `/dev/sdc1` is a very slow drive, having an +`await` close to 50 milliseconds and an `avgqu-sz` close to 5 ios. 
The
+drive is not particularly saturated (utilization is only 12.8%), but we
+should still be concerned about how this would affect our p99 latency
+since 50ms is quite long for typical Cassandra operations. That being
+said, in this case most of the latency is present in writes (typically
+writes have higher latency than reads), which due to the LSM nature of
+Cassandra is often hidden from the user.
+
+Important metrics to assess using iostat:
+
+* Reads and writes per second. These numbers will change with the
+workload, but generally speaking the more reads Cassandra has to do from
+disk the slower Cassandra read latencies are. Large numbers of reads per
+second can be a dead giveaway that the cluster has insufficient memory
+for OS page caching.
+* Write throughput. Cassandra's LSM model defers user writes and batches
+them together, which means that throughput to the underlying medium is
+the most important write metric for Cassandra.
+* Read latency (`r_await`). When Cassandra misses the OS page cache and
+reads from SSTables, the read latency directly determines how fast
+Cassandra can respond with the data.
+* Write latency. Cassandra is less sensitive to write latency except
+when it syncs the commit log. This typically shows up in the very high
+percentiles of write latency.
+
+Note that to get detailed latency breakdowns you will need a more
+advanced tool such as xref:use_tools.adoc#use-bcc-tools[`bcc-tools`].
+
+=== OS Page Cache Usage
+
+As Cassandra makes heavy use of memory mapped files, the health of the
+operating system's https://en.wikipedia.org/wiki/Page_cache[Page Cache]
+is crucial to performance. Start by finding how much available cache is
+in the system:
+
+[source, bash]
+----
+$ free -g
+ total used free shared buff/cache available
+Mem: 15 9 2 0 3 5
+Swap: 0 0 0
+----
+
+In this case 9GB of memory is used by user processes (Cassandra heap)
+and 8GB is available for OS page cache. Of that, 3GB is actually used to
+cache files. If most memory is used and unavailable to the page cache,
+Cassandra performance can suffer significantly. This is why Cassandra
+starts with a reasonably small amount of memory reserved for the heap.
+
+If you suspect that you are missing the OS page cache frequently you can
+use advanced tools like xref:use_tools.adoc#use-bcc-tools[cachestat] or
+xref:use_tools.adoc#use-vmtouch[vmtouch] to dive deeper.
+
+=== Network Latency and Reliability
+
+Whenever Cassandra does writes or reads that involve other replicas,
+`LOCAL_QUORUM` reads for example, one of the dominant effects on latency
+is network latency. When trying to debug issues with multi-machine
+operations, the network can be an important resource to investigate.
You +can determine internode latency using tools like `ping` and `traceroute` +or most effectively `mtr`: + +[source, bash] +---- +$ mtr -nr www.google.com +Start: Sun Jul 22 13:10:28 2018 +HOST: hostname Loss% Snt Last Avg Best Wrst StDev + 1.|-- 192.168.1.1 0.0% 10 2.0 1.9 1.1 3.7 0.7 + 2.|-- 96.123.29.15 0.0% 10 11.4 11.0 9.0 16.4 1.9 + 3.|-- 68.86.249.21 0.0% 10 10.6 10.7 9.0 13.7 1.1 + 4.|-- 162.141.78.129 0.0% 10 11.5 10.6 9.6 12.4 0.7 + 5.|-- 162.151.78.253 0.0% 10 10.9 12.1 10.4 20.2 2.8 + 6.|-- 68.86.143.93 0.0% 10 12.4 12.6 9.9 23.1 3.8 + 7.|-- 96.112.146.18 0.0% 10 11.9 12.4 10.6 15.5 1.6 + 9.|-- 209.85.252.250 0.0% 10 13.7 13.2 12.5 13.9 0.0 + 10.|-- 108.170.242.238 0.0% 10 12.7 12.4 11.1 13.0 0.5 + 11.|-- 74.125.253.149 0.0% 10 13.4 13.7 11.8 19.2 2.1 + 12.|-- 216.239.62.40 0.0% 10 13.4 14.7 11.5 26.9 4.6 + 13.|-- 108.170.242.81 0.0% 10 14.4 13.2 10.9 16.0 1.7 + 14.|-- 72.14.239.43 0.0% 10 12.2 16.1 11.0 32.8 7.1 + 15.|-- 216.58.195.68 0.0% 10 25.1 15.3 11.1 25.1 4.8 +---- + +In this example of `mtr`, we can rapidly assess the path that your +packets are taking, as well as what their typical loss and latency are. +Packet loss typically leads to between `200ms` and `3s` of additional +latency, so that can be a common cause of latency issues. + +=== Network Throughput + +As Cassandra is sensitive to outgoing bandwidth limitations, sometimes +it is useful to determine if network throughput is limited. One handy +tool to do this is +https://www.systutorials.com/docs/linux/man/8-iftop/[iftop] which shows +both bandwidth usage as well as connection information at a glance. An +example showing traffic during a stress run against a local `ccm` +cluster: + +[source, bash] +---- +$ # remove the -t for ncurses instead of pure text +$ sudo iftop -nNtP -i lo +interface: lo +IP address is: 127.0.0.1 +MAC address is: 00:00:00:00:00:00 +Listening on lo + # Host name (port/service if enabled) last 2s last 10s last 40s cumulative +-------------------------------------------------------------------------------------------- + 1 127.0.0.1:58946 => 869Kb 869Kb 869Kb 217KB + 127.0.0.3:9042 <= 0b 0b 0b 0B + 2 127.0.0.1:54654 => 736Kb 736Kb 736Kb 184KB + 127.0.0.1:9042 <= 0b 0b 0b 0B + 3 127.0.0.1:51186 => 669Kb 669Kb 669Kb 167KB + 127.0.0.2:9042 <= 0b 0b 0b 0B + 4 127.0.0.3:9042 => 3.30Kb 3.30Kb 3.30Kb 845B + 127.0.0.1:58946 <= 0b 0b 0b 0B + 5 127.0.0.1:9042 => 2.79Kb 2.79Kb 2.79Kb 715B + 127.0.0.1:54654 <= 0b 0b 0b 0B + 6 127.0.0.2:9042 => 2.54Kb 2.54Kb 2.54Kb 650B + 127.0.0.1:51186 <= 0b 0b 0b 0B + 7 127.0.0.1:36894 => 1.65Kb 1.65Kb 1.65Kb 423B + 127.0.0.5:7000 <= 0b 0b 0b 0B + 8 127.0.0.1:38034 => 1.50Kb 1.50Kb 1.50Kb 385B + 127.0.0.2:7000 <= 0b 0b 0b 0B + 9 127.0.0.1:56324 => 1.50Kb 1.50Kb 1.50Kb 383B + 127.0.0.1:7000 <= 0b 0b 0b 0B + 10 127.0.0.1:53044 => 1.43Kb 1.43Kb 1.43Kb 366B + 127.0.0.4:7000 <= 0b 0b 0b 0B +-------------------------------------------------------------------------------------------- +Total send rate: 2.25Mb 2.25Mb 2.25Mb +Total receive rate: 0b 0b 0b +Total send and receive rate: 2.25Mb 2.25Mb 2.25Mb +-------------------------------------------------------------------------------------------- +Peak rate (sent/received/total): 2.25Mb 0b 2.25Mb +Cumulative (sent/received/total): 576KB 0B 576KB +============================================================================================ +---- + +In this case we can see that bandwidth is fairly shared between many +peers, but if the total was getting close to the rated capacity of the +NIC or was focussed on a single client, 
that may indicate a clue as to +what issue is occurring. + +== Advanced tools + +Sometimes as an operator you may need to really dive deep. This is where +advanced OS tooling can come in handy. + +[[use-bcc-tools]] +=== bcc-tools + +Most modern Linux distributions (kernels newer than `4.1`) support +https://github.com/iovisor/bcc[bcc-tools] for diving deep into +performance problems. First install `bcc-tools`, e.g. via `apt` on +Debian: + +[source, bash] +---- +$ apt install bcc-tools +---- + +Then you can use all the tools that `bcc-tools` contains. One of the +most useful tools is `cachestat` +(https://github.com/iovisor/bcc/blob/master/tools/cachestat_example.txt[cachestat +examples]) which allows you to determine exactly how many OS page cache +hits and misses are happening: + +[source, bash] +---- +$ sudo /usr/share/bcc/tools/cachestat -T 1 +TIME TOTAL MISSES HITS DIRTIES BUFFERS_MB CACHED_MB +18:44:08 66 66 0 64 88 4427 +18:44:09 40 40 0 75 88 4427 +18:44:10 4353 45 4308 203 88 4427 +18:44:11 84 77 7 13 88 4428 +18:44:12 2511 14 2497 14 88 4428 +18:44:13 101 98 3 18 88 4428 +18:44:14 16741 0 16741 58 88 4428 +18:44:15 1935 36 1899 18 88 4428 +18:44:16 89 34 55 18 88 4428 +---- + +In this case there are not too many page cache `MISSES` which indicates +a reasonably sized cache. These metrics are the most direct measurement +of your Cassandra node's "hot" dataset. If you don't have enough cache, +`MISSES` will be high and performance will be slow. If you have enough +cache, `MISSES` will be low and performance will be fast (as almost all +reads are being served out of memory). + +You can also measure disk latency distributions using `biolatency` +(https://github.com/iovisor/bcc/blob/master/tools/biolatency_example.txt[biolatency +examples]) to get an idea of how slow Cassandra will be when reads miss +the OS page Cache and have to hit disks: + +[source, bash] +---- +$ sudo /usr/share/bcc/tools/biolatency -D 10 +Tracing block device I/O... Hit Ctrl-C to end. + + +disk = 'sda' + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 0 | | + 16 -> 31 : 12 |****************************************| + 32 -> 63 : 9 |****************************** | + 64 -> 127 : 1 |*** | + 128 -> 255 : 3 |********** | + 256 -> 511 : 7 |*********************** | + 512 -> 1023 : 2 |****** | + +disk = 'sdc' + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 0 | | + 16 -> 31 : 0 | | + 32 -> 63 : 0 | | + 64 -> 127 : 41 |************ | + 128 -> 255 : 17 |***** | + 256 -> 511 : 13 |*** | + 512 -> 1023 : 2 | | + 1024 -> 2047 : 0 | | + 2048 -> 4095 : 0 | | + 4096 -> 8191 : 56 |***************** | + 8192 -> 16383 : 131 |****************************************| + 16384 -> 32767 : 9 |** | +---- + +In this case most ios on the data drive (`sdc`) are fast, but many take +between 8 and 16 milliseconds. 
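+
+If it is not obvious which block device (e.g. `sdc` above) actually backs
+the Cassandra data directory, standard tools such as `df` and `lsblk` can
+map the data path to a device before you interpret the per-disk output.
+A minimal sketch, assuming the default data directory of
+`/var/lib/cassandra/data`:
+
+[source, bash]
+----
+$ # which filesystem and device hold the Cassandra data directory?
+$ df -h /var/lib/cassandra/data
+
+$ # how does that device fit into the block device tree?
+$ lsblk
+----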
+
+Finally `biosnoop`
+(https://github.com/iovisor/bcc/blob/master/tools/biosnoop_example.txt[examples])
+can be used to dive even deeper and see per-IO latencies:
+
+[source, bash]
+----
+$ sudo /usr/share/bcc/tools/biosnoop | grep java | head
+0.000000000 java 17427 sdc R 3972458600 4096 13.58
+0.000818000 java 17427 sdc R 3972459408 4096 0.35
+0.007098000 java 17416 sdc R 3972401824 4096 5.81
+0.007896000 java 17416 sdc R 3972489960 4096 0.34
+0.008920000 java 17416 sdc R 3972489896 4096 0.34
+0.009487000 java 17427 sdc R 3972401880 4096 0.32
+0.010238000 java 17416 sdc R 3972488368 4096 0.37
+0.010596000 java 17427 sdc R 3972488376 4096 0.34
+0.011236000 java 17410 sdc R 3972488424 4096 0.32
+0.011825000 java 17427 sdc R 3972488576 16384 0.65
+... time passes
+8.032687000 java 18279 sdc R 10899712 122880 3.01
+8.033175000 java 18279 sdc R 10899952 8192 0.46
+8.073295000 java 18279 sdc R 23384320 122880 3.01
+8.073768000 java 18279 sdc R 23384560 8192 0.46
+----
+
+With `biosnoop` you see every single IO and how long each takes. This
+data can be used to construct the latency distributions in `biolatency`
+but can also be used to better understand how disk latency affects
+performance. For example this particular drive takes ~3ms to service a
+memory mapped read due to the large default value (`128kb`) of
+`read_ahead_kb`. To improve point read performance you may want to
+decrease `read_ahead_kb` on fast data volumes such as SSDs, while a
+higher value like `128kb` is probably right for HDs. There are
+tradeoffs involved, see
+https://www.kernel.org/doc/Documentation/block/queue-sysfs.txt[queue-sysfs]
+docs for more information, but regardless `biosnoop` is useful for
+understanding _how_ Cassandra uses drives.
+
+[[use-vmtouch]]
+=== vmtouch
+
+Sometimes it's useful to know how much of the Cassandra data files is
+being cached by the OS. A great tool for answering this question is
+https://github.com/hoytech/vmtouch[vmtouch].
+
+First install it:
+
+[source, bash]
+----
+$ git clone https://github.com/hoytech/vmtouch.git
+$ cd vmtouch
+$ make
+----
+
+Then run it on the Cassandra data directory:
+
+[source, bash]
+----
+$ ./vmtouch /var/lib/cassandra/data/
+ Files: 312
+ Directories: 92
+ Resident Pages: 62503/64308 244M/251M 97.2%
+ Elapsed: 0.005657 seconds
+----
+
+In this case almost the entire dataset is hot in the OS page cache.
+Generally speaking the percentage doesn't really matter unless reads are
+missing the cache (see e.g. xref:use_tools.adoc#use-bcc-tools[cachestat]),
+in which case having additional memory may help read performance.
+
+=== CPU Flamegraphs
+
+Cassandra often uses a lot of CPU, but telling _what_ it is doing can
+prove difficult. One of the best ways to analyze Cassandra on CPU time
+is to use
+http://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html[CPU
+Flamegraphs] which display in a useful way which areas of Cassandra code
+are using CPU. This may help narrow down a compaction problem to a
+"compaction problem dropping tombstones" or just generally help you
+narrow down what Cassandra is doing while it is having an issue. To get
+CPU flamegraphs follow the instructions for
+http://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html#Java[Java
+Flamegraphs].
+
+Generally:
+
+[arabic]
+. Enable the `-XX:+PreserveFramePointer` option in Cassandra's
+`jvm.options` configuration file. This has a negligible performance
+impact but allows you to actually see what Cassandra is doing.
+. Run `perf` to get some data.
+. 
Send that data through the relevant scripts in the FlameGraph toolset
+and convert the data into a pretty flamegraph. View the resulting SVG
+image in a web browser or other image viewer.
+
+For example, cloning straight off GitHub, we first install
+`perf-map-agent` to the location of our JVMs (assumed to be
+`/usr/lib/jvm`):
+
+[source, bash]
+----
+$ sudo bash
+$ export JAVA_HOME=/usr/lib/jvm/java-8-oracle/
+$ cd /usr/lib/jvm
+$ git clone --depth=1 https://github.com/jvm-profiling-tools/perf-map-agent
+$ cd perf-map-agent
+$ cmake .
+$ make
+----
+
+Now to get a flamegraph:
+
+[source, bash]
+----
+$ git clone --depth=1 https://github.com/brendangregg/FlameGraph
+$ sudo bash
+$ cd FlameGraph
+$ # Record traces of Cassandra and map symbols for all java processes
+$ perf record -F 49 -a -g -p <cassandra pid> -- sleep 30; ./jmaps
+$ # Translate the data
+$ perf script > cassandra_stacks
+$ cat cassandra_stacks | ./stackcollapse-perf.pl | grep -v cpu_idle | \
+ ./flamegraph.pl --color=java --hash > cassandra_flames.svg
+----
+
+The resulting SVG is searchable, zoomable, and generally easy to
+introspect using a browser.
+
+=== Packet Capture
+
+Sometimes you have to understand what queries a Cassandra node is
+performing _right now_ to troubleshoot an issue. For these times trusty
+packet capture tools like `tcpdump` and
+https://www.wireshark.org/[Wireshark] can be very helpful to dissect
+packet captures. Wireshark even has native
+https://www.wireshark.org/docs/dfref/c/cql.html[CQL support] although it
+sometimes has compatibility issues with newer Cassandra protocol
+releases.
+
+To get a packet capture first capture some packets:
+
+[source, bash]
+----
+$ sudo tcpdump -U -s0 -i <interface> -w cassandra.pcap -n "tcp port 9042"
+----
+
+Now open it up with Wireshark:
+
+[source, bash]
+----
+$ wireshark cassandra.pcap
+----
+
+If you don't see CQL-like statements, try telling Wireshark to decode as
+CQL by right clicking on a packet going to 9042 -> `Decode as` -> select
+CQL from the dropdown for port 9042.
+
+If you don't want to do this manually or use a GUI, you can also use
+something like https://github.com/jolynch/cqltrace[cqltrace] to ease
+obtaining and parsing CQL packet captures.
diff --git a/doc/modules/cassandra/partials/java_version.adoc b/doc/modules/cassandra/partials/java_version.adoc new file mode 100644 index 000000000000..dddc1132dea3 --- /dev/null +++ b/doc/modules/cassandra/partials/java_version.adoc @@ -0,0 +1,23 @@
+[arabic, start=1]
+. Verify the version of Java installed. For example:
+
+[{tabs}]
+====
+Command::
++
+--
+[source,shell]
+----
+include::example$BASH/java_verify.sh[]
+----
+--
+
+Result::
++
+--
+[source,plaintext]
+----
+include::example$RESULTS/java_verify.result[]
+----
+--
+====
diff --git a/doc/modules/cassandra/partials/nodetool_and_cqlsh.adoc b/doc/modules/cassandra/partials/nodetool_and_cqlsh.adoc new file mode 100644 index 000000000000..d1c4e73a2f3c --- /dev/null +++ b/doc/modules/cassandra/partials/nodetool_and_cqlsh.adoc @@ -0,0 +1,21 @@
+NOTE: For information on how to configure your installation, see
+{cass_url}doc/latest/getting_started/configuring.html[Configuring
+Cassandra].
+
+[arabic, start=7]
+. Check the status of Cassandra:
+
+[source,shell]
+----
+include::example$BASH/nodetool_status.sh[]
+----
+
+The status column in the output should report `UN` which stands for
+"Up/Normal".
+ +Alternatively, connect to the database with: + +[source,shell] +---- +include::example$BASH/run_cqlsh.sh[] +---- diff --git a/doc/modules/cassandra/partials/nodetool_and_cqlsh_nobin.adoc b/doc/modules/cassandra/partials/nodetool_and_cqlsh_nobin.adoc new file mode 100644 index 000000000000..c17949c4bcea --- /dev/null +++ b/doc/modules/cassandra/partials/nodetool_and_cqlsh_nobin.adoc @@ -0,0 +1,21 @@ +NOTE: For information on how to configure your installation, see +{cass_url}doc/latest/getting_started/configuring.html[Configuring +Cassandra]. + +[arabic, start=7] +. Check the status of Cassandra: + +[source,shell] +---- +include::example$BASH/nodetool_status_nobin.sh[] +---- + +The status column in the output should report `UN` which stands for +"Up/Normal". + +Alternatively, connect to the database with: + +[source,shell] +---- +include::example$BASH/run_cqlsh_nobin.sh[] +---- diff --git a/doc/modules/cassandra/partials/package_versions.adoc b/doc/modules/cassandra/partials/package_versions.adoc new file mode 100644 index 000000000000..f5c89689eecf --- /dev/null +++ b/doc/modules/cassandra/partials/package_versions.adoc @@ -0,0 +1,5 @@ +The latest major version is {311_version} and the +corresponding distribution name is `311x` (with an "x" as the suffix). +For older releases use `30x` for {30_version}, `22x` for {22_version} and +`21x` for {21_version}. +For example, to add the repository for version {311_version} (`311x`): diff --git a/doc/modules/cassandra/partials/tail_syslog.adoc b/doc/modules/cassandra/partials/tail_syslog.adoc new file mode 100644 index 000000000000..b5dd8f3d2988 --- /dev/null +++ b/doc/modules/cassandra/partials/tail_syslog.adoc @@ -0,0 +1,25 @@ +[arabic, start=6] +. Monitor the progress of the startup with: + +[{tabs}] +==== +Command:: ++ +-- +[source,shell] +---- +include::example$BASH/tail_syslog.sh[] +---- +-- + +Result:: ++ +-- +Cassandra is ready when you see an entry like this in the `system.log`: + +[source,plaintext] +---- +include::example$RESULTS/tail_syslog.result[] +---- +-- +==== diff --git a/doc/convert_yaml_to_rst.py b/doc/scripts/convert_yaml_to_adoc.py similarity index 86% rename from doc/convert_yaml_to_rst.py rename to doc/scripts/convert_yaml_to_adoc.py index c17bbbb20854..5eff522a00d0 100644 --- a/doc/convert_yaml_to_rst.py +++ b/doc/scripts/convert_yaml_to_adoc.py @@ -15,12 +15,15 @@ # limitations under the License. """ -A script to convert cassandra.yaml into ReStructuredText for +A script to convert cassandra.yaml into Asciidoc for the online documentation. Usage: - convert_yaml_to_rest.py conf/cassandra.yaml docs/source/conf.rst +YAML_INPUT=conf/cassandra.yaml +YAML_OUTPUT=modules/cassandra/pages/configuration/cass_yaml_file.adoc + + convert_yaml_to_adoc.py $YAML_INPUT $YAML_OUTPUT """ import sys @@ -41,7 +44,6 @@ # that these can be commented out (making it useless to use a yaml parser). COMPLEX_OPTIONS = ( 'seed_provider', - 'request_scheduler_options', 'data_file_directories', 'commitlog_compression', 'hints_compression', @@ -51,17 +53,13 @@ 'hinted_handoff_disabled_datacenters' ) - def convert(yaml_file, dest_file): with open(yaml_file, 'r') as f: # Trim off the boilerplate header lines = f.readlines()[7:] with open(dest_file, 'w') as outfile: - outfile.write(".. 
_cassandra-yaml:\n") - outfile.write("\n") - outfile.write("Cassandra Configuration File\n") - outfile.write("============================\n") + outfile.write("= cassandra.yaml file configuration\n") # since comments preceed an option, this holds all of the comment # lines we've seen since the last option @@ -98,8 +96,7 @@ def convert(yaml_file, dest_file): def write_section_header(option_name, outfile): outfile.write("\n") - outfile.write("``%s``\n" % (option_name,)) - outfile.write("-" * (len(option_name) + 4) + "\n") + outfile.write("== `%s`\n\n" % (option_name,)) def write_comments(comment_lines, is_commented, outfile): @@ -114,7 +111,7 @@ def write_comments(comment_lines, is_commented, outfile): def maybe_write_default_value(option_match, outfile): default_value = option_match.group(3) if default_value and default_value != "\n": - outfile.write("\n*Default Value:* %s\n" % (default_value,)) + outfile.write("\n_Default Value:_ %s\n" % (default_value,)) def read_complex_option(line_iter): @@ -131,14 +128,15 @@ def read_complex_option(line_iter): def write_complex_option(lines, outfile): - outfile.write("\n*Default Value (complex option)*::\n\n") + outfile.write("\n_Default Value (complex option)_:\n\n....\n") for line in lines: outfile.write((" " * 4) + line) + outfile.write("....\n") if __name__ == '__main__': if len(sys.argv) != 3: - print >> sys.stderr, "Usage: %s " % (sys.argv[0],) + print >> sys.stderr, "Usage: %s " % (sys.argv[0],) sys.exit(1) yaml_file = sys.argv[1] diff --git a/doc/scripts/gen-nodetool-docs.py b/doc/scripts/gen-nodetool-docs.py new file mode 100644 index 000000000000..1903dca8763c --- /dev/null +++ b/doc/scripts/gen-nodetool-docs.py @@ -0,0 +1,83 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +A script to use nodetool to generate documentation for nodetool +""" +from __future__ import print_function + +import os +import re +import sys +import subprocess +from subprocess import PIPE +from subprocess import Popen + +if(os.environ.get("SKIP_NODETOOL") == "1"): + sys.exit(0) + + +nodetool = "../bin/nodetool" +outdir = "modules/cassandra/pages/tools/nodetool" +examplesdir = "modules/cassandra/examples/TEXT/NODETOOL" +helpfilename = outdir + "/nodetool.txt" +command_re = re.compile("( )([_a-z]+)") +commandADOCContent = "== {0}\n\n== Usage\n[source,plaintext]\n----\ninclude::example$TEXT/NODETOOL/{0}.txt[]\n----\n" + +# create the documentation directory +if not os.path.exists(outdir): + os.makedirs(outdir) + +# create the base help file to use for discovering the commands +def create_help_file(): + with open(helpfilename, "w+") as output_file: + try: + subprocess.check_call([nodetool, "help"], stdout=output_file) + except subprocess.CalledProcessError as cpe: + print( + 'ERROR: Nodetool failed to run, you likely need to build ' + 'cassandra using ant jar from the top level directory' + ) + raise cpe + +# for a given command, create the help file and an ADOC file to contain it +def create_adoc(command): + if command: + cmdName = command.group(0).strip() + cmdFilename = examplesdir + "/" + cmdName + ".txt" + adocFilename = outdir + "/" + cmdName + ".adoc" + with open(cmdFilename, "wb+") as cmdFile: + proc = Popen([nodetool, "help", cmdName], stdin=PIPE, stdout=PIPE) + (out, err) = proc.communicate() + cmdFile.write(out) + with open(adocFilename, "w+") as adocFile: + adocFile.write(commandADOCContent.format(cmdName,cmdName,cmdName)) + +# create base file +create_help_file() + +# create the main usage page +with open(outdir + "/nodetool.adoc", "w+") as output: + with open(helpfilename, "r+") as helpfile: + output.write("== Nodetool\n\n== Usage\n\n") + for commandLine in helpfile: + command = command_re.sub(r'\nxref:tools/nodetool/\2.adoc[\2] - ',commandLine) + output.write(command) + +# create the command usage pages +with open(helpfilename, "r+") as helpfile: + for commandLine in helpfile: + command = command_re.match(commandLine) + create_adoc(command) diff --git a/doc/source/_static/extra.css b/doc/source/_static/extra.css deleted file mode 100644 index c2972bd09a28..000000000000 --- a/doc/source/_static/extra.css +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -div:not(.highlight) > pre { - background: #fff; - border: 1px solid #e1e4e5; - color: #404040; - margin: 1px 0 24px 0; - overflow-x: auto; - padding: 12px 12px; - font-size: 12px; -} - -a.reference.internal code.literal { - border: none; - font-size: 12px; - color: #2980B9; - padding: 0; - background: none; -} - -a.reference.internal:visited code.literal { - color: #9B59B6; - padding: 0; - background: none; -} - - -/* override table width restrictions */ -.wy-table-responsive table td, .wy-table-responsive table th { - white-space: normal; -} - -.wy-table-responsive { - margin-bottom: 24px; - max-width: 100%; - overflow: visible; -} - -table.contentstable { - margin: 0; -} - -td.rightcolumn { - padding-left: 30px; -} - -div#wipwarning { - font-size: 14px; - border: 1px solid #ecc; - color: #f66; - background: #ffe8e8; - padding: 10px 30px; - margin-bottom: 30px; -} -.content-container{ - padding-right: 15px; - padding-left: 15px; - margin-right: auto; - margin-left: auto; - width:100%; -} diff --git a/doc/source/_templates/indexcontent.html b/doc/source/_templates/indexcontent.html deleted file mode 100644 index e8f22240b0f3..000000000000 --- a/doc/source/_templates/indexcontent.html +++ /dev/null @@ -1,89 +0,0 @@ -{% extends "layout.html" %} -{# Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. #} -{%- block htmltitle -%} -{{ html_title }} -{%- endblock -%} -{% block body %} -

{{ docstitle|e }}

-

- {% trans %}Welcome! This is the documentation for Apache Cassandra {{ version }}.{% endtrans %} -

-
This documentation is a work-in-progress. - Contributions are welcome.
- -

Main documentation

- -
- - - - - - - - - - - - - - - - - - - - -
- - - -
- - - -
- - - -
- - - -
- - - -
- -

Meta informations

- - - -{% endblock %} diff --git a/doc/source/_theme/cassandra_theme/defindex.html b/doc/source/_theme/cassandra_theme/defindex.html deleted file mode 100644 index 3310c7bf2638..000000000000 --- a/doc/source/_theme/cassandra_theme/defindex.html +++ /dev/null @@ -1,40 +0,0 @@ ---- -{# - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitatins under the License. #} -layout: doclandingpage -title: "Documentation" -is_homepage: false -is_sphinx_doc: false ---- -{% block body %} -

{{ docstitle|e }}

- {% block tables %} -

{{ _('Indices and tables:') }}

- - -
- - - - - -
- {% endblock %} -{% endblock %} diff --git a/doc/source/_theme/cassandra_theme/layout.html b/doc/source/_theme/cassandra_theme/layout.html deleted file mode 100644 index e53c53797dd0..000000000000 --- a/doc/source/_theme/cassandra_theme/layout.html +++ /dev/null @@ -1,108 +0,0 @@ ---- -{# - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. #} -layout: docpage -{% block title %} -title: "Documentation" -{% endblock%} -is_homepage: false -is_sphinx_doc: true -{% for doc in parents %} -doc-parent: "{{ doc.title }}" -{% endfor %} -doc-title: "{{ title }}" -doc-header-links: ' - - {%- if parents %} - - {%- endif %} - {%- if next %} - - {%- endif %} - {%- if prev %} - - {%- endif %} -' -doc-search-path: "{{ pathto('search') }}" -{% block extrafooter %} -extra-footer: ' - -' -{% endblock %} ---- -
-
-
-
- -
-
-
-
-
- {% block body %}{% endblock %} - - {% if next or prev %} - - {% endif %} -
-
-
-
-
diff --git a/doc/source/_theme/cassandra_theme/search.html b/doc/source/_theme/cassandra_theme/search.html deleted file mode 100644 index d379a705366f..000000000000 --- a/doc/source/_theme/cassandra_theme/search.html +++ /dev/null @@ -1,67 +0,0 @@ -{# Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. #} -{%- extends "layout.html" %} -{% block title %} -title: "{{_('Search')}}" -{% endblock %} -{% block extrafooter %} -extra-footer: ' - - - {# this is used when loading the search index using $.ajax fails, - such as on Chrome for documents on localhost #} - -' -{% endblock %} -{% block body %} - - - {% if search_performed %} -

{{ _('Search Results') }}

- {% if not search_results %} -

{{ _('Your search did not match any documents. Please make sure that all words are spelled correctly.') }}

- {% endif %} - {% endif %} -
- {% if search_results %} -
    - {% for href, caption, context in search_results %} -
  • - {{ caption }} -

    {{ context|e }}

    -
  • - {% endfor %} -
- {% endif %} -
-{% endblock %} diff --git a/doc/source/_theme/cassandra_theme/theme.conf b/doc/source/_theme/cassandra_theme/theme.conf deleted file mode 100644 index 42c0704b507d..000000000000 --- a/doc/source/_theme/cassandra_theme/theme.conf +++ /dev/null @@ -1,3 +0,0 @@ -[theme] -inherit = basic -stylesheet = none diff --git a/doc/source/_util/cql.py b/doc/source/_util/cql.py deleted file mode 100644 index 023700b7bc9b..000000000000 --- a/doc/source/_util/cql.py +++ /dev/null @@ -1,283 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -""" - CQL pygments lexer - ~~~~~~~~~~~~~~~~~~ - - Lexer for the Cassandra Query Language (CQL). - - This is heavily inspired from the pygments SQL lexer (and the Postgres one in particular) but adapted to CQL - keywords and specificities. - - TODO: This has been hacked quickly, but once it's more tested, we could submit it upstream. - In particular, we have alot of keywords whose meaning depends on the context and we could potentially improve - their handling. For instance, SET is a keyword, but also a type name (that's why currently we also consider - map and list as keywords, not types; we could disambiguate by looking if there is a '<' afterwards). Or things - like USERS, which can is used in some documentation example as a table name but is a keyword too (we could - only consider it a keyword if after LIST for instance). Similarly, type nanes are not reserved, so they and - are sometime used as column identifiers (also, timestamp is both a type and a keyword). I "think" we can - somewhat disambiguate through "states", but unclear how far it's worth going. - - We could also add the predefined functions? 
-""" - -import re - -from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words -from pygments.token import Punctuation, Whitespace, Error, \ - Text, Comment, Operator, Keyword, Name, String, Number, Generic, Literal -from pygments.lexers import get_lexer_by_name, ClassNotFound - -__all__ = [ 'CQLLexer' ] - -language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE) - -KEYWORDS = ( - 'SELECT', - 'FROM', - 'AS', - 'WHERE', - 'AND', - 'KEY', - 'KEYS', - 'ENTRIES', - 'FULL', - 'INSERT', - 'UPDATE', - 'WITH', - 'LIMIT', - 'PER', - 'PARTITION', - 'USING', - 'USE', - 'DISTINCT', - 'COUNT', - 'SET', - 'BEGIN', - 'UNLOGGED', - 'BATCH', - 'APPLY', - 'TRUNCATE', - 'DELETE', - 'IN', - 'CREATE', - 'KEYSPACE', - 'SCHEMA', - 'KEYSPACES', - 'COLUMNFAMILY', - 'TABLE', - 'MATERIALIZED', - 'VIEW', - 'INDEX', - 'CUSTOM', - 'ON', - 'TO', - 'DROP', - 'PRIMARY', - 'INTO', - 'VALUES', - 'TIMESTAMP', - 'TTL', - 'CAST', - 'ALTER', - 'RENAME', - 'ADD', - 'TYPE', - 'COMPACT', - 'STORAGE', - 'ORDER', - 'BY', - 'ASC', - 'DESC', - 'ALLOW', - 'FILTERING', - 'IF', - 'IS', - 'CONTAINS', - 'GRANT', - 'ALL', - 'PERMISSION', - 'PERMISSIONS', - 'OF', - 'REVOKE', - 'MODIFY', - 'AUTHORIZE', - 'DESCRIBE', - 'EXECUTE', - 'NORECURSIVE', - 'MBEAN', - 'MBEANS', - 'USER', - 'USERS', - 'ROLE', - 'ROLES', - 'SUPERUSER', - 'NOSUPERUSER', - 'PASSWORD', - 'LOGIN', - 'NOLOGIN', - 'OPTIONS', - 'CLUSTERING', - 'TOKEN', - 'WRITETIME', - 'NULL', - 'NOT', - 'EXISTS', - 'MAP', - 'LIST', - 'NAN', - 'INFINITY', - 'TUPLE', - 'TRIGGER', - 'STATIC', - 'FROZEN', - 'FUNCTION', - 'FUNCTIONS', - 'AGGREGATE', - 'SFUNC', - 'STYPE', - 'FINALFUNC', - 'INITCOND', - 'RETURNS', - 'CALLED', - 'INPUT', - 'LANGUAGE', - 'OR', - 'REPLACE', - 'JSON', - 'LIKE', -) - -DATATYPES = ( - 'ASCII', - 'BIGINT', - 'BLOB', - 'BOOLEAN', - 'COUNTER', - 'DATE', - 'DECIMAL', - 'DOUBLE', - 'EMPTY', - 'FLOAT', - 'INET', - 'INT', - 'SMALLINT', - 'TEXT', - 'TIME', - 'TIMESTAMP', - 'TIMEUUID', - 'TINYINT', - 'UUID', - 'VARCHAR', - 'VARINT', -) - -def language_callback(lexer, match): - """Parse the content of a $-string using a lexer - - The lexer is chosen looking for a nearby LANGUAGE or assumed as - java if no LANGUAGE has been found. - """ - l = None - m = language_re.match(lexer.text[max(0, match.start()-100):match.start()]) - if m is not None: - l = lexer._get_lexer(m.group(1)) - else: - l = lexer._get_lexer('java') - - # 1 = $, 2 = delimiter, 3 = $ - yield (match.start(1), String, match.group(1)) - yield (match.start(2), String.Delimiter, match.group(2)) - yield (match.start(3), String, match.group(3)) - # 4 = string contents - if l: - for x in l.get_tokens_unprocessed(match.group(4)): - yield x - else: - yield (match.start(4), String, match.group(4)) - # 5 = $, 6 = delimiter, 7 = $ - yield (match.start(5), String, match.group(5)) - yield (match.start(6), String.Delimiter, match.group(6)) - yield (match.start(7), String, match.group(7)) - - -class CQLLexer(RegexLexer): - """ - Lexer for the Cassandra Query Language. 
- """ - - name = 'Cassandra Query Language' - aliases = ['cql'] - filenames = ['*.cql'] - mimetypes = ['text/x-cql'] - - flags = re.IGNORECASE - tokens = { - 'root': [ - (r'\s+', Text), - (r'--.*\n?', Comment.Single), - (r'//.*\n?', Comment.Single), - (r'/\*', Comment.Multiline, 'multiline-comments'), - (r'(' + '|'.join(s.replace(" ", "\s+") - for s in DATATYPES) - + r')\b', Name.Builtin), - (words(KEYWORDS, suffix=r'\b'), Keyword), - (r'[+*/<>=~!@#%^&|`?-]+', Operator), - (r'\$\d+', Name.Variable), - - # Using Number instead of the more accurate Literal because the latter don't seem to e highlighted in most - # styles - (r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}', Number), # UUIDs - (r'0x[0-9a-fA-F]+', Number), # Blobs - - (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float), - (r'[0-9]+', Number.Integer), - (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'), - # quoted identifier - (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'), - (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback), - (r'[a-z_]\w*', Name), - (r'[;:()\[\]{},.]', Punctuation), - ], - 'multiline-comments': [ - (r'/\*', Comment.Multiline, 'multiline-comments'), - (r'\*/', Comment.Multiline, '#pop'), - (r'[^/*]+', Comment.Multiline), - (r'[/*]', Comment.Multiline) - ], - 'string': [ - (r"[^']+", String.Single), - (r"''", String.Single), - (r"'", String.Single, '#pop'), - ], - 'quoted-ident': [ - (r'[^"]+', String.Name), - (r'""', String.Name), - (r'"', String.Name, '#pop'), - ], - } - - def get_tokens_unprocessed(self, text, *args): - # Have a copy of the entire text to be used by `language_callback`. - self.text = text - for x in RegexLexer.get_tokens_unprocessed(self, text, *args): - yield x - - def _get_lexer(self, lang): - return get_lexer_by_name(lang, **self.options) diff --git a/doc/source/architecture/dynamo.rst b/doc/source/architecture/dynamo.rst deleted file mode 100644 index a7dbb8750158..000000000000 --- a/doc/source/architecture/dynamo.rst +++ /dev/null @@ -1,139 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Dynamo ------- - -.. _gossip: - -Gossip -^^^^^^ - -.. todo:: todo - -Failure Detection -^^^^^^^^^^^^^^^^^ - -.. todo:: todo - -Token Ring/Ranges -^^^^^^^^^^^^^^^^^ - -.. todo:: todo - -.. _replication-strategy: - -Replication -^^^^^^^^^^^ - -The replication strategy of a keyspace determines which nodes are replicas for a given token range. The two main -replication strategies are :ref:`simple-strategy` and :ref:`network-topology-strategy`. - -.. _simple-strategy: - -SimpleStrategy -~~~~~~~~~~~~~~ - -SimpleStrategy allows a single integer ``replication_factor`` to be defined. This determines the number of nodes that -should contain a copy of each row. 
For example, if ``replication_factor`` is 3, then three different nodes should store -a copy of each row. - -SimpleStrategy treats all nodes identically, ignoring any configured datacenters or racks. To determine the replicas -for a token range, Cassandra iterates through the tokens in the ring, starting with the token range of interest. For -each token, it checks whether the owning node has been added to the set of replicas, and if it has not, it is added to -the set. This process continues until ``replication_factor`` distinct nodes have been added to the set of replicas. - -.. _network-topology-strategy: - -NetworkTopologyStrategy -~~~~~~~~~~~~~~~~~~~~~~~ - -NetworkTopologyStrategy allows a replication factor to be specified for each datacenter in the cluster. Even if your -cluster only uses a single datacenter, NetworkTopologyStrategy should be prefered over SimpleStrategy to make it easier -to add new physical or virtual datacenters to the cluster later. - -In addition to allowing the replication factor to be specified per-DC, NetworkTopologyStrategy also attempts to choose -replicas within a datacenter from different racks. If the number of racks is greater than or equal to the replication -factor for the DC, each replica will be chosen from a different rack. Otherwise, each rack will hold at least one -replica, but some racks may hold more than one. Note that this rack-aware behavior has some potentially `surprising -implications `_. For example, if there are not an even number of -nodes in each rack, the data load on the smallest rack may be much higher. Similarly, if a single node is bootstrapped -into a new rack, it will be considered a replica for the entire ring. For this reason, many operators choose to -configure all nodes on a single "rack". - -Tunable Consistency -^^^^^^^^^^^^^^^^^^^ - -Cassandra supports a per-operation tradeoff between consistency and availability through *Consistency Levels*. -Essentially, an operation's consistency level specifies how many of the replicas need to respond to the coordinator in -order to consider the operation a success. - -The following consistency levels are available: - -``ONE`` - Only a single replica must respond. - -``TWO`` - Two replicas must respond. - -``THREE`` - Three replicas must respond. - -``QUORUM`` - A majority (n/2 + 1) of the replicas must respond. - -``ALL`` - All of the replicas must respond. - -``LOCAL_QUORUM`` - A majority of the replicas in the local datacenter (whichever datacenter the coordinator is in) must respond. - -``EACH_QUORUM`` - A majority of the replicas in each datacenter must respond. - -``LOCAL_ONE`` - Only a single replica must respond. In a multi-datacenter cluster, this also gaurantees that read requests are not - sent to replicas in a remote datacenter. - -``ANY`` - A single replica may respond, or the coordinator may store a hint. If a hint is stored, the coordinator will later - attempt to replay the hint and deliver the mutation to the replicas. This consistency level is only accepted for - write operations. - -Write operations are always sent to all replicas, regardless of consistency level. The consistency level simply -controls how many responses the coordinator waits for before responding to the client. - -For read operations, the coordinator generally only issues read commands to enough replicas to satisfy the consistency -level. 
There are a couple of exceptions to this: - -- Speculative retry may issue a redundant read request to an extra replica if the other replicas have not responded - within a specified time window. -- Based on ``read_repair_chance`` and ``dclocal_read_repair_chance`` (part of a table's schema), read requests may be - randomly sent to all replicas in order to repair potentially inconsistent data. - -Picking Consistency Levels -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -It is common to pick read and write consistency levels that are high enough to overlap, resulting in "strong" -consistency. This is typically expressed as ``W + R > RF``, where ``W`` is the write consistency level, ``R`` is the -read consistency level, and ``RF`` is the replication factor. For example, if ``RF = 3``, a ``QUORUM`` request will -require responses from at least two of the three replicas. If ``QUORUM`` is used for both writes and reads, at least -one of the replicas is guaranteed to participate in *both* the write and the read request, which in turn guarantees that -the latest write will be read. In a multi-datacenter environment, ``LOCAL_QUORUM`` can be used to provide a weaker but -still useful guarantee: reads are guaranteed to see the latest write from within the same datacenter. - -If this type of strong consistency isn't required, lower consistency levels like ``ONE`` may be used to improve -throughput, latency, and availability. diff --git a/doc/source/architecture/guarantees.rst b/doc/source/architecture/guarantees.rst deleted file mode 100644 index c0b58d880fb4..000000000000 --- a/doc/source/architecture/guarantees.rst +++ /dev/null @@ -1,20 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Guarantees ----------- - -.. todo:: todo diff --git a/doc/source/architecture/index.rst b/doc/source/architecture/index.rst deleted file mode 100644 index 58eda137795e..000000000000 --- a/doc/source/architecture/index.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. 
- -Architecture -============ - -This section describes the general architecture of Apache Cassandra. - -.. toctree:: - :maxdepth: 2 - - overview - dynamo - storage_engine - guarantees - diff --git a/doc/source/architecture/overview.rst b/doc/source/architecture/overview.rst deleted file mode 100644 index 005b15b94cfb..000000000000 --- a/doc/source/architecture/overview.rst +++ /dev/null @@ -1,20 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Overview --------- - -.. todo:: todo diff --git a/doc/source/architecture/storage_engine.rst b/doc/source/architecture/storage_engine.rst deleted file mode 100644 index 2bd429d0b6d2..000000000000 --- a/doc/source/architecture/storage_engine.rst +++ /dev/null @@ -1,129 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Storage Engine --------------- - -.. _commit-log: - -CommitLog -^^^^^^^^^ - -Commitlogs are an append only log of all mutations local to a Cassandra node. Any data written to Cassandra will first be written to a commit log before being written to a memtable. This provides durability in the case of unexpected shutdown. On startup, any mutations in the commit log will be applied to memtables. - -All mutations write optimized by storing in commitlog segments, reducing the number of seeks needed to write to disk. Commitlog Segments are limited by the "commitlog_segment_size_in_mb" option, once the size is reached, a new commitlog segment is created. Commitlog segments can be archived, deleted, or recycled once all its data has been flushed to SSTables. Commitlog segments are truncated when Cassandra has written data older than a certain point to the SSTables. Running "nodetool drain" before stopping Cassandra will write everything in the memtables to SSTables and remove the need to sync with the commitlogs on startup. 
- -- ``commitlog_segment_size_in_mb``: The default size is 32, which is almost always fine, but if you are archiving commitlog segments (see commitlog_archiving.properties), then you probably want a finer granularity of archiving; 8 or 16 MB is reasonable. Max mutation size is also configurable via max_mutation_size_in_kb setting in cassandra.yaml. The default is half the size commitlog_segment_size_in_mb * 1024. - -***NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must be set to at least twice the size of max_mutation_size_in_kb / 1024*** - -*Default Value:* 32 - -Commitlogs are an append only log of all mutations local to a Cassandra node. Any data written to Cassandra will first be written to a commit log before being written to a memtable. This provides durability in the case of unexpected shutdown. On startup, any mutations in the commit log will be applied. - -- ``commitlog_sync``: may be either “periodic” or “batch.” - - - ``batch``: In batch mode, Cassandra won’t ack writes until the commit log has been fsynced to disk. It will wait "commitlog_sync_batch_window_in_ms" milliseconds between fsyncs. This window should be kept short because the writer threads will be unable to do extra work while waiting. You may need to increase concurrent_writes for the same reason. - - - ``commitlog_sync_batch_window_in_ms``: Time to wait between "batch" fsyncs - *Default Value:* 2 - - - ``periodic``: In periodic mode, writes are immediately ack'ed, and the CommitLog is simply synced every "commitlog_sync_period_in_ms" milliseconds. - - - ``commitlog_sync_period_in_ms``: Time to wait between "periodic" fsyncs - *Default Value:* 10000 - -*Default Value:* periodic - -*** NOTE: In the event of an unexpected shutdown, Cassandra can lose up to the sync period or more if the sync is delayed. If using "batch" mode, it is recommended to store commitlogs in a separate, dedicated device.** - - -- ``commitlog_directory``: This option is commented out by default When running on magnetic HDD, this should be a separate spindle than the data directories. If not set, the default directory is $CASSANDRA_HOME/data/commitlog. - -*Default Value:* /var/lib/cassandra/commitlog - -- ``commitlog_compression``: Compression to apply to the commitlog. If omitted, the commit log will be written uncompressed. LZ4, Snappy, Deflate and Zstd compressors are supported. - -(Default Value: (complex option):: - - # - class_name: LZ4Compressor - # parameters: - # - - -- ``commitlog_total_space_in_mb``: Total space to use for commit logs on disk. - -If space gets above this value, Cassandra will flush every dirty CF in the oldest segment and remove it. So a small total commitlog space will tend to cause more flush activity on less-active columnfamilies. - -The default value is the smaller of 8192, and 1/4 of the total space of the commitlog volume. - -*Default Value:* 8192 - -.. _memtables: - -Memtables -^^^^^^^^^ - -Memtables are in-memory structures where Cassandra buffers writes. In general, there is one active memtable per table. -Eventually, memtables are flushed onto disk and become immutable `SSTables`_. This can be triggered in several -ways: - -- The memory usage of the memtables exceeds the configured threshold (see ``memtable_cleanup_threshold``) -- The :ref:`commit-log` approaches its maximum size, and forces memtable flushes in order to allow commitlog segments to - be freed - -Memtables may be stored entirely on-heap or partially off-heap, depending on ``memtable_allocation_type``. 
- -SSTables -^^^^^^^^ - -SSTables are the immutable data files that Cassandra uses for persisting data on disk. - -As SSTables are flushed to disk from :ref:`memtables` or are streamed from other nodes, Cassandra triggers compactions -which combine multiple SSTables into one. Once the new SSTable has been written, the old SSTables can be removed. - -Each SSTable is comprised of multiple components stored in separate files: - -``Data.db`` - The actual data, i.e. the contents of rows. - -``Index.db`` - An index from partition keys to positions in the ``Data.db`` file. For wide partitions, this may also include an - index to rows within a partition. - -``Summary.db`` - A sampling of (by default) every 128th entry in the ``Index.db`` file. - -``Filter.db`` - A Bloom Filter of the partition keys in the SSTable. - -``CompressionInfo.db`` - Metadata about the offsets and lengths of compression chunks in the ``Data.db`` file. - -``Statistics.db`` - Stores metadata about the SSTable, including information about timestamps, tombstones, clustering keys, compaction, - repair, compression, TTLs, and more. - -``Digest.crc32`` - A CRC-32 digest of the ``Data.db`` file. - -``TOC.txt`` - A plain text list of the component files for the SSTable. - -Within the ``Data.db`` file, rows are organized by partition. These partitions are sorted in token order (i.e. by a -hash of the partition key when the default partitioner, ``Murmur3Partition``, is used). Within a partition, rows are -stored in the order of their clustering keys. - -SSTables can be optionally compressed using block-based compression. diff --git a/doc/source/bugs.rst b/doc/source/bugs.rst deleted file mode 100644 index 240cfd495981..000000000000 --- a/doc/source/bugs.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Reporting Bugs and Contributing -=============================== - -If you encounter a problem with Cassandra, the first places to ask for help are the :ref:`user mailing list -` and the ``#cassandra`` :ref:`IRC channel `. - -If, after having asked for help, you suspect that you have found a bug in Cassandra, you should report it by opening a -ticket through the `Apache Cassandra JIRA `__. Please provide as much -details as you can on your problem, and don't forget to indicate which version of Cassandra you are running and on which -environment. - -Further details on how to contribute can be found at our :doc:`development/index` section. Please note that the source of -this documentation is part of the Cassandra git repository and hence contributions to the documentation should follow the -same path. 
diff --git a/doc/source/conf.py b/doc/source/conf.py deleted file mode 100644 index 7143b23b46c0..000000000000 --- a/doc/source/conf.py +++ /dev/null @@ -1,441 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -# Apache Cassandra Documentation documentation build configuration file -# -# This file is execfile()d with the current directory set to its containing -# dir. -import re, sys, os - -# Finds out the version (so we don't have to manually edit that file every -# time we change the version) -cassandra_build_file = '../../build.xml' -with open(cassandra_build_file) as f: - m = re.search("name=\"base\.version\" value=\"([^\"]+)\"", f.read()) - if not m or m.lastindex != 1: - raise RuntimeException("Problem finding version in build.xml file, this shouldn't happen.") - cassandra_version = m.group(1) - -def setup(sphinx): - sys.path.insert(0, os.path.abspath('./source/_util')) - from cql import CQLLexer - sphinx.add_lexer("cql", CQLLexer()) - -# Ugly way to find out if we're building for the website (the Makefile creates an empty file for us) -build_for_website = os.path.isfile('.build_for_website') - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.todo', - 'sphinx.ext.mathjax', - 'sphinx.ext.ifconfig', - 'sphinx.ext.extlinks', -] - -extlinks = { - 'jira': ( 'https://issues.apache.org/jira/browse/CASSANDRA-%s', 'CASSANDRA-') -} - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -source_suffix = ['.rst'] - -# The encoding of source files. -# -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'Apache Cassandra' -copyright = u'2016, The Apache Cassandra team' -author = u'The Apache Cassandra team' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -version = cassandra_version - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. 
-language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# -# today = '' -# -# Else, today_fmt is used as the format for a strftime call. -# -# today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = [] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -# -# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -# -# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -# -# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -# -# show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -# keep_warnings = False - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = True - - -# -- Options for HTML output ---------------------------------------------- - -if build_for_website: - html_theme = 'cassandra_theme' - html_theme_path = ['./_theme'] -else: - html_theme = 'sphinx_rtd_theme' - -html_context = { 'extra_css_files': [ '_static/extra.css' ] } - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - -# The name for this set of Sphinx documents. -# " v documentation" by default. -# -html_title = u'Apache Cassandra Documentation v%s' % version - -# A shorter title for the navigation bar. Default is the same as html_title. -# -# html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -# -# html_logo = None - -# The name of an image file (relative to this directory) to use as a favicon of -# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -# -# html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# Add any extra paths that contain custom files (such as robots.txt or -# .htaccess) here, relative to this directory. These files are copied -# directly to the root of the documentation. -# -# html_extra_path = [] - -# If not None, a 'Last updated on:' timestamp is inserted at every page -# bottom, using the given strftime format. -# The empty string is equivalent to '%b %d, %Y'. -# -# html_last_updated_fmt = None - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -# -# html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -# -# html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. 
-# -html_additional_pages = { - 'index': 'indexcontent.html' -} - -# If false, no module index is generated. -# -# html_domain_indices = True - -# If false, no index is generated. -# -# html_use_index = True - -# If true, the index is split into individual pages for each letter. -# -# html_split_index = False - -# If true, links to the reST sources are added to the pages. -# -# html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -# -# html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -# -# html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -# -# html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -# html_file_suffix = None - -# Language to be used for generating the HTML full-text search index. -# Sphinx supports the following languages: -# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' -# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' -# -# html_search_language = 'en' - -# A dictionary with options for the search language support, empty by default. -# 'ja' uses this config value. -# 'zh' user can custom change `jieba` dictionary path. -# -# html_search_options = {'type': 'default'} - -# The name of a javascript file (relative to the configuration directory) that -# implements a search results scorer. If empty, the default will be used. -# -# html_search_scorer = 'scorer.js' - -# Output file base name for HTML help builder. -htmlhelp_basename = 'ApacheCassandraDocumentationdoc' - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'ApacheCassandra.tex', u'Apache Cassandra Documentation', - u'The Apache Cassandra team', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -# -# latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -# -# latex_use_parts = False - -# If true, show page references after internal links. -# -# latex_show_pagerefs = False - -# If true, show URL addresses after external links. -# -# latex_show_urls = False - -# Documents to append as an appendix to all manuals. -# -# latex_appendices = [] - -# If false, no module index is generated. -# -# latex_domain_indices = True - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'apachecassandra', u'Apache Cassandra Documentation', - [author], 1) -] - -# If true, show URL addresses after external links. -# -# man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. 
List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'ApacheCassandra', u'Apache Cassandra Documentation', - author, 'ApacheCassandraDocumentation', 'One line description of project.', - 'Miscellaneous'), -] - -# Documents to append as an appendix to all manuals. -# -# texinfo_appendices = [] - -# If false, no module index is generated. -# -# texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -# -# texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. -# -# texinfo_no_detailmenu = False - - -# -- Options for Epub output ---------------------------------------------- - -# Bibliographic Dublin Core info. -epub_title = project -epub_author = author -epub_publisher = author -epub_copyright = copyright - -# The basename for the epub file. It defaults to the project name. -# epub_basename = project - -# The HTML theme for the epub output. Since the default themes are not -# optimized for small screen space, using the same theme for HTML and epub -# output is usually not wise. This defaults to 'epub', a theme designed to save -# visual space. -# -# epub_theme = 'epub' - -# The language of the text. It defaults to the language option -# or 'en' if the language is not set. -# -# epub_language = '' - -# The scheme of the identifier. Typical schemes are ISBN or URL. -# epub_scheme = '' - -# The unique identifier of the text. This can be a ISBN number -# or the project homepage. -# -# epub_identifier = '' - -# A unique identification for the text. -# -# epub_uid = '' - -# A tuple containing the cover image and cover page html template filenames. -# -# epub_cover = () - -# A sequence of (type, uri, title) tuples for the guide element of content.opf. -# -# epub_guide = () - -# HTML files that should be inserted before the pages created by sphinx. -# The format is a list of tuples containing the path and title. -# -# epub_pre_files = [] - -# HTML files that should be inserted after the pages created by sphinx. -# The format is a list of tuples containing the path and title. -# -# epub_post_files = [] - -# A list of files that should not be packed into the epub file. -epub_exclude_files = ['search.html'] - -# The depth of the table of contents in toc.ncx. -# -# epub_tocdepth = 3 - -# Allow duplicate toc entries. -# -# epub_tocdup = True - -# Choose between 'default' and 'includehidden'. -# -# epub_tocscope = 'default' - -# Fix unsupported image types using the Pillow. -# -# epub_fix_images = False - -# Scale large images. -# -# epub_max_image_width = 0 - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -# -# epub_show_urls = 'inline' - -# If false, no index is generated. -# -# epub_use_index = True diff --git a/doc/source/configuration/index.rst b/doc/source/configuration/index.rst deleted file mode 100644 index f774fdad67c4..000000000000 --- a/doc/source/configuration/index.rst +++ /dev/null @@ -1,25 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. 
Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Configuring Cassandra -===================== - -This section describes how to configure Apache Cassandra. - -.. toctree:: - :maxdepth: 1 - - cassandra_config_file diff --git a/doc/source/contactus.rst b/doc/source/contactus.rst deleted file mode 100644 index 8d0f5dd04663..000000000000 --- a/doc/source/contactus.rst +++ /dev/null @@ -1,53 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Contact us -========== - -You can get in touch with the Cassandra community either via the mailing lists or the freenode IRC channels. - -.. _mailing-lists: - -Mailing lists -------------- - -The following mailing lists are available: - -- `Users `__ – General discussion list for users - `Subscribe - `__ -- `Developers `__ – Development related discussion - `Subscribe - `__ -- `Commits `__ – Commit notification source repository - - `Subscribe `__ -- `Client Libraries `__ – Discussion related to the - development of idiomatic client APIs - `Subscribe `__ - -Subscribe by sending an email to the email address in the Subscribe links above. Follow the instructions in the welcome -email to confirm your subscription. Make sure to keep the welcome email as it contains instructions on how to -unsubscribe. - -.. _irc-channels: - -IRC ---- - -To chat with developers or users in real-time, join our channels on `IRC freenode `__. The -following channels are available: - -- ``#cassandra`` - for user questions and general discussions. -- ``#cassandra-dev`` - strictly for questions or discussions related to Cassandra development. -- ``#cassandra-builds`` - results of automated test builds. - diff --git a/doc/source/cql/appendices.rst b/doc/source/cql/appendices.rst deleted file mode 100644 index 456170d405c6..000000000000 --- a/doc/source/cql/appendices.rst +++ /dev/null @@ -1,333 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. 
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: cql - -Appendices ----------- - -.. _appendix-A: - -Appendix A: CQL Keywords -~~~~~~~~~~~~~~~~~~~~~~~~ - -CQL distinguishes between *reserved* and *non-reserved* keywords. -Reserved keywords cannot be used as identifier, they are truly reserved -for the language (but one can enclose a reserved keyword by -double-quotes to use it as an identifier). Non-reserved keywords however -only have a specific meaning in certain context but can used as -identifier otherwise. The only *raison d’être* of these non-reserved -keywords is convenience: some keyword are non-reserved when it was -always easy for the parser to decide whether they were used as keywords -or not. - -+--------------------+-------------+ -| Keyword | Reserved? | -+====================+=============+ -| ``ADD`` | yes | -+--------------------+-------------+ -| ``AGGREGATE`` | no | -+--------------------+-------------+ -| ``ALL`` | no | -+--------------------+-------------+ -| ``ALLOW`` | yes | -+--------------------+-------------+ -| ``ALTER`` | yes | -+--------------------+-------------+ -| ``AND`` | yes | -+--------------------+-------------+ -| ``APPLY`` | yes | -+--------------------+-------------+ -| ``AS`` | no | -+--------------------+-------------+ -| ``ASC`` | yes | -+--------------------+-------------+ -| ``ASCII`` | no | -+--------------------+-------------+ -| ``AUTHORIZE`` | yes | -+--------------------+-------------+ -| ``BATCH`` | yes | -+--------------------+-------------+ -| ``BEGIN`` | yes | -+--------------------+-------------+ -| ``BIGINT`` | no | -+--------------------+-------------+ -| ``BLOB`` | no | -+--------------------+-------------+ -| ``BOOLEAN`` | no | -+--------------------+-------------+ -| ``BY`` | yes | -+--------------------+-------------+ -| ``CALLED`` | no | -+--------------------+-------------+ -| ``CLUSTERING`` | no | -+--------------------+-------------+ -| ``COLUMNFAMILY`` | yes | -+--------------------+-------------+ -| ``COMPACT`` | no | -+--------------------+-------------+ -| ``CONTAINS`` | no | -+--------------------+-------------+ -| ``COUNT`` | no | -+--------------------+-------------+ -| ``COUNTER`` | no | -+--------------------+-------------+ -| ``CREATE`` | yes | -+--------------------+-------------+ -| ``CUSTOM`` | no | -+--------------------+-------------+ -| ``DATE`` | no | -+--------------------+-------------+ -| ``DECIMAL`` | no | -+--------------------+-------------+ -| ``DELETE`` | yes | -+--------------------+-------------+ -| ``DESC`` | yes | -+--------------------+-------------+ -| ``DESCRIBE`` | yes | -+--------------------+-------------+ -| ``DISTINCT`` | no | -+--------------------+-------------+ -| ``DOUBLE`` | no | -+--------------------+-------------+ -| ``DROP`` | yes | -+--------------------+-------------+ -| ``ENTRIES`` | yes | -+--------------------+-------------+ -| ``EXECUTE`` | yes | -+--------------------+-------------+ -| ``EXISTS`` | no | -+--------------------+-------------+ -| ``FILTERING`` | no | -+--------------------+-------------+ -| ``FINALFUNC`` | no | -+--------------------+-------------+ -| ``FLOAT`` | no | -+--------------------+-------------+ -| ``FROM`` | yes | -+--------------------+-------------+ -| ``FROZEN`` | no | -+--------------------+-------------+ -| ``FULL`` | yes | -+--------------------+-------------+ -| ``FUNCTION`` | no | 
-+--------------------+-------------+ -| ``FUNCTIONS`` | no | -+--------------------+-------------+ -| ``GRANT`` | yes | -+--------------------+-------------+ -| ``IF`` | yes | -+--------------------+-------------+ -| ``IN`` | yes | -+--------------------+-------------+ -| ``INDEX`` | yes | -+--------------------+-------------+ -| ``INET`` | no | -+--------------------+-------------+ -| ``INFINITY`` | yes | -+--------------------+-------------+ -| ``INITCOND`` | no | -+--------------------+-------------+ -| ``INPUT`` | no | -+--------------------+-------------+ -| ``INSERT`` | yes | -+--------------------+-------------+ -| ``INT`` | no | -+--------------------+-------------+ -| ``INTO`` | yes | -+--------------------+-------------+ -| ``JSON`` | no | -+--------------------+-------------+ -| ``KEY`` | no | -+--------------------+-------------+ -| ``KEYS`` | no | -+--------------------+-------------+ -| ``KEYSPACE`` | yes | -+--------------------+-------------+ -| ``KEYSPACES`` | no | -+--------------------+-------------+ -| ``LANGUAGE`` | no | -+--------------------+-------------+ -| ``LIMIT`` | yes | -+--------------------+-------------+ -| ``LIST`` | no | -+--------------------+-------------+ -| ``LOGIN`` | no | -+--------------------+-------------+ -| ``MAP`` | no | -+--------------------+-------------+ -| ``MODIFY`` | yes | -+--------------------+-------------+ -| ``NAN`` | yes | -+--------------------+-------------+ -| ``NOLOGIN`` | no | -+--------------------+-------------+ -| ``NORECURSIVE`` | yes | -+--------------------+-------------+ -| ``NOSUPERUSER`` | no | -+--------------------+-------------+ -| ``NOT`` | yes | -+--------------------+-------------+ -| ``NULL`` | yes | -+--------------------+-------------+ -| ``OF`` | yes | -+--------------------+-------------+ -| ``ON`` | yes | -+--------------------+-------------+ -| ``OPTIONS`` | no | -+--------------------+-------------+ -| ``OR`` | yes | -+--------------------+-------------+ -| ``ORDER`` | yes | -+--------------------+-------------+ -| ``PASSWORD`` | no | -+--------------------+-------------+ -| ``PERMISSION`` | no | -+--------------------+-------------+ -| ``PERMISSIONS`` | no | -+--------------------+-------------+ -| ``PRIMARY`` | yes | -+--------------------+-------------+ -| ``RENAME`` | yes | -+--------------------+-------------+ -| ``REPLACE`` | yes | -+--------------------+-------------+ -| ``RETURNS`` | no | -+--------------------+-------------+ -| ``REVOKE`` | yes | -+--------------------+-------------+ -| ``ROLE`` | no | -+--------------------+-------------+ -| ``ROLES`` | no | -+--------------------+-------------+ -| ``SCHEMA`` | yes | -+--------------------+-------------+ -| ``SELECT`` | yes | -+--------------------+-------------+ -| ``SET`` | yes | -+--------------------+-------------+ -| ``SFUNC`` | no | -+--------------------+-------------+ -| ``SMALLINT`` | no | -+--------------------+-------------+ -| ``STATIC`` | no | -+--------------------+-------------+ -| ``STORAGE`` | no | -+--------------------+-------------+ -| ``STYPE`` | no | -+--------------------+-------------+ -| ``SUPERUSER`` | no | -+--------------------+-------------+ -| ``TABLE`` | yes | -+--------------------+-------------+ -| ``TEXT`` | no | -+--------------------+-------------+ -| ``TIME`` | no | -+--------------------+-------------+ -| ``TIMESTAMP`` | no | -+--------------------+-------------+ -| ``TIMEUUID`` | no | -+--------------------+-------------+ -| ``TINYINT`` | no | -+--------------------+-------------+ -| ``TO`` | yes | 
-+--------------------+-------------+ -| ``TOKEN`` | yes | -+--------------------+-------------+ -| ``TRIGGER`` | no | -+--------------------+-------------+ -| ``TRUNCATE`` | yes | -+--------------------+-------------+ -| ``TTL`` | no | -+--------------------+-------------+ -| ``TUPLE`` | no | -+--------------------+-------------+ -| ``TYPE`` | no | -+--------------------+-------------+ -| ``UNLOGGED`` | yes | -+--------------------+-------------+ -| ``UPDATE`` | yes | -+--------------------+-------------+ -| ``USE`` | yes | -+--------------------+-------------+ -| ``USER`` | no | -+--------------------+-------------+ -| ``USERS`` | no | -+--------------------+-------------+ -| ``USING`` | yes | -+--------------------+-------------+ -| ``UUID`` | no | -+--------------------+-------------+ -| ``VALUES`` | no | -+--------------------+-------------+ -| ``VARCHAR`` | no | -+--------------------+-------------+ -| ``VARINT`` | no | -+--------------------+-------------+ -| ``WHERE`` | yes | -+--------------------+-------------+ -| ``WITH`` | yes | -+--------------------+-------------+ -| ``WRITETIME`` | no | -+--------------------+-------------+ - -Appendix B: CQL Reserved Types -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The following type names are not currently used by CQL, but are reserved -for potential future use. User-defined types may not use reserved type -names as their name. - -+-----------------+ -| type | -+=================+ -| ``bitstring`` | -+-----------------+ -| ``byte`` | -+-----------------+ -| ``complex`` | -+-----------------+ -| ``enum`` | -+-----------------+ -| ``interval`` | -+-----------------+ -| ``macaddr`` | -+-----------------+ - - -Appendix C: Dropping Compact Storage -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -``ALTER ... DROP COMPACT STORAGE`` statement makes Compact Tables CQL-compatible, -exposing internal structure of Thrift/Compact Tables: - -- CQL-created Compact Tables that have no clustering columns, will expose an - additional clustering column ``column1`` with ``UTF8Type``. -- CQL-created Compact Tables that had no regular columns, will expose a - regular column ``value`` with ``BytesType``. -- For CQL-Created Compact Tables, all columns originally defined as - ``regular`` will be come ``static`` -- CQL-created Compact Tables that have clustering but have no regular - columns will have an empty value column (of ``EmptyType``) -- SuperColumn Tables (can only be created through Thrift) will expose - a compact value map with an empty name. -- Thrift-created Compact Tables will have types corresponding to their - Thrift definition. -- If a row was written while a table was still compact but it has no live - cells due to later row or cell deletions, it may continue to be simply - left out of query results, as is the normal behavior for compact tables. - Rows written after a table is fully CQL-compatible, if they have no live - cells but a live primary key, may be present in query results with null values. \ No newline at end of file diff --git a/doc/source/cql/changes.rst b/doc/source/cql/changes.rst deleted file mode 100644 index 1eee5369a000..000000000000 --- a/doc/source/cql/changes.rst +++ /dev/null @@ -1,204 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. 
"License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: cql - -Changes -------- - -The following describes the changes in each version of CQL. - -3.4.4 -^^^^^ - -- ``ALTER TABLE`` ``ALTER`` has been removed; a column's type may not be changed after creation (:jira:`12443`). -- ``ALTER TYPE`` ``ALTER`` has been removed; a field's type may not be changed after creation (:jira:`12443`). - -3.4.3 -^^^^^ - -- Adds a new ``duration `` :ref:`data types ` (:jira:`11873`). -- Support for ``GROUP BY`` (:jira:`10707`). -- Adds a ``DEFAULT UNSET`` option for ``INSERT JSON`` to ignore omitted columns (:jira:`11424`). -- Allows ``null`` as a legal value for TTL on insert and update. It will be treated as equivalent to -inserting a 0 (:jira:`12216`). - -3.4.2 -^^^^^ - -- If a table has a non zero ``default_time_to_live``, then explicitly specifying a TTL of 0 in an ``INSERT`` or - ``UPDATE`` statement will result in the new writes not having any expiration (that is, an explicit TTL of 0 cancels - the ``default_time_to_live``). This wasn't the case before and the ``default_time_to_live`` was applied even though a - TTL had been explicitly set. -- ``ALTER TABLE`` ``ADD`` and ``DROP`` now allow multiple columns to be added/removed. -- New ``PER PARTITION LIMIT`` option for ``SELECT`` statements (see `CASSANDRA-7017 - `__. -- :ref:`User-defined functions ` can now instantiate ``UDTValue`` and ``TupleValue`` instances via the - new ``UDFContext`` interface (see `CASSANDRA-10818 `__. -- :ref:`User-defined types ` may now be stored in a non-frozen form, allowing individual fields to be updated and - deleted in ``UPDATE`` statements and ``DELETE`` statements, respectively. (`CASSANDRA-7423 - `__). - -3.4.1 -^^^^^ - -- Adds ``CAST`` functions. - -3.4.0 -^^^^^ - -- Support for :ref:`materialized views `. -- ``DELETE`` support for inequality expressions and ``IN`` restrictions on any primary key columns. -- ``UPDATE`` support for ``IN`` restrictions on any primary key columns. - -3.3.1 -^^^^^ - -- The syntax ``TRUNCATE TABLE X`` is now accepted as an alias for ``TRUNCATE X``. - -3.3.0 -^^^^^ - -- :ref:`User-defined functions and aggregates ` are now supported. -- Allows double-dollar enclosed strings literals as an alternative to single-quote enclosed strings. -- Introduces Roles to supersede user based authentication and access control -- New ``date``, ``time``, ``tinyint`` and ``smallint`` :ref:`data types ` have been added. -- :ref:`JSON support ` has been added -- Adds new time conversion functions and deprecate ``dateOf`` and ``unixTimestampOf``. - -3.2.0 -^^^^^ - -- :ref:`User-defined types ` supported. -- ``CREATE INDEX`` now supports indexing collection columns, including indexing the keys of map collections through the - ``keys()`` function -- Indexes on collections may be queried using the new ``CONTAINS`` and ``CONTAINS KEY`` operators -- :ref:`Tuple types ` were added to hold fixed-length sets of typed positional fields. -- ``DROP INDEX`` now supports optionally specifying a keyspace. 
- -3.1.7 -^^^^^ - -- ``SELECT`` statements now support selecting multiple rows in a single partition using an ``IN`` clause on combinations - of clustering columns. -- ``IF NOT EXISTS`` and ``IF EXISTS`` syntax is now supported by ``CREATE USER`` and ``DROP USER`` statements, - respectively. - -3.1.6 -^^^^^ - -- A new ``uuid()`` method has been added. -- Support for ``DELETE ... IF EXISTS`` syntax. - -3.1.5 -^^^^^ - -- It is now possible to group clustering columns in a relation, see :ref:`WHERE ` clauses. -- Added support for :ref:`static columns `. - -3.1.4 -^^^^^ - -- ``CREATE INDEX`` now allows specifying options when creating CUSTOM indexes. - -3.1.3 -^^^^^ - -- Millisecond precision formats have been added to the :ref:`timestamp ` parser. - -3.1.2 -^^^^^ - -- ``NaN`` and ``Infinity`` has been added as valid float constants. They are now reserved keywords. In the unlikely case - you we using them as a column identifier (or keyspace/table one), you will now need to double quote them. - -3.1.1 -^^^^^ - -- ``SELECT`` statement now allows listing the partition keys (using the ``DISTINCT`` modifier). See `CASSANDRA-4536 - `__. -- The syntax ``c IN ?`` is now supported in ``WHERE`` clauses. In that case, the value expected for the bind variable - will be a list of whatever type ``c`` is. -- It is now possible to use named bind variables (using ``:name`` instead of ``?``). - -3.1.0 -^^^^^ - -- ``ALTER TABLE`` ``DROP`` option added. -- ``SELECT`` statement now supports aliases in select clause. Aliases in WHERE and ORDER BY clauses are not supported. -- ``CREATE`` statements for ``KEYSPACE``, ``TABLE`` and ``INDEX`` now supports an ``IF NOT EXISTS`` condition. - Similarly, ``DROP`` statements support a ``IF EXISTS`` condition. -- ``INSERT`` statements optionally supports a ``IF NOT EXISTS`` condition and ``UPDATE`` supports ``IF`` conditions. - -3.0.5 -^^^^^ - -- ``SELECT``, ``UPDATE``, and ``DELETE`` statements now allow empty ``IN`` relations (see `CASSANDRA-5626 - `__. - -3.0.4 -^^^^^ - -- Updated the syntax for custom :ref:`secondary indexes `. -- Non-equal condition on the partition key are now never supported, even for ordering partitioner as this was not - correct (the order was **not** the one of the type of the partition key). Instead, the ``token`` method should always - be used for range queries on the partition key (see :ref:`WHERE clauses `). - -3.0.3 -^^^^^ - -- Support for custom :ref:`secondary indexes ` has been added. - -3.0.2 -^^^^^ - -- Type validation for the :ref:`constants ` has been fixed. For instance, the implementation used to allow - ``'2'`` as a valid value for an ``int`` column (interpreting it has the equivalent of ``2``), or ``42`` as a valid - ``blob`` value (in which case ``42`` was interpreted as an hexadecimal representation of the blob). This is no longer - the case, type validation of constants is now more strict. See the :ref:`data types ` section for details - on which constant is allowed for which type. -- The type validation fixed of the previous point has lead to the introduction of blobs constants to allow the input of - blobs. Do note that while the input of blobs as strings constant is still supported by this version (to allow smoother - transition to blob constant), it is now deprecated and will be removed by a future version. If you were using strings - as blobs, you should thus update your client code ASAP to switch blob constants. -- A number of functions to convert native types to blobs have also been introduced. 
Furthermore the token function is - now also allowed in select clauses. See the :ref:`section on functions ` for details. - -3.0.1 -^^^^^ - -- Date strings (and timestamps) are no longer accepted as valid ``timeuuid`` values. Doing so was a bug in the sense - that date string are not valid ``timeuuid``, and it was thus resulting in `confusing behaviors - `__. However, the following new methods have been added to help - working with ``timeuuid``: ``now``, ``minTimeuuid``, ``maxTimeuuid`` , - ``dateOf`` and ``unixTimestampOf``. -- Float constants now support the exponent notation. In other words, ``4.2E10`` is now a valid floating point value. - -Versioning -^^^^^^^^^^ - -Versioning of the CQL language adheres to the `Semantic Versioning `__ guidelines. Versions take the -form X.Y.Z where X, Y, and Z are integer values representing major, minor, and patch level respectively. There is no -correlation between Cassandra release versions and the CQL language version. - -========= ============================================================================================================= - version description -========= ============================================================================================================= - Major The major version *must* be bumped when backward incompatible changes are introduced. This should rarely - occur. - Minor Minor version increments occur when new, but backward compatible, functionality is introduced. - Patch The patch version is incremented when bugs are fixed. -========= ============================================================================================================= diff --git a/doc/source/cql/ddl.rst b/doc/source/cql/ddl.rst deleted file mode 100644 index 302777544cef..000000000000 --- a/doc/source/cql/ddl.rst +++ /dev/null @@ -1,649 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: cql - -.. _data-definition: - -Data Definition ---------------- - -CQL stores data in *tables*, whose schema defines the layout of said data in the table, and those tables are grouped in -*keyspaces*. A keyspace defines a number of options that applies to all the tables it contains, most prominently of -which is the :ref:`replication strategy ` used by the keyspace. It is generally encouraged to use -one keyspace by *application*, and thus many cluster may define only one keyspace. - -This section describes the statements used to create, modify, and remove those keyspace and tables. - -Common definitions -^^^^^^^^^^^^^^^^^^ - -The names of the keyspaces and tables are defined by the following grammar: - -.. productionlist:: - keyspace_name: `name` - table_name: [ `keyspace_name` '.' 
] `name` - name: `unquoted_name` | `quoted_name` - unquoted_name: re('[a-zA-Z_0-9]{1, 48}') - quoted_name: '"' `unquoted_name` '"' - -Both keyspace and table name should be comprised of only alphanumeric characters, cannot be empty and are limited in -size to 48 characters (that limit exists mostly to avoid filenames (which may include the keyspace and table name) to go -over the limits of certain file systems). By default, keyspace and table names are case insensitive (``myTable`` is -equivalent to ``mytable``) but case sensitivity can be forced by using double-quotes (``"myTable"`` is different from -``mytable``). - -Further, a table is always part of a keyspace and a table name can be provided fully-qualified by the keyspace it is -part of. If is is not fully-qualified, the table is assumed to be in the *current* keyspace (see :ref:`USE statement -`). - -Further, the valid names for columns is simply defined as: - -.. productionlist:: - column_name: `identifier` - -We also define the notion of statement options for use in the following section: - -.. productionlist:: - options: `option` ( AND `option` )* - option: `identifier` '=' ( `identifier` | `constant` | `map_literal` ) - -.. _create-keyspace-statement: - -CREATE KEYSPACE -^^^^^^^^^^^^^^^ - -A keyspace is created using a ``CREATE KEYSPACE`` statement: - -.. productionlist:: - create_keyspace_statement: CREATE KEYSPACE [ IF NOT EXISTS ] `keyspace_name` WITH `options` - -For instance:: - - CREATE KEYSPACE Excelsior - WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 3}; - - CREATE KEYSPACE Excalibur - WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1' : 1, 'DC2' : 3} - AND durable_writes = false; - - -The supported ``options`` are: - -=================== ========== =========== ========= =================================================================== -name kind mandatory default description -=================== ========== =========== ========= =================================================================== -``replication`` *map* yes The replication strategy and options to use for the keyspace (see - details below). -``durable_writes`` *simple* no true Whether to use the commit log for updates on this keyspace - (disable this option at your own risk!). -=================== ========== =========== ========= =================================================================== - -The ``replication`` property is mandatory and must at least contains the ``'class'`` sub-option which defines the -:ref:`replication strategy ` class to use. The rest of the sub-options depends on what replication -strategy is used. By default, Cassandra support the following ``'class'``: - -- ``'SimpleStrategy'``: A simple strategy that defines a replication factor for the whole cluster. The only sub-options - supported is ``'replication_factor'`` to define that replication factor and is mandatory. -- ``'NetworkTopologyStrategy'``: A replication strategy that allows to set the replication factor independently for - each data-center. The rest of the sub-options are key-value pairs where a key is a data-center name and its value is - the associated replication factor. - -Attempting to create a keyspace that already exists will return an error unless the ``IF NOT EXISTS`` option is used. If -it is used, the statement will be a no-op if the keyspace already exists. - -.. _use-statement: - -USE -^^^ - -The ``USE`` statement allows to change the *current* keyspace (for the *connection* on which it is executed). 
A number -of objects in CQL are bound to a keyspace (tables, user-defined types, functions, ...) and the current keyspace is the -default keyspace used when those objects are referred without a fully-qualified name (that is, without being prefixed a -keyspace name). A ``USE`` statement simply takes the keyspace to use as current as argument: - -.. productionlist:: - use_statement: USE `keyspace_name` - -.. _alter-keyspace-statement: - -ALTER KEYSPACE -^^^^^^^^^^^^^^ - -An ``ALTER KEYSPACE`` statement allows to modify the options of a keyspace: - -.. productionlist:: - alter_keyspace_statement: ALTER KEYSPACE `keyspace_name` WITH `options` - -For instance:: - - ALTER KEYSPACE Excelsior - WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 4}; - -The supported options are the same than for :ref:`creating a keyspace `. - -.. _drop-keyspace-statement: - -DROP KEYSPACE -^^^^^^^^^^^^^ - -Dropping a keyspace can be done using the ``DROP KEYSPACE`` statement: - -.. productionlist:: - drop_keyspace_statement: DROP KEYSPACE [ IF EXISTS ] `keyspace_name` - -For instance:: - - DROP KEYSPACE Excelsior; - -Dropping a keyspace results in the immediate, irreversible removal of that keyspace, including all the tables, UTD and -functions in it, and all the data contained in those tables. - -If the keyspace does not exists, the statement will return an error, unless ``IF EXISTS`` is used in which case the -operation is a no-op. - -.. _create-table-statement: - -CREATE TABLE -^^^^^^^^^^^^ - -Creating a new table uses the ``CREATE TABLE`` statement: - -.. productionlist:: - create_table_statement: CREATE TABLE [ IF NOT EXISTS ] `table_name` - : '(' - : `column_definition` - : ( ',' `column_definition` )* - : [ ',' PRIMARY KEY '(' `primary_key` ')' ] - : ')' [ WITH `table_options` ] - column_definition: `column_name` `cql_type` [ STATIC ] [ PRIMARY KEY] - primary_key: `partition_key` [ ',' `clustering_columns` ] - partition_key: `column_name` - : | '(' `column_name` ( ',' `column_name` )* ')' - clustering_columns: `column_name` ( ',' `column_name` )* - table_options: COMPACT STORAGE [ AND `table_options` ] - : | CLUSTERING ORDER BY '(' `clustering_order` ')' [ AND `table_options` ] - : | `options` - clustering_order: `column_name` (ASC | DESC) ( ',' `column_name` (ASC | DESC) )* - -For instance:: - - CREATE TABLE monkeySpecies ( - species text PRIMARY KEY, - common_name text, - population varint, - average_size int - ) WITH comment='Important biological records' - AND read_repair_chance = 1.0; - - CREATE TABLE timeline ( - userid uuid, - posted_month int, - posted_time uuid, - body text, - posted_by text, - PRIMARY KEY (userid, posted_month, posted_time) - ) WITH compaction = { 'class' : 'LeveledCompactionStrategy' }; - - CREATE TABLE loads ( - machine inet, - cpu int, - mtime timeuuid, - load float, - PRIMARY KEY ((machine, cpu), mtime) - ) WITH CLUSTERING ORDER BY (mtime DESC); - -A CQL table has a name and is composed of a set of *rows*. Creating a table amounts to defining which :ref:`columns -` the rows will be composed, which of those columns compose the :ref:`primary key `, as -well as optional :ref:`options ` for the table. - -Attempting to create an already existing table will return an error unless the ``IF NOT EXISTS`` directive is used. If -it is used, the statement will be a no-op if the table already exists. - - -.. 
_column-definition: - -Column definitions -~~~~~~~~~~~~~~~~~~ - -Every rows in a CQL table has a set of predefined columns defined at the time of the table creation (or added later -using an :ref:`alter statement`). - -A :token:`column_definition` is primarily comprised of the name of the column defined and it's :ref:`type `, -which restrict which values are accepted for that column. Additionally, a column definition can have the following -modifiers: - -``STATIC`` - it declares the column as being a :ref:`static column `. - -``PRIMARY KEY`` - it declares the column as being the sole component of the :ref:`primary key ` of the table. - -.. _static-columns: - -Static columns -`````````````` -Some columns can be declared as ``STATIC`` in a table definition. A column that is static will be “shared” by all the -rows belonging to the same partition (having the same :ref:`partition key `). For instance:: - - CREATE TABLE t ( - pk int, - t int, - v text, - s text static, - PRIMARY KEY (pk, t) - ); - - INSERT INTO t (pk, t, v, s) VALUES (0, 0, 'val0', 'static0'); - INSERT INTO t (pk, t, v, s) VALUES (0, 1, 'val1', 'static1'); - - SELECT * FROM t; - pk | t | v | s - ----+---+--------+----------- - 0 | 0 | 'val0' | 'static1' - 0 | 1 | 'val1' | 'static1' - -As can be seen, the ``s`` value is the same (``static1``) for both of the row in the partition (the partition key in -that example being ``pk``, both rows are in that same partition): the 2nd insertion has overridden the value for ``s``. - -The use of static columns as the following restrictions: - -- tables with the ``COMPACT STORAGE`` option (see below) cannot use them. -- a table without clustering columns cannot have static columns (in a table without clustering columns, every partition - has only one row, and so every column is inherently static). -- only non ``PRIMARY KEY`` columns can be static. - -.. _primary-key: - -The Primary key -~~~~~~~~~~~~~~~ - -Within a table, a row is uniquely identified by its ``PRIMARY KEY``, and hence all table **must** define a PRIMARY KEY -(and only one). A ``PRIMARY KEY`` definition is composed of one or more of the columns defined in the table. -Syntactically, the primary key is defined the keywords ``PRIMARY KEY`` followed by comma-separated list of the column -names composing it within parenthesis, but if the primary key has only one column, one can alternatively follow that -column definition by the ``PRIMARY KEY`` keywords. The order of the columns in the primary key definition matter. - -A CQL primary key is composed of 2 parts: - -- the :ref:`partition key ` part. It is the first component of the primary key definition. It can be a - single column or, using additional parenthesis, can be multiple columns. A table always have at least a partition key, - the smallest possible table definition is:: - - CREATE TABLE t (k text PRIMARY KEY); - -- the :ref:`clustering columns `. Those are the columns after the first component of the primary key - definition, and the order of those columns define the *clustering order*. - -Some example of primary key definition are: - -- ``PRIMARY KEY (a)``: ``a`` is the partition key and there is no clustering columns. -- ``PRIMARY KEY (a, b, c)`` : ``a`` is the partition key and ``b`` and ``c`` are the clustering columns. -- ``PRIMARY KEY ((a, b), c)`` : ``a`` and ``b`` compose the partition key (this is often called a *composite* partition - key) and ``c`` is the clustering column. - - -.. 
_partition-key: - -The partition key -````````````````` - -Within a table, CQL defines the notion of a *partition*. A partition is simply the set of rows that share the same value -for their partition key. Note that if the partition key is composed of multiple columns, then rows belong to the same -partition only they have the same values for all those partition key column. So for instance, given the following table -definition and content:: - - CREATE TABLE t ( - a int, - b int, - c int, - d int, - PRIMARY KEY ((a, b), c, d) - ); - - SELECT * FROM t; - a | b | c | d - ---+---+---+--- - 0 | 0 | 0 | 0 // row 1 - 0 | 0 | 1 | 1 // row 2 - 0 | 1 | 2 | 2 // row 3 - 0 | 1 | 3 | 3 // row 4 - 1 | 1 | 4 | 4 // row 5 - -``row 1`` and ``row 2`` are in the same partition, ``row 3`` and ``row 4`` are also in the same partition (but a -different one) and ``row 5`` is in yet another partition. - -Note that a table always has a partition key, and that if the table has no :ref:`clustering columns -`, then every partition of that table is only comprised of a single row (since the primary key -uniquely identifies rows and the primary key is equal to the partition key if there is no clustering columns). - -The most important property of partition is that all the rows belonging to the same partition are guarantee to be stored -on the same set of replica nodes. In other words, the partition key of a table defines which of the rows will be -localized together in the Cluster, and it is thus important to choose your partition key wisely so that rows that needs -to be fetch together are in the same partition (so that querying those rows together require contacting a minimum of -nodes). - -Please note however that there is a flip-side to this guarantee: as all rows sharing a partition key are guaranteed to -be stored on the same set of replica node, a partition key that groups too much data can create a hotspot. - -Another useful property of a partition is that when writing data, all the updates belonging to a single partition are -done *atomically* and in *isolation*, which is not the case across partitions. - -The proper choice of the partition key and clustering columns for a table is probably one of the most important aspect -of data modeling in Cassandra, and it largely impact which queries can be performed, and how efficiently they are. - - -.. _clustering-columns: - -The clustering columns -`````````````````````` - -The clustering columns of a table defines the clustering order for the partition of that table. For a given -:ref:`partition `, all the rows are physically ordered inside Cassandra by that clustering order. For -instance, given:: - - CREATE TABLE t ( - a int, - b int, - c int, - PRIMARY KEY (a, c, d) - ); - - SELECT * FROM t; - a | b | c - ---+---+--- - 0 | 0 | 4 // row 1 - 0 | 1 | 9 // row 2 - 0 | 2 | 2 // row 3 - 0 | 3 | 3 // row 4 - -then the rows (which all belong to the same partition) are all stored internally in the order of the values of their -``b`` column (the order they are displayed above). So where the partition key of the table allows to group rows on the -same replica set, the clustering columns controls how those rows are stored on the replica. That sorting allows the -retrieval of a range of rows within a partition (for instance, in the example above, ``SELECT * FROM t WHERE a = 0 AND b -> 1 and b <= 3``) to be very efficient. - - -.. 
_create-table-options: - -Table options -~~~~~~~~~~~~~ - -A CQL table has a number of options that can be set at creation (and, for most of them, :ref:`altered -` later). These options are specified after the ``WITH`` keyword. - -Amongst those options, two important ones cannot be changed after creation and influence which queries can be done -against the table: the ``COMPACT STORAGE`` option and the ``CLUSTERING ORDER`` option. Those, as well as the other -options of a table are described in the following sections. - -.. _compact-tables: - -Compact tables -`````````````` - -.. warning:: Since Cassandra 3.0, compact tables have the exact same layout internally than non compact ones (for the - same schema obviously), and declaring a table compact **only** creates artificial limitations on the table definition - and usage that are necessary to ensure backward compatibility with the deprecated Thrift API. And as ``COMPACT - STORAGE`` cannot, as of Cassandra |version|, be removed, it is strongly discouraged to create new table with the - ``COMPACT STORAGE`` option. - -A *compact* table is one defined with the ``COMPACT STORAGE`` option. This option is mainly targeted towards backward -compatibility for definitions created before CQL version 3 (see `www.datastax.com/dev/blog/thrift-to-cql3 -`__ for more details) and shouldn't be used for new tables. Declaring a -table with this option creates limitations for the table which are largely arbitrary but necessary for backward -compatibility with the (deprecated) Thrift API. Amongst those limitation: - -- a compact table cannot use collections nor static columns. -- if a compact table has at least one clustering column, then it must have *exactly* one column outside of the primary - key ones. This imply you cannot add or remove columns after creation in particular. -- a compact table is limited in the indexes it can create, and no materialized view can be created on it. - -.. _clustering-order: - -Reversing the clustering order -`````````````````````````````` - -The clustering order of a table is defined by the :ref:`clustering columns ` of that table. By -default, that ordering is based on natural order of those clustering order, but the ``CLUSTERING ORDER`` allows to -change that clustering order to use the *reverse* natural order for some (potentially all) of the columns. - -The ``CLUSTERING ORDER`` option takes the comma-separated list of the clustering column, each with a ``ASC`` (for -*ascendant*, e.g. the natural order) or ``DESC`` (for *descendant*, e.g. the reverse natural order). Note in particular -that the default (if the ``CLUSTERING ORDER`` option is not used) is strictly equivalent to using the option with all -clustering columns using the ``ASC`` modifier. - -Note that this option is basically a hint for the storage engine to change the order in which it stores the row but it -has 3 visible consequences: - -# it limits which ``ORDER BY`` clause are allowed for :ref:`selects ` on that table. You can only - order results by the clustering order or the reverse clustering order. Meaning that if a table has 2 clustering column - ``a`` and ``b`` and you defined ``WITH CLUSTERING ORDER (a DESC, b ASC)``, then in queries you will be allowed to use - ``ORDER BY (a DESC, b ASC)`` and (reverse clustering order) ``ORDER BY (a ASC, b DESC)`` but **not** ``ORDER BY (a - ASC, b ASC)`` (nor ``ORDER BY (a DESC, b DESC)``). -# it also change the default order of results when queried (if no ``ORDER BY`` is provided). 
Results are always returned - in clustering order (within a partition). -# it has a small performance impact on some queries as queries in reverse clustering order are slower than the one in - forward clustering order. In practice, this means that if you plan on querying mostly in the reverse natural order of - your columns (which is common with time series for instance where you often want data from the newest to the oldest), - it is an optimization to declare a descending clustering order. - -.. _create-table-general-options: - -Other table options -``````````````````` - -.. todo:: review (misses cdc if nothing else) and link to proper categories when appropriate (compaction for instance) - -A table supports the following options: - -+--------------------------------+----------+-------------+-----------------------------------------------------------+ -| option | kind | default | description | -+================================+==========+=============+===========================================================+ -| ``comment`` | *simple* | none | A free-form, human-readable comment. | -+--------------------------------+----------+-------------+-----------------------------------------------------------+ -| ``read_repair_chance`` | *simple* | 0.1 | The probability with which to query extra nodes (e.g. | -| | | | more nodes than required by the consistency level) for | -| | | | the purpose of read repairs. | -+--------------------------------+----------+-------------+-----------------------------------------------------------+ -| ``dclocal_read_repair_chance`` | *simple* | 0 | The probability with which to query extra nodes (e.g. | -| | | | more nodes than required by the consistency level) | -| | | | belonging to the same data center than the read | -| | | | coordinator for the purpose of read repairs. | -+--------------------------------+----------+-------------+-----------------------------------------------------------+ -| ``gc_grace_seconds`` | *simple* | 864000 | Time to wait before garbage collecting tombstones | -| | | | (deletion markers). | -+--------------------------------+----------+-------------+-----------------------------------------------------------+ -| ``bloom_filter_fp_chance`` | *simple* | 0.00075 | The target probability of false positive of the sstable | -| | | | bloom filters. Said bloom filters will be sized to provide| -| | | | the provided probability (thus lowering this value impact | -| | | | the size of bloom filters in-memory and on-disk) | -+--------------------------------+----------+-------------+-----------------------------------------------------------+ -| ``default_time_to_live`` | *simple* | 0 | The default expiration time (“TTL”) in seconds for a | -| | | | table. | -+--------------------------------+----------+-------------+-----------------------------------------------------------+ -| ``compaction`` | *map* | *see below* | :ref:`Compaction options `. | -+--------------------------------+----------+-------------+-----------------------------------------------------------+ -| ``compression`` | *map* | *see below* | :ref:`Compression options `. | -+--------------------------------+----------+-------------+-----------------------------------------------------------+ -| ``caching`` | *map* | *see below* | :ref:`Caching options `. | -+--------------------------------+----------+-------------+-----------------------------------------------------------+ - -.. 
_cql-compaction-options: - -Compaction options -################## - -The ``compaction`` options must at least define the ``'class'`` sub-option, that defines the compaction strategy class -to use. The default supported class are ``'SizeTieredCompactionStrategy'`` (:ref:`STCS `), -``'LeveledCompactionStrategy'`` (:ref:`LCS `) and ``'TimeWindowCompactionStrategy'`` (:ref:`TWCS `) (the -``'DateTieredCompactionStrategy'`` is also supported but is deprecated and ``'TimeWindowCompactionStrategy'`` should be -preferred instead). Custom strategy can be provided by specifying the full class name as a :ref:`string constant -`. - -All default strategies support a number of :ref:`common options `, as well as options specific to -the strategy chosen (see the section corresponding to your strategy for details: :ref:`STCS `, :ref:`LCS -` and :ref:`TWCS `). - -.. _cql-compression-options: - -Compression options -################### - -The ``compression`` options define if and how the sstables of the table are compressed. The following sub-options are -available: - -========================= =============== ============================================================================= - Option Default Description -========================= =============== ============================================================================= - ``class`` LZ4Compressor The compression algorithm to use. Default compressor are: LZ4Compressor, - SnappyCompressor and DeflateCompressor. Use ``'enabled' : false`` to disable - compression. Custom compressor can be provided by specifying the full class - name as a “string constant”:#constants. - ``enabled`` true Enable/disable sstable compression. - ``chunk_length_in_kb`` 64 On disk SSTables are compressed by block (to allow random reads). This - defines the size (in KB) of said block. Bigger values may improve the - compression rate, but increases the minimum size of data to be read from disk - for a read - ``crc_check_chance`` 1.0 When compression is enabled, each compressed block includes a checksum of - that block for the purpose of detecting disk bitrot and avoiding the - propagation of corruption to other replica. This option defines the - probability with which those checksums are checked during read. By default - they are always checked. Set to 0 to disable checksum checking and to 0.5 for - instance to check them every other read | -========================= =============== ============================================================================= - - -For instance, to create a table with LZ4Compressor and a chunk_lenth_in_kb of 4KB:: - - CREATE TABLE simple ( - id int, - key text, - value text, - PRIMARY KEY (key, value) - ) with compression = {'class': 'LZ4Compressor', 'chunk_length_in_kb': 4}; - - -.. _cql-caching-options: - -Caching options -############### - -The ``caching`` options allows to configure both the *key cache* and the *row cache* for the table. The following -sub-options are available: - -======================== ========= ==================================================================================== - Option Default Description -======================== ========= ==================================================================================== - ``keys`` ALL Whether to cache keys (“key cache”) for this table. Valid values are: ``ALL`` and - ``NONE``. - ``rows_per_partition`` NONE The amount of rows to cache per partition (“row cache”). If an integer ``n`` is - specified, the first ``n`` queried rows of a partition will be cached. 
Other - possible options are ``ALL``, to cache all rows of a queried partition, or ``NONE`` - to disable row caching. -======================== ========= ==================================================================================== - - -For instance, to create a table with both a key cache and 10 rows per partition:: - - CREATE TABLE simple ( - id int, - key text, - value text, - PRIMARY KEY (key, value) - ) WITH caching = {'keys': 'ALL', 'rows_per_partition': 10}; - - -Other considerations: -##################### - -- Adding new columns (see ``ALTER TABLE`` below) is a constant time operation. There is thus no need to try to - anticipate future usage when creating a table. - -.. _alter-table-statement: - -ALTER TABLE -^^^^^^^^^^^ - -Altering an existing table uses the ``ALTER TABLE`` statement: - -.. productionlist:: - alter_table_statement: ALTER TABLE `table_name` `alter_table_instruction` - alter_table_instruction: ADD `column_name` `cql_type` ( ',' `column_name` `cql_type` )* - : | DROP `column_name` ( `column_name` )* - : | WITH `options` - -For instance:: - - ALTER TABLE addamsFamily ADD gravesite varchar; - - ALTER TABLE addamsFamily - WITH comment = 'A most excellent and useful table' - AND read_repair_chance = 0.2; - -The ``ALTER TABLE`` statement can: - -- Add new column(s) to the table (through the ``ADD`` instruction). Note that the primary key of a table cannot be - changed and thus newly added column will, by extension, never be part of the primary key. Also note that :ref:`compact - tables ` have restrictions regarding column addition. Note that this is constant (in the amount of - data the cluster contains) time operation. -- Remove column(s) from the table. This drops both the column and all its content, but note that while the column - becomes immediately unavailable, its content is only removed lazily during compaction. Please also see the warnings - below. Due to lazy removal, the altering itself is a constant (in the amount of data removed or contained in the - cluster) time operation. -- Change some of the table options (through the ``WITH`` instruction). The :ref:`supported options - ` are the same that when creating a table (outside of ``COMPACT STORAGE`` and ``CLUSTERING - ORDER`` that cannot be changed after creation). Note that setting any ``compaction`` sub-options has the effect of - erasing all previous ``compaction`` options, so you need to re-specify all the sub-options if you want to keep them. - The same note applies to the set of ``compression`` sub-options. - -.. warning:: Dropping a column assumes that the timestamps used for the value of this column are "real" timestamp in - microseconds. Using "real" timestamps in microseconds is the default is and is **strongly** recommended but as - Cassandra allows the client to provide any timestamp on any table it is theoretically possible to use another - convention. Please be aware that if you do so, dropping a column will not work correctly. - -.. warning:: Once a column is dropped, it is allowed to re-add a column with the same name than the dropped one - **unless** the type of the dropped column was a (non-frozen) column (due to an internal technical limitation). - - -.. _drop-table-statement: - -DROP TABLE -^^^^^^^^^^ - -Dropping a table uses the ``DROP TABLE`` statement: - -.. productionlist:: - drop_table_statement: DROP TABLE [ IF EXISTS ] `table_name` - -Dropping a table results in the immediate, irreversible removal of the table, including all data it contains. 
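-
-For instance (a small sketch; ``excess_data`` is a hypothetical table name used only for illustration)::
-
-    DROP TABLE excess_data;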
- -If the table does not exist, the statement will return an error, unless ``IF EXISTS`` is used in which case the -operation is a no-op. - -.. _truncate-statement: - -TRUNCATE -^^^^^^^^ - -A table can be truncated using the ``TRUNCATE`` statement: - -.. productionlist:: - truncate_statement: TRUNCATE [ TABLE ] `table_name` - -Note that ``TRUNCATE TABLE foo`` is allowed for consistency with other DDL statements but tables are the only object -that can be truncated currently and so the ``TABLE`` keyword can be omitted. - -Truncating a table permanently removes all existing data from the table, but without removing the table itself. diff --git a/doc/source/cql/definitions.rst b/doc/source/cql/definitions.rst deleted file mode 100644 index d4a5b59b9d55..000000000000 --- a/doc/source/cql/definitions.rst +++ /dev/null @@ -1,232 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. _UUID: https://en.wikipedia.org/wiki/Universally_unique_identifier - -.. highlight:: cql - -Definitions ------------ - -.. _conventions: - -Conventions -^^^^^^^^^^^ - -To aid in specifying the CQL syntax, we will use the following conventions in this document: - -- Language rules will be given in an informal `BNF variant - `_ notation. In particular, we'll use square brakets - (``[ item ]``) for optional items, ``*`` and ``+`` for repeated items (where ``+`` imply at least one). -- The grammar will also use the following convention for convenience: non-terminal term will be lowercase (and link to - their definition) while terminal keywords will be provided "all caps". Note however that keywords are - :ref:`identifiers` and are thus case insensitive in practice. We will also define some early construction using - regexp, which we'll indicate with ``re()``. -- The grammar is provided for documentation purposes and leave some minor details out. For instance, the comma on the - last column definition in a ``CREATE TABLE`` statement is optional but supported if present even though the grammar in - this document suggests otherwise. Also, not everything accepted by the grammar is necessarily valid CQL. -- References to keywords or pieces of CQL code in running text will be shown in a ``fixed-width font``. - - -.. _identifiers: - -Identifiers and keywords -^^^^^^^^^^^^^^^^^^^^^^^^ - -The CQL language uses *identifiers* (or *names*) to identify tables, columns and other objects. An identifier is a token -matching the regular expression ``[a-zA-Z][a-zA-Z0-9_]*``. - -A number of such identifiers, like ``SELECT`` or ``WITH``, are *keywords*. They have a fixed meaning for the language -and most are reserved. The list of those keywords can be found in :ref:`appendix-A`. - -Identifiers and (unquoted) keywords are case insensitive. 
Thus ``SELECT`` is the same as ``select`` or ``sElEcT``, and
-``myId`` is the same as ``myid`` or ``MYID``. A convention often used (in particular by the samples of this
-documentation) is to use upper case for keywords and lower case for other identifiers.
-
-There is a second kind of identifier, the *quoted identifier*, defined by enclosing an arbitrary sequence of
-characters (non-empty) in double-quotes (``"``). Quoted identifiers are never keywords. Thus ``"select"`` is not a
-reserved keyword and can be used to refer to a column (note that using this is not particularly advised), while
-``select`` would raise a parsing error. Also, contrary to unquoted identifiers and keywords, quoted identifiers are case
-sensitive (``"My Quoted Id"`` is *different* from ``"my quoted id"``). A fully lowercase quoted identifier that matches
-``[a-zA-Z][a-zA-Z0-9_]*`` is however *equivalent* to the unquoted identifier obtained by removing the double-quotes (so
-``"myid"`` is equivalent to ``myid`` and to ``myId`` but different from ``"myId"``). Inside a quoted identifier, the
-double-quote character can be repeated to escape it, so ``"foo "" bar"`` is a valid identifier.
-
-.. note:: *Quoted identifiers* allow declaring columns with arbitrary names, and those can sometimes clash with
-   specific names used by the server. For instance, when using a conditional update, the server will respond with a
-   result-set containing a special result named ``"[applied]"``. If you’ve declared a column with such a name, this
-   could potentially confuse some tools and should be avoided. In general, unquoted identifiers should be preferred but
-   if you use quoted identifiers, it is strongly advised to avoid any name enclosed in square brackets (like
-   ``"[applied]"``) and any name that looks like a function call (like ``"f(x)"``).
-
-More formally, we have:
-
-.. productionlist::
-   identifier: `unquoted_identifier` | `quoted_identifier`
-   unquoted_identifier: re('[a-zA-Z][a-zA-Z0-9_]*')
-   quoted_identifier: '"' (any character where " can appear if doubled)+ '"'
-
-.. _constants:
-
-Constants
-^^^^^^^^^
-
-CQL defines the following kinds of *constants*:
-
-.. productionlist::
-   constant: `string` | `integer` | `float` | `boolean` | `uuid` | `blob` | NULL
-   string: '\'' (any character where ' can appear if doubled)+ '\''
-         : '$$' (any character other than '$$') '$$'
-   integer: re('-?[0-9]+')
-   float: re('-?[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?') | NAN | INFINITY
-   boolean: TRUE | FALSE
-   uuid: `hex`{8}-`hex`{4}-`hex`{4}-`hex`{4}-`hex`{12}
-   hex: re("[0-9a-fA-F]")
-   blob: '0' ('x' | 'X') `hex`+
-
-In other words:
-
-- A string constant is an arbitrary sequence of characters enclosed in single-quotes (``'``). A single-quote
-  can be included by repeating it, e.g. ``'It''s raining today'``. Those are not to be confused with quoted
-  :ref:`identifiers` that use double-quotes. Alternatively, a string can be defined by enclosing the arbitrary sequence
-  of characters in two dollar characters, in which case single-quotes can be used without escaping (``$$It's raining
-  today$$``). That latter form is often used when defining :ref:`user-defined functions ` to avoid having to
-  escape single-quote characters in the function body (as they are more likely to occur than ``$$``).
-- Integer, float and boolean constants are defined as expected. Note however that float allows the special ``NaN`` and
-  ``Infinity`` constants.
-- CQL supports UUID_ constants.
-- Blob content is provided in hexadecimal and prefixed by ``0x``.
-- The special ``NULL`` constant denotes the absence of value. - -For how these constants are typed, see the :ref:`data-types` section. - -Terms -^^^^^ - -CQL has the notion of a *term*, which denotes the kind of values that CQL support. Terms are defined by: - -.. productionlist:: - term: `constant` | `literal` | `function_call` | `type_hint` | `bind_marker` - literal: `collection_literal` | `udt_literal` | `tuple_literal` - function_call: `identifier` '(' [ `term` (',' `term`)* ] ')' - type_hint: '(' `cql_type` `)` term - bind_marker: '?' | ':' `identifier` - -A term is thus one of: - -- A :ref:`constant `. -- A literal for either :ref:`a collection `, :ref:`a user-defined type ` or :ref:`a tuple ` - (see the linked sections for details). -- A function call: see :ref:`the section on functions ` for details on which :ref:`native function - ` exists and how to define your own :ref:`user-defined ones `. -- A *type hint*: see the :ref:`related section ` for details. -- A bind marker, which denotes a variable to be bound at execution time. See the section on :ref:`prepared-statements` - for details. A bind marker can be either anonymous (``?``) or named (``:some_name``). The latter form provides a more - convenient way to refer to the variable for binding it and should generally be preferred. - - -Comments -^^^^^^^^ - -A comment in CQL is a line beginning by either double dashes (``--``) or double slash (``//``). - -Multi-line comments are also supported through enclosure within ``/*`` and ``*/`` (but nesting is not supported). - -:: - - -- This is a comment - // This is a comment too - /* This is - a multi-line comment */ - -Statements -^^^^^^^^^^ - -CQL consists of statements that can be divided in the following categories: - -- :ref:`data-definition` statements, to define and change how the data is stored (keyspaces and tables). -- :ref:`data-manipulation` statements, for selecting, inserting and deleting data. -- :ref:`secondary-indexes` statements. -- :ref:`materialized-views` statements. -- :ref:`cql-roles` statements. -- :ref:`cql-permissions` statements. -- :ref:`User-Defined Functions ` statements. -- :ref:`udts` statements. -- :ref:`cql-triggers` statements. - -All the statements are listed below and are described in the rest of this documentation (see links above): - -.. 
productionlist:: - cql_statement: `statement` [ ';' ] - statement: `ddl_statement` - : | `dml_statement` - : | `secondary_index_statement` - : | `materialized_view_statement` - : | `role_or_permission_statement` - : | `udf_statement` - : | `udt_statement` - : | `trigger_statement` - ddl_statement: `use_statement` - : | `create_keyspace_statement` - : | `alter_keyspace_statement` - : | `drop_keyspace_statement` - : | `create_table_statement` - : | `alter_table_statement` - : | `drop_table_statement` - : | `truncate_statement` - dml_statement: `select_statement` - : | `insert_statement` - : | `update_statement` - : | `delete_statement` - : | `batch_statement` - secondary_index_statement: `create_index_statement` - : | `drop_index_statement` - materialized_view_statement: `create_materialized_view_statement` - : | `drop_materialized_view_statement` - role_or_permission_statement: `create_role_statement` - : | `alter_role_statement` - : | `drop_role_statement` - : | `grant_role_statement` - : | `revoke_role_statement` - : | `list_roles_statement` - : | `grant_permission_statement` - : | `revoke_permission_statement` - : | `list_permissions_statement` - : | `create_user_statement` - : | `alter_user_statement` - : | `drop_user_statement` - : | `list_users_statement` - udf_statement: `create_function_statement` - : | `drop_function_statement` - : | `create_aggregate_statement` - : | `drop_aggregate_statement` - udt_statement: `create_type_statement` - : | `alter_type_statement` - : | `drop_type_statement` - trigger_statement: `create_trigger_statement` - : | `drop_trigger_statement` - -.. _prepared-statements: - -Prepared Statements -^^^^^^^^^^^^^^^^^^^ - -CQL supports *prepared statements*. Prepared statements are an optimization that allows to parse a query only once but -execute it multiple times with different concrete values. - -Any statement that uses at least one bind marker (see :token:`bind_marker`) will need to be *prepared*. After which the statement -can be *executed* by provided concrete values for each of its marker. The exact details of how a statement is prepared -and then executed depends on the CQL driver used and you should refer to your driver documentation. diff --git a/doc/source/cql/dml.rst b/doc/source/cql/dml.rst deleted file mode 100644 index 1308de57ee87..000000000000 --- a/doc/source/cql/dml.rst +++ /dev/null @@ -1,522 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: cql - -.. _data-manipulation: - -Data Manipulation ------------------ - -This section describes the statements supported by CQL to insert, update, delete and query data. - -.. _select-statement: - -SELECT -^^^^^^ - -Querying data from data is done using a ``SELECT`` statement: - -.. 
productionlist:: - select_statement: SELECT [ JSON | DISTINCT ] ( `select_clause` | '*' ) - : FROM `table_name` - : [ WHERE `where_clause` ] - : [ GROUP BY `group_by_clause` ] - : [ ORDER BY `ordering_clause` ] - : [ PER PARTITION LIMIT (`integer` | `bind_marker`) ] - : [ LIMIT (`integer` | `bind_marker`) ] - : [ ALLOW FILTERING ] - select_clause: `selector` [ AS `identifier` ] ( ',' `selector` [ AS `identifier` ] ) - selector: `column_name` - : | `term` - : | CAST '(' `selector` AS `cql_type` ')' - : | `function_name` '(' [ `selector` ( ',' `selector` )* ] ')' - : | COUNT '(' '*' ')' - where_clause: `relation` ( AND `relation` )* - relation: `column_name` `operator` `term` - : '(' `column_name` ( ',' `column_name` )* ')' `operator` `tuple_literal` - : TOKEN '(' `column_name` ( ',' `column_name` )* ')' `operator` `term` - operator: '=' | '<' | '>' | '<=' | '>=' | '!=' | IN | CONTAINS | CONTAINS KEY - group_by_clause: `column_name` ( ',' `column_name` )* - ordering_clause: `column_name` [ ASC | DESC ] ( ',' `column_name` [ ASC | DESC ] )* - -For instance:: - - SELECT name, occupation FROM users WHERE userid IN (199, 200, 207); - SELECT JSON name, occupation FROM users WHERE userid = 199; - SELECT name AS user_name, occupation AS user_occupation FROM users; - - SELECT time, value - FROM events - WHERE event_type = 'myEvent' - AND time > '2011-02-03' - AND time <= '2012-01-01' - - SELECT COUNT (*) AS user_count FROM users; - -The ``SELECT`` statements reads one or more columns for one or more rows in a table. It returns a result-set of the rows -matching the request, where each row contains the values for the selection corresponding to the query. Additionally, -:ref:`functions ` including :ref:`aggregation ` ones can be applied to the result. - -A ``SELECT`` statement contains at least a :ref:`selection clause ` and the name of the table on which -the selection is on (note that CQL does **not** joins or sub-queries and thus a select statement only apply to a single -table). In most case, a select will also have a :ref:`where clause ` and it can optionally have additional -clauses to :ref:`order ` or :ref:`limit ` the results. Lastly, :ref:`queries that require -filtering ` can be allowed if the ``ALLOW FILTERING`` flag is provided. - -.. _selection-clause: - -Selection clause -~~~~~~~~~~~~~~~~ - -The :token:`select_clause` determines which columns needs to be queried and returned in the result-set, as well as any -transformation to apply to this result before returning. It consists of a comma-separated list of *selectors* or, -alternatively, of the wildcard character (``*``) to select all the columns defined in the table. - -Selectors -````````` - -A :token:`selector` can be one of: - -- A column name of the table selected, to retrieve the values for that column. -- A term, which is usually used nested inside other selectors like functions (if a term is selected directly, then the - corresponding column of the result-set will simply have the value of this term for every row returned). -- A casting, which allows to convert a nested selector to a (compatible) type. -- A function call, where the arguments are selector themselves. See the section on :ref:`functions ` for - more details. -- The special call ``COUNT(*)`` to the :ref:`COUNT function `, which counts all non-null results. - -Aliases -``````` - -Every *top-level* selector can also be aliased (using `AS`). If so, the name of the corresponding column in the result -set will be that of the alias. 
For instance:: - - // Without alias - SELECT intAsBlob(4) FROM t; - - // intAsBlob(4) - // -------------- - // 0x00000004 - - // With alias - SELECT intAsBlob(4) AS four FROM t; - - // four - // ------------ - // 0x00000004 - -.. note:: Currently, aliases aren't recognized anywhere else in the statement where they are used (not in the ``WHERE`` - clause, not in the ``ORDER BY`` clause, ...). You must use the orignal column name instead. - - -``WRITETIME`` and ``TTL`` function -``````````````````````````````````` - -Selection supports two special functions (that aren't allowed anywhere else): ``WRITETIME`` and ``TTL``. Both function -take only one argument and that argument *must* be a column name (so for instance ``TTL(3)`` is invalid). - -Those functions allow to retrieve meta-information that are stored internally for each column, namely: - -- the timestamp of the value of the column for ``WRITETIME``. -- the remaining time to live (in seconds) for the value of the column if it set to expire (and ``null`` otherwise). - -.. _where-clause: - -The ``WHERE`` clause -~~~~~~~~~~~~~~~~~~~~ - -The ``WHERE`` clause specifies which rows must be queried. It is composed of relations on the columns that are part of -the ``PRIMARY KEY`` and/or have a `secondary index <#createIndexStmt>`__ defined on them. - -Not all relations are allowed in a query. For instance, non-equal relations (where ``IN`` is considered as an equal -relation) on a partition key are not supported (but see the use of the ``TOKEN`` method below to do non-equal queries on -the partition key). Moreover, for a given partition key, the clustering columns induce an ordering of rows and relations -on them is restricted to the relations that allow to select a **contiguous** (for the ordering) set of rows. For -instance, given:: - - CREATE TABLE posts ( - userid text, - blog_title text, - posted_at timestamp, - entry_title text, - content text, - category int, - PRIMARY KEY (userid, blog_title, posted_at) - ) - -The following query is allowed:: - - SELECT entry_title, content FROM posts - WHERE userid = 'john doe' - AND blog_title='John''s Blog' - AND posted_at >= '2012-01-01' AND posted_at < '2012-01-31' - -But the following one is not, as it does not select a contiguous set of rows (and we suppose no secondary indexes are -set):: - - // Needs a blog_title to be set to select ranges of posted_at - SELECT entry_title, content FROM posts - WHERE userid = 'john doe' - AND posted_at >= '2012-01-01' AND posted_at < '2012-01-31' - -When specifying relations, the ``TOKEN`` function can be used on the ``PARTITION KEY`` column to query. In that case, -rows will be selected based on the token of their ``PARTITION_KEY`` rather than on the value. Note that the token of a -key depends on the partitioner in use, and that in particular the RandomPartitioner won't yield a meaningful order. Also -note that ordering partitioners always order token values by bytes (so even if the partition key is of type int, -``token(-1) > token(0)`` in particular). Example:: - - SELECT * FROM posts - WHERE token(userid) > token('tom') AND token(userid) < token('bob') - -Moreover, the ``IN`` relation is only allowed on the last column of the partition key and on the last column of the full -primary key. - -It is also possible to “group” ``CLUSTERING COLUMNS`` together in a relation using the tuple notation. 
For instance::
-
-    SELECT * FROM posts
-     WHERE userid = 'john doe'
-       AND (blog_title, posted_at) > ('John''s Blog', '2012-01-01')
-
-will request all rows that sort after the one having “John's Blog” as ``blog_title`` and '2012-01-01' for ``posted_at``
-in the clustering order. In particular, rows having a ``posted_at <= '2012-01-01'`` will be returned as long as their
-``blog_title > 'John''s Blog'``, which would not be the case for::
-
-    SELECT * FROM posts
-     WHERE userid = 'john doe'
-       AND blog_title > 'John''s Blog'
-       AND posted_at > '2012-01-01'
-
-The tuple notation may also be used for ``IN`` clauses on clustering columns::
-
-    SELECT * FROM posts
-     WHERE userid = 'john doe'
-       AND (blog_title, posted_at) IN (('John''s Blog', '2012-01-01'), ('Extreme Chess', '2014-06-01'))
-
-The ``CONTAINS`` operator may only be used on collection columns (lists, sets, and maps). In the case of maps,
-``CONTAINS`` applies to the map values. The ``CONTAINS KEY`` operator may only be used on map columns and applies to the
-map keys.
-
-.. _group-by-clause:
-
-Grouping results
-~~~~~~~~~~~~~~~~
-
-The ``GROUP BY`` option allows condensing into a single row all selected rows that share the same values for a set
-of columns.
-
-Using the ``GROUP BY`` option, it is only possible to group rows at the partition key level or at a clustering column
-level. As a consequence, the ``GROUP BY`` option only accepts as arguments primary key column names in the primary key
-order. If a primary key column is restricted by an equality restriction, it is not required to be present in the
-``GROUP BY`` clause.
-
-Aggregate functions will produce a separate value for each group. If no ``GROUP BY`` clause is specified,
-aggregate functions will produce a single value for all the rows.
-
-If a column is selected without an aggregate function, in a statement with a ``GROUP BY``, the first value encountered
-in each group will be returned.
-
-.. _ordering-clause:
-
-Ordering results
-~~~~~~~~~~~~~~~~
-
-The ``ORDER BY`` clause allows selecting the order of the returned results. It takes as argument a list of column names
-along with the order for each column (``ASC`` for ascending and ``DESC`` for descending; omitting the order is
-equivalent to ``ASC``). Currently the possible orderings are limited by the :ref:`clustering order `
-defined on the table:
-
-- if the table has been defined without any specific ``CLUSTERING ORDER``, then the allowed orderings are the order
-  induced by the clustering columns and the reverse of that one.
-- otherwise, the orderings allowed are the order of the ``CLUSTERING ORDER`` option and the reversed one.
-
-.. _limit-clause:
-
-Limiting results
-~~~~~~~~~~~~~~~~
-
-The ``LIMIT`` option to a ``SELECT`` statement limits the number of rows returned by a query, while the ``PER PARTITION
-LIMIT`` option limits the number of rows returned for a given partition by the query. Note that both types of limit can
-be used in the same statement.
-
-.. _allow-filtering:
-
-Allowing filtering
-~~~~~~~~~~~~~~~~~~
-
-By default, CQL only allows select queries that don't involve “filtering” server side, i.e. queries where we know that
-all (live) records read will be returned (maybe partly) in the result set. The reasoning is that those “non filtering”
-queries have predictable performance in the sense that they will execute in a time that is proportional to the amount of
-data **returned** by the query (which can be controlled through ``LIMIT``).
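-
-For instance, the following query (a sketch reusing the ``events`` table from the ``SELECT`` examples above, and
-assuming ``event_type`` is its partition key) will never return more than 100 rows, however much data the table
-contains::
-
-    SELECT time, value FROM events WHERE event_type = 'myEvent' LIMIT 100;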
-
-The ``ALLOW FILTERING`` option explicitly allows (some) queries that require filtering. Please note that a
-query using ``ALLOW FILTERING`` may thus have unpredictable performance (in the sense defined above), i.e. even a query
-that selects a handful of records **may** exhibit performance that depends on the total amount of data stored in the
-cluster.
-
-For instance, consider the following table holding user profiles with their year of birth (with a secondary index on
-it) and country of residence::
-
-    CREATE TABLE users (
-        username text PRIMARY KEY,
-        firstname text,
-        lastname text,
-        birth_year int,
-        country text
-    )
-
-    CREATE INDEX ON users(birth_year);
-
-Then the following queries are valid::
-
-    SELECT * FROM users;
-    SELECT * FROM users WHERE birth_year = 1981;
-
-because in both cases, Cassandra guarantees that the performance of these queries will be proportional to the amount of
-data returned. In particular, if no users are born in 1981, then the performance of the second query will not depend on
-the number of user profiles stored in the database (not directly at least: due to secondary index implementation
-considerations, this query may still depend on the number of nodes in the cluster, which indirectly depends on the
-amount of data stored. Nevertheless, the number of nodes will always be many orders of magnitude lower than the number
-of user profiles stored). Of course, both queries may return very large result sets in practice, but the amount of data
-returned can always be controlled by adding a ``LIMIT``.
-
-However, the following query will be rejected::
-
-    SELECT * FROM users WHERE birth_year = 1981 AND country = 'FR';
-
-because Cassandra cannot guarantee that it won't have to scan a large amount of data even if the result of such a query
-is small. Typically, it will scan all the index entries for users born in 1981 even if only a handful are actually from
-France. However, if you “know what you are doing”, you can force the execution of this query by using ``ALLOW
-FILTERING`` and so the following query is valid::
-
-    SELECT * FROM users WHERE birth_year = 1981 AND country = 'FR' ALLOW FILTERING;
-
-.. _insert-statement:
-
-INSERT
-^^^^^^
-
-Inserting data for a row is done using an ``INSERT`` statement:
-
-.. productionlist::
-   insert_statement: INSERT INTO `table_name` ( `names_values` | `json_clause` )
-                   : [ IF NOT EXISTS ]
-                   : [ USING `update_parameter` ( AND `update_parameter` )* ]
-   names_values: `names` VALUES `tuple_literal`
-   json_clause: JSON `string` [ DEFAULT ( NULL | UNSET ) ]
-   names: '(' `column_name` ( ',' `column_name` )* ')'
-
-For instance::
-
-    INSERT INTO NerdMovies (movie, director, main_actor, year)
-                    VALUES ('Serenity', 'Joss Whedon', 'Nathan Fillion', 2005)
-          USING TTL 86400;
-
-    INSERT INTO NerdMovies JSON '{"movie": "Serenity",
-                                  "director": "Joss Whedon",
-                                  "year": 2005}';
-
-The ``INSERT`` statement writes one or more columns for a given row in a table. Note that since a row is identified by
-its ``PRIMARY KEY``, at least the columns composing it must be specified. The list of columns to insert must be
-supplied when using the ``VALUES`` syntax. When using the ``JSON`` syntax, they are optional. See the
-section on :ref:`JSON support ` for more details.
-
-Note that unlike in SQL, ``INSERT`` does not check the prior existence of the row by default: the row is created if none
-existed before, and updated otherwise. Furthermore, there is no means of knowing whether a creation or an update
-happened.
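-
-To illustrate this upsert behaviour, assuming ``movie`` is the partition key of the ``NerdMovies`` table used above
-(a sketch; the exact schema of that table is not shown in this document), the two statements below both succeed and the
-second one simply overwrites the ``year`` written by the first::
-
-    INSERT INTO NerdMovies (movie, director, main_actor, year)
-                    VALUES ('Serenity', 'Joss Whedon', 'Nathan Fillion', 2005);
-
-    INSERT INTO NerdMovies (movie, director, main_actor, year)
-                    VALUES ('Serenity', 'Joss Whedon', 'Nathan Fillion', 2003);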
- -It is however possible to use the ``IF NOT EXISTS`` condition to only insert if the row does not exist prior to the -insertion. But please note that using ``IF NOT EXISTS`` will incur a non negligible performance cost (internally, Paxos -will be used) so this should be used sparingly. - -All updates for an ``INSERT`` are applied atomically and in isolation. - -Please refer to the :ref:`UPDATE ` section for informations on the :token:`update_parameter`. - -Also note that ``INSERT`` does not support counters, while ``UPDATE`` does. - -.. _update-statement: - -UPDATE -^^^^^^ - -Updating a row is done using an ``UPDATE`` statement: - -.. productionlist:: - update_statement: UPDATE `table_name` - : [ USING `update_parameter` ( AND `update_parameter` )* ] - : SET `assignment` ( ',' `assignment` )* - : WHERE `where_clause` - : [ IF ( EXISTS | `condition` ( AND `condition` )*) ] - update_parameter: ( TIMESTAMP | TTL ) ( `integer` | `bind_marker` ) - assignment: `simple_selection` '=' `term` - :| `column_name` '=' `column_name` ( '+' | '-' ) `term` - :| `column_name` '=' `list_literal` '+' `column_name` - simple_selection: `column_name` - :| `column_name` '[' `term` ']' - :| `column_name` '.' `field_name - condition: `simple_selection` `operator` `term` - -For instance:: - - UPDATE NerdMovies USING TTL 400 - SET director = 'Joss Whedon', - main_actor = 'Nathan Fillion', - year = 2005 - WHERE movie = 'Serenity'; - - UPDATE UserActions - SET total = total + 2 - WHERE user = B70DE1D0-9908-4AE3-BE34-5573E5B09F14 - AND action = 'click'; - -The ``UPDATE`` statement writes one or more columns for a given row in a table. The :token:`where_clause` is used to -select the row to update and must include all columns composing the ``PRIMARY KEY``. Non primary key columns are then -set using the ``SET`` keyword. - -Note that unlike in SQL, ``UPDATE`` does not check the prior existence of the row by default (except through ``IF``, see -below): the row is created if none existed before, and updated otherwise. Furthermore, there are no means to know -whether a creation or update occurred. - -It is however possible to use the conditions on some columns through ``IF``, in which case the row will not be updated -unless the conditions are met. But, please note that using ``IF`` conditions will incur a non-negligible performance -cost (internally, Paxos will be used) so this should be used sparingly. - -In an ``UPDATE`` statement, all updates within the same partition key are applied atomically and in isolation. - -Regarding the :token:`assignment`: - -- ``c = c + 3`` is used to increment/decrement counters. The column name after the '=' sign **must** be the same than - the one before the '=' sign. Note that increment/decrement is only allowed on counters, and are the *only* update - operations allowed on counters. See the section on :ref:`counters ` for details. -- ``id = id + `` and ``id[value1] = value2`` are for collections, see the :ref:`relevant section - ` for details. -- ``id.field = 3`` is for setting the value of a field on a non-frozen user-defined types. see the :ref:`relevant section - ` for details. - -.. _update-parameters: - -Update parameters -~~~~~~~~~~~~~~~~~ - -The ``UPDATE``, ``INSERT`` (and ``DELETE`` and ``BATCH`` for the ``TIMESTAMP``) statements support the following -parameters: - -- ``TIMESTAMP``: sets the timestamp for the operation. If not specified, the coordinator will use the current time (in - microseconds) at the start of statement execution as the timestamp. 
This is usually a suitable default. -- ``TTL``: specifies an optional Time To Live (in seconds) for the inserted values. If set, the inserted values are - automatically removed from the database after the specified time. Note that the TTL concerns the inserted values, not - the columns themselves. This means that any subsequent update of the column will also reset the TTL (to whatever TTL - is specified in that update). By default, values never expire. A TTL of 0 is equivalent to no TTL. If the table has a - default_time_to_live, a TTL of 0 will remove the TTL for the inserted or updated values. A TTL of ``null`` is equivalent - to inserting with a TTL of 0. - -.. _delete_statement: - -DELETE -^^^^^^ - -Deleting rows or parts of rows uses the ``DELETE`` statement: - -.. productionlist:: - delete_statement: DELETE [ `simple_selection` ( ',' `simple_selection` ) ] - : FROM `table_name` - : [ USING `update_parameter` ( AND `update_parameter` )* ] - : WHERE `where_clause` - : [ IF ( EXISTS | `condition` ( AND `condition` )*) ] - -For instance:: - - DELETE FROM NerdMovies USING TIMESTAMP 1240003134 - WHERE movie = 'Serenity'; - - DELETE phone FROM Users - WHERE userid IN (C73DE1D3-AF08-40F3-B124-3FF3E5109F22, B70DE1D0-9908-4AE3-BE34-5573E5B09F14); - -The ``DELETE`` statement deletes columns and rows. If column names are provided directly after the ``DELETE`` keyword, -only those columns are deleted from the row indicated by the ``WHERE`` clause. Otherwise, whole rows are removed. - -The ``WHERE`` clause specifies which rows are to be deleted. Multiple rows may be deleted with one statement by using an -``IN`` operator. A range of rows may be deleted using an inequality operator (such as ``>=``). - -``DELETE`` supports the ``TIMESTAMP`` option with the same semantics as in :ref:`updates `. - -In a ``DELETE`` statement, all deletions within the same partition key are applied atomically and in isolation. - -A ``DELETE`` operation can be conditional through the use of an ``IF`` clause, similar to ``UPDATE`` and ``INSERT`` -statements. However, as with ``INSERT`` and ``UPDATE`` statements, this will incur a non-negligible performance cost -(internally, Paxos will be used) and so should be used sparingly. - -.. _batch_statement: - -BATCH -^^^^^ - -Multiple ``INSERT``, ``UPDATE`` and ``DELETE`` can be executed in a single statement by grouping them through a -``BATCH`` statement: - -.. productionlist:: - batch_statement: BEGIN [ UNLOGGED | COUNTER ] BATCH - : [ USING `update_parameter` ( AND `update_parameter` )* ] - : `modification_statement` ( ';' `modification_statement` )* - : APPLY BATCH - modification_statement: `insert_statement` | `update_statement` | `delete_statement` - -For instance:: - - BEGIN BATCH - INSERT INTO users (userid, password, name) VALUES ('user2', 'ch@ngem3b', 'second user'); - UPDATE users SET password = 'ps22dhds' WHERE userid = 'user3'; - INSERT INTO users (userid, password) VALUES ('user4', 'ch@ngem3c'); - DELETE name FROM users WHERE userid = 'user1'; - APPLY BATCH; - -The ``BATCH`` statement group multiple modification statements (insertions/updates and deletions) into a single -statement. It serves several purposes: - -- It saves network round-trips between the client and the server (and sometimes between the server coordinator and the - replicas) when batching multiple updates. -- All updates in a ``BATCH`` belonging to a given partition key are performed in isolation. 
-
-By default, all operations in the batch are performed as *logged*, to ensure all mutations eventually complete (or
-  none will). See the notes on :ref:`UNLOGGED batches ` for more details.
-
-Note that:
-
-- ``BATCH`` statements may only contain ``UPDATE``, ``INSERT`` and ``DELETE`` statements (not other batches for instance).
-- Batches are *not* a full analogue for SQL transactions.
-- If a timestamp is not specified for each operation, then all operations will be applied with the same timestamp
-  (either one generated automatically, or the timestamp provided at the batch level). Due to Cassandra's conflict
-  resolution procedure in the case of `timestamp ties `__, operations may
-  be applied in an order that is different from the order they are listed in the ``BATCH`` statement. To force a
-  particular operation ordering, you must specify per-operation timestamps.
-- A LOGGED batch to a single partition will be converted to an UNLOGGED batch as an optimization.
-
-.. _unlogged-batches:
-
-``UNLOGGED`` batches
-~~~~~~~~~~~~~~~~~~~~
-
-By default, Cassandra uses a batch log to ensure all operations in a batch eventually complete or none will (note
-however that operations are only isolated within a single partition).
-
-There is a performance penalty for batch atomicity when a batch spans multiple partitions. If you do not want to incur
-this penalty, you can tell Cassandra to skip the batchlog with the ``UNLOGGED`` option. If the ``UNLOGGED`` option is
-used, a failed batch might leave the batch only partly applied.
-
-``COUNTER`` batches
-~~~~~~~~~~~~~~~~~~~
-
-Use the ``COUNTER`` option for batched counter updates. Unlike other
-updates in Cassandra, counter updates are not idempotent.
diff --git a/doc/source/cql/functions.rst b/doc/source/cql/functions.rst
deleted file mode 100644
index 47026cd94b05..000000000000
--- a/doc/source/cql/functions.rst
+++ /dev/null
@@ -1,558 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements. See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership. The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License. You may obtain a copy of the License at
-..
-.. http://www.apache.org/licenses/LICENSE-2.0
-..
-.. Unless required by applicable law or agreed to in writing, software
-.. distributed under the License is distributed on an "AS IS" BASIS,
-.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-.. See the License for the specific language governing permissions and
-.. limitations under the License.
-
-.. highlight:: cql
-
-.. _cql-functions:
-
-.. Need some intro for UDF and native functions in general and point those to it.
-.. _udfs:
-.. _native-functions:
-
-Functions
----------
-
-CQL supports two main categories of functions:
-
-- the :ref:`scalar functions `, which simply take a number of values and produce an output from them.
-- the :ref:`aggregate functions `, which are used to aggregate the results of multiple rows from a
-  ``SELECT`` statement.
-
-In both cases, CQL provides a number of native "hard-coded" functions as well as the ability to create new user-defined
-functions.
-
-..
note:: By default, the use of user-defined functions is disabled by default for security concerns (even when - enabled, the execution of user-defined functions is sandboxed and a "rogue" function should not be allowed to do - evil, but no sandbox is perfect so using user-defined functions is opt-in). See the ``enable_user_defined_functions`` - in ``cassandra.yaml`` to enable them. - -A function is identifier by its name: - -.. productionlist:: - function_name: [ `keyspace_name` '.' ] `name` - -.. _scalar-functions: - -Scalar functions -^^^^^^^^^^^^^^^^ - -.. _scalar-native-functions: - -Native functions -~~~~~~~~~~~~~~~~ - -Cast -```` - -The ``cast`` function can be used to converts one native datatype to another. - -The following table describes the conversions supported by the ``cast`` function. Cassandra will silently ignore any -cast converting a datatype into its own datatype. - -=============== ======================================================================================================= - From To -=============== ======================================================================================================= - ``ascii`` ``text``, ``varchar`` - ``bigint`` ``tinyint``, ``smallint``, ``int``, ``float``, ``double``, ``decimal``, ``varint``, ``text``, - ``varchar`` - ``boolean`` ``text``, ``varchar`` - ``counter`` ``tinyint``, ``smallint``, ``int``, ``bigint``, ``float``, ``double``, ``decimal``, ``varint``, - ``text``, ``varchar`` - ``date`` ``timestamp`` - ``decimal`` ``tinyint``, ``smallint``, ``int``, ``bigint``, ``float``, ``double``, ``varint``, ``text``, - ``varchar`` - ``double`` ``tinyint``, ``smallint``, ``int``, ``bigint``, ``float``, ``decimal``, ``varint``, ``text``, - ``varchar`` - ``float`` ``tinyint``, ``smallint``, ``int``, ``bigint``, ``double``, ``decimal``, ``varint``, ``text``, - ``varchar`` - ``inet`` ``text``, ``varchar`` - ``int`` ``tinyint``, ``smallint``, ``bigint``, ``float``, ``double``, ``decimal``, ``varint``, ``text``, - ``varchar`` - ``smallint`` ``tinyint``, ``int``, ``bigint``, ``float``, ``double``, ``decimal``, ``varint``, ``text``, - ``varchar`` - ``time`` ``text``, ``varchar`` - ``timestamp`` ``date``, ``text``, ``varchar`` - ``timeuuid`` ``timestamp``, ``date``, ``text``, ``varchar`` - ``tinyint`` ``tinyint``, ``smallint``, ``int``, ``bigint``, ``float``, ``double``, ``decimal``, ``varint``, - ``text``, ``varchar`` - ``uuid`` ``text``, ``varchar`` - ``varint`` ``tinyint``, ``smallint``, ``int``, ``bigint``, ``float``, ``double``, ``decimal``, ``text``, - ``varchar`` -=============== ======================================================================================================= - -The conversions rely strictly on Java's semantics. For example, the double value 1 will be converted to the text value -'1.0'. For instance:: - - SELECT avg(cast(count as double)) FROM myTable - -Token -````` - -The ``token`` function allows to compute the token for a given partition key. The exact signature of the token function -depends on the table concerned and of the partitioner used by the cluster. - -The type of the arguments of the ``token`` depend on the type of the partition key columns. The return type depend on -the partitioner in use: - -- For Murmur3Partitioner, the return type is ``bigint``. -- For RandomPartitioner, the return type is ``varint``. -- For ByteOrderedPartitioner, the return type is ``blob``. 
- -For instance, in a cluster using the default Murmur3Partitioner, if a table is defined by:: - - CREATE TABLE users ( - userid text PRIMARY KEY, - username text, - ) - -then the ``token`` function will take a single argument of type ``text`` (in that case, the partition key is ``userid`` -(there is no clustering columns so the partition key is the same than the primary key)), and the return type will be -``bigint``. - -Uuid -```` -The ``uuid`` function takes no parameters and generates a random type 4 uuid suitable for use in ``INSERT`` or -``UPDATE`` statements. - -.. _timeuuid-functions: - -Timeuuid functions -`````````````````` - -``now`` -####### - -The ``now`` function takes no arguments and generates, on the coordinator node, a new unique timeuuid (at the time where -the statement using it is executed). Note that this method is useful for insertion but is largely non-sensical in -``WHERE`` clauses. For instance, a query of the form:: - - SELECT * FROM myTable WHERE t = now() - -will never return any result by design, since the value returned by ``now()`` is guaranteed to be unique. - -``minTimeuuid`` and ``maxTimeuuid`` -################################### - -The ``minTimeuuid`` (resp. ``maxTimeuuid``) function takes a ``timestamp`` value ``t`` (which can be `either a timestamp -or a date string `) and return a *fake* ``timeuuid`` corresponding to the *smallest* (resp. *biggest*) -possible ``timeuuid`` having for timestamp ``t``. So for instance:: - - SELECT * FROM myTable - WHERE t > maxTimeuuid('2013-01-01 00:05+0000') - AND t < minTimeuuid('2013-02-02 10:00+0000') - -will select all rows where the ``timeuuid`` column ``t`` is strictly older than ``'2013-01-01 00:05+0000'`` but strictly -younger than ``'2013-02-02 10:00+0000'``. Please note that ``t >= maxTimeuuid('2013-01-01 00:05+0000')`` would still -*not* select a ``timeuuid`` generated exactly at '2013-01-01 00:05+0000' and is essentially equivalent to ``t > -maxTimeuuid('2013-01-01 00:05+0000')``. - -.. note:: We called the values generated by ``minTimeuuid`` and ``maxTimeuuid`` *fake* UUID because they do no respect - the Time-Based UUID generation process specified by the `RFC 4122 `__. In - particular, the value returned by these 2 methods will not be unique. This means you should only use those methods - for querying (as in the example above). Inserting the result of those methods is almost certainly *a bad idea*. - -Time conversion functions -````````````````````````` - -A number of functions are provided to “convert” a ``timeuuid``, a ``timestamp`` or a ``date`` into another ``native`` -type. 
- -===================== =============== =================================================================== - Function name Input type Description -===================== =============== =================================================================== - ``toDate`` ``timeuuid`` Converts the ``timeuuid`` argument into a ``date`` type - ``toDate`` ``timestamp`` Converts the ``timestamp`` argument into a ``date`` type - ``toTimestamp`` ``timeuuid`` Converts the ``timeuuid`` argument into a ``timestamp`` type - ``toTimestamp`` ``date`` Converts the ``date`` argument into a ``timestamp`` type - ``toUnixTimestamp`` ``timeuuid`` Converts the ``timeuuid`` argument into a ``bigInt`` raw value - ``toUnixTimestamp`` ``timestamp`` Converts the ``timestamp`` argument into a ``bigInt`` raw value - ``toUnixTimestamp`` ``date`` Converts the ``date`` argument into a ``bigInt`` raw value - ``dateOf`` ``timeuuid`` Similar to ``toTimestamp(timeuuid)`` (DEPRECATED) - ``unixTimestampOf`` ``timeuuid`` Similar to ``toUnixTimestamp(timeuuid)`` (DEPRECATED) -===================== =============== =================================================================== - -Blob conversion functions -````````````````````````` -A number of functions are provided to “convert” the native types into binary data (``blob``). For every -```` ``type`` supported by CQL (a notable exceptions is ``blob``, for obvious reasons), the function -``typeAsBlob`` takes a argument of type ``type`` and return it as a ``blob``. Conversely, the function ``blobAsType`` -takes a 64-bit ``blob`` argument and convert it to a ``bigint`` value. And so for instance, ``bigintAsBlob(3)`` is -``0x0000000000000003`` and ``blobAsBigint(0x0000000000000003)`` is ``3``. - -.. _user-defined-scalar-functions: - -User-defined functions -~~~~~~~~~~~~~~~~~~~~~~ - -User-defined functions allow execution of user-provided code in Cassandra. By default, Cassandra supports defining -functions in *Java* and *JavaScript*. Support for other JSR 223 compliant scripting languages (such as Python, Ruby, and -Scala) can be added by adding a JAR to the classpath. - -UDFs are part of the Cassandra schema. As such, they are automatically propagated to all nodes in the cluster. - -UDFs can be *overloaded* - i.e. multiple UDFs with different argument types but the same function name. Example:: - - CREATE FUNCTION sample ( arg int ) ...; - CREATE FUNCTION sample ( arg text ) ...; - -User-defined functions are susceptible to all of the normal problems with the chosen programming language. Accordingly, -implementations should be safe against null pointer exceptions, illegal arguments, or any other potential source of -exceptions. An exception during function execution will result in the entire statement failing. - -It is valid to use *complex* types like collections, tuple types and user-defined types as argument and return types. -Tuple types and user-defined types are handled by the conversion functions of the DataStax Java Driver. Please see the -documentation of the Java Driver for details on handling tuple types and user-defined types. - -Arguments for functions can be literals or terms. Prepared statement placeholders can be used, too. - -Note that you can use the double-quoted string syntax to enclose the UDF source code. For example:: - - CREATE FUNCTION some_function ( arg int ) - RETURNS NULL ON NULL INPUT - RETURNS int - LANGUAGE java - AS $$ return arg; $$; - - SELECT some_function(column) FROM atable ...; - UPDATE atable SET col = some_function(?) 
...; - - CREATE TYPE custom_type (txt text, i int); - CREATE FUNCTION fct_using_udt ( udtarg frozen ) - RETURNS NULL ON NULL INPUT - RETURNS text - LANGUAGE java - AS $$ return udtarg.getString("txt"); $$; - -User-defined functions can be used in ``SELECT``, ``INSERT`` and ``UPDATE`` statements. - -The implicitly available ``udfContext`` field (or binding for script UDFs) provides the necessary functionality to -create new UDT and tuple values:: - - CREATE TYPE custom_type (txt text, i int); - CREATE FUNCTION fct\_using\_udt ( somearg int ) - RETURNS NULL ON NULL INPUT - RETURNS custom_type - LANGUAGE java - AS $$ - UDTValue udt = udfContext.newReturnUDTValue(); - udt.setString("txt", "some string"); - udt.setInt("i", 42); - return udt; - $$; - -The definition of the ``UDFContext`` interface can be found in the Apache Cassandra source code for -``org.apache.cassandra.cql3.functions.UDFContext``. - -.. code-block:: java - - public interface UDFContext - { - UDTValue newArgUDTValue(String argName); - UDTValue newArgUDTValue(int argNum); - UDTValue newReturnUDTValue(); - UDTValue newUDTValue(String udtName); - TupleValue newArgTupleValue(String argName); - TupleValue newArgTupleValue(int argNum); - TupleValue newReturnTupleValue(); - TupleValue newTupleValue(String cqlDefinition); - } - -Java UDFs already have some imports for common interfaces and classes defined. These imports are: - -.. code-block:: java - - import java.nio.ByteBuffer; - import java.util.List; - import java.util.Map; - import java.util.Set; - import org.apache.cassandra.cql3.functions.UDFContext; - import com.datastax.driver.core.TypeCodec; - import com.datastax.driver.core.TupleValue; - import com.datastax.driver.core.UDTValue; - -Please note, that these convenience imports are not available for script UDFs. - -.. _create-function-statement: - -CREATE FUNCTION -``````````````` - -Creating a new user-defined function uses the ``CREATE FUNCTION`` statement: - -.. productionlist:: - create_function_statement: CREATE [ OR REPLACE ] FUNCTION [ IF NOT EXISTS] - : `function_name` '(' `arguments_declaration` ')' - : [ CALLED | RETURNS NULL ] ON NULL INPUT - : RETURNS `cql_type` - : LANGUAGE `identifier` - : AS `string` - arguments_declaration: `identifier` `cql_type` ( ',' `identifier` `cql_type` )* - -For instance:: - - CREATE OR REPLACE FUNCTION somefunction(somearg int, anotherarg text, complexarg frozen, listarg list) - RETURNS NULL ON NULL INPUT - RETURNS text - LANGUAGE java - AS $$ - // some Java code - $$; - - CREATE FUNCTION IF NOT EXISTS akeyspace.fname(someArg int) - CALLED ON NULL INPUT - RETURNS text - LANGUAGE java - AS $$ - // some Java code - $$; - -``CREATE FUNCTION`` with the optional ``OR REPLACE`` keywords either creates a function or replaces an existing one with -the same signature. A ``CREATE FUNCTION`` without ``OR REPLACE`` fails if a function with the same signature already -exists. - -If the optional ``IF NOT EXISTS`` keywords are used, the function will -only be created if another function with the same signature does not -exist. - -``OR REPLACE`` and ``IF NOT EXISTS`` cannot be used together. - -Behavior on invocation with ``null`` values must be defined for each -function. There are two options: - -#. ``RETURNS NULL ON NULL INPUT`` declares that the function will always - return ``null`` if any of the input arguments is ``null``. -#. ``CALLED ON NULL INPUT`` declares that the function will always be - executed. 
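-
-For instance, a function declared with ``CALLED ON NULL INPUT`` has to handle ``null`` arguments in its body (a minimal
-sketch; ``len_or_zero`` is a hypothetical function, not part of the examples above)::
-
-    CREATE FUNCTION len_or_zero ( input text )
-        CALLED ON NULL INPUT
-        RETURNS int
-        LANGUAGE java
-        AS $$ return (input == null) ? 0 : input.length(); $$;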
- -Function Signature -################## - -Signatures are used to distinguish individual functions. The signature consists of: - -#. The fully qualified function name - i.e *keyspace* plus *function-name* -#. The concatenated list of all argument types - -Note that keyspace names, function names and argument types are subject to the default naming conventions and -case-sensitivity rules. - -Functions belong to a keyspace. If no keyspace is specified in ````, the current keyspace is used (i.e. -the keyspace specified using the ``USE`` statement). It is not possible to create a user-defined function in one of the -system keyspaces. - -.. _drop-function-statement: - -DROP FUNCTION -````````````` - -Dropping a function uses the ``DROP FUNCTION`` statement: - -.. productionlist:: - drop_function_statement: DROP FUNCTION [ IF EXISTS ] `function_name` [ '(' `arguments_signature` ')' ] - arguments_signature: `cql_type` ( ',' `cql_type` )* - -For instance:: - - DROP FUNCTION myfunction; - DROP FUNCTION mykeyspace.afunction; - DROP FUNCTION afunction ( int ); - DROP FUNCTION afunction ( text ); - -You must specify the argument types (:token:`arguments_signature`) of the function to drop if there are multiple -functions with the same name but a different signature (overloaded functions). - -``DROP FUNCTION`` with the optional ``IF EXISTS`` keywords drops a function if it exists, but does not throw an error if -it doesn't - -.. _aggregate-functions: - -Aggregate functions -^^^^^^^^^^^^^^^^^^^ - -Aggregate functions work on a set of rows. They receive values for each row and returns one value for the whole set. - -If ``normal`` columns, ``scalar functions``, ``UDT`` fields, ``writetime`` or ``ttl`` are selected together with -aggregate functions, the values returned for them will be the ones of the first row matching the query. - -Native aggregates -~~~~~~~~~~~~~~~~~ - -.. _count-function: - -Count -````` - -The ``count`` function can be used to count the rows returned by a query. Example:: - - SELECT COUNT (*) FROM plays; - SELECT COUNT (1) FROM plays; - -It also can be used to count the non null value of a given column:: - - SELECT COUNT (scores) FROM plays; - -Max and Min -``````````` - -The ``max`` and ``min`` functions can be used to compute the maximum and the minimum value returned by a query for a -given column. For instance:: - - SELECT MIN (players), MAX (players) FROM plays WHERE game = 'quake'; - -Sum -``` - -The ``sum`` function can be used to sum up all the values returned by a query for a given column. For instance:: - - SELECT SUM (players) FROM plays; - -Avg -``` - -The ``avg`` function can be used to compute the average of all the values returned by a query for a given column. For -instance:: - - SELECT AVG (players) FROM plays; - -.. _user-defined-aggregates-functions: - -User-Defined Aggregates -~~~~~~~~~~~~~~~~~~~~~~~ - -User-defined aggregates allow the creation of custom aggregate functions. Common examples of aggregate functions are -*count*, *min*, and *max*. - -Each aggregate requires an *initial state* (``INITCOND``, which defaults to ``null``) of type ``STYPE``. The first -argument of the state function must have type ``STYPE``. The remaining arguments of the state function must match the -types of the user-defined aggregate arguments. The state function is called once for each row, and the value returned by -the state function becomes the new state. After all rows are processed, the optional ``FINALFUNC`` is executed with last -state value as its argument. 
- -``STYPE`` is mandatory in order to be able to distinguish possibly overloaded versions of the state and/or final -function (since the overload can appear after creation of the aggregate). - -User-defined aggregates can be used in ``SELECT`` statement. - -A complete working example for user-defined aggregates (assuming that a keyspace has been selected using the ``USE`` -statement):: - - CREATE OR REPLACE FUNCTION averageState(state tuple, val int) - CALLED ON NULL INPUT - RETURNS tuple - LANGUAGE java - AS $$ - if (val != null) { - state.setInt(0, state.getInt(0)+1); - state.setLong(1, state.getLong(1)+val.intValue()); - } - return state; - $$; - - CREATE OR REPLACE FUNCTION averageFinal (state tuple) - CALLED ON NULL INPUT - RETURNS double - LANGUAGE java - AS $$ - double r = 0; - if (state.getInt(0) == 0) return null; - r = state.getLong(1); - r /= state.getInt(0); - return Double.valueOf(r); - $$; - - CREATE OR REPLACE AGGREGATE average(int) - SFUNC averageState - STYPE tuple - FINALFUNC averageFinal - INITCOND (0, 0); - - CREATE TABLE atable ( - pk int PRIMARY KEY, - val int - ); - - INSERT INTO atable (pk, val) VALUES (1,1); - INSERT INTO atable (pk, val) VALUES (2,2); - INSERT INTO atable (pk, val) VALUES (3,3); - INSERT INTO atable (pk, val) VALUES (4,4); - - SELECT average(val) FROM atable; - -.. _create-aggregate-statement: - -CREATE AGGREGATE -```````````````` - -Creating (or replacing) a user-defined aggregate function uses the ``CREATE AGGREGATE`` statement: - -.. productionlist:: - create_aggregate_statement: CREATE [ OR REPLACE ] AGGREGATE [ IF NOT EXISTS ] - : `function_name` '(' `arguments_signature` ')' - : SFUNC `function_name` - : STYPE `cql_type` - : [ FINALFUNC `function_name` ] - : [ INITCOND `term` ] - -See above for a complete example. - -``CREATE AGGREGATE`` with the optional ``OR REPLACE`` keywords either creates an aggregate or replaces an existing one -with the same signature. A ``CREATE AGGREGATE`` without ``OR REPLACE`` fails if an aggregate with the same signature -already exists. - -``CREATE AGGREGATE`` with the optional ``IF NOT EXISTS`` keywords either creates an aggregate if it does not already -exist. - -``OR REPLACE`` and ``IF NOT EXISTS`` cannot be used together. - -``STYPE`` defines the type of the state value and must be specified. - -The optional ``INITCOND`` defines the initial state value for the aggregate. It defaults to ``null``. A non-\ ``null`` -``INITCOND`` must be specified for state functions that are declared with ``RETURNS NULL ON NULL INPUT``. - -``SFUNC`` references an existing function to be used as the state modifying function. The type of first argument of the -state function must match ``STYPE``. The remaining argument types of the state function must match the argument types of -the aggregate function. State is not updated for state functions declared with ``RETURNS NULL ON NULL INPUT`` and called -with ``null``. - -The optional ``FINALFUNC`` is called just before the aggregate result is returned. It must take only one argument with -type ``STYPE``. The return type of the ``FINALFUNC`` may be a different type. A final function declared with ``RETURNS -NULL ON NULL INPUT`` means that the aggregate's return value will be ``null``, if the last state is ``null``. - -If no ``FINALFUNC`` is defined, the overall return type of the aggregate function is ``STYPE``. If a ``FINALFUNC`` is -defined, it is the return type of that function. - -.. 
_drop-aggregate-statement: - -DROP AGGREGATE -`````````````` - -Dropping an user-defined aggregate function uses the ``DROP AGGREGATE`` statement: - -.. productionlist:: - drop_aggregate_statement: DROP AGGREGATE [ IF EXISTS ] `function_name` [ '(' `arguments_signature` ')' ] - -For instance:: - - DROP AGGREGATE myAggregate; - DROP AGGREGATE myKeyspace.anAggregate; - DROP AGGREGATE someAggregate ( int ); - DROP AGGREGATE someAggregate ( text ); - -The ``DROP AGGREGATE`` statement removes an aggregate created using ``CREATE AGGREGATE``. You must specify the argument -types of the aggregate to drop if there are multiple aggregates with the same name but a different signature (overloaded -aggregates). - -``DROP AGGREGATE`` with the optional ``IF EXISTS`` keywords drops an aggregate if it exists, and does nothing if a -function with the signature does not exist. diff --git a/doc/source/cql/index.rst b/doc/source/cql/index.rst deleted file mode 100644 index 00d90e41e320..000000000000 --- a/doc/source/cql/index.rst +++ /dev/null @@ -1,47 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. _cql: - -The Cassandra Query Language (CQL) -================================== - -This document describes the Cassandra Query Language (CQL) [#]_. Note that this document describes the last version of -the languages. However, the `changes <#changes>`_ section provides the diff between the different versions of CQL. - -CQL offers a model close to SQL in the sense that data is put in *tables* containing *rows* of *columns*. For -that reason, when used in this document, these terms (tables, rows and columns) have the same definition than they have -in SQL. But please note that as such, they do **not** refer to the concept of rows and columns found in the deprecated -thrift API (and earlier version 1 and 2 of CQL). - -.. toctree:: - :maxdepth: 2 - - definitions - types - ddl - dml - indexes - mvs - security - functions - json - triggers - appendices - changes - -.. [#] Technically, this document CQL version 3, which is not backward compatible with CQL version 1 and 2 (which have - been deprecated and remove) and differs from it in numerous ways. diff --git a/doc/source/cql/indexes.rst b/doc/source/cql/indexes.rst deleted file mode 100644 index 81fe429d0463..000000000000 --- a/doc/source/cql/indexes.rst +++ /dev/null @@ -1,83 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. 
http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: cql - -.. _secondary-indexes: - -Secondary Indexes ------------------ - -CQL supports creating secondary indexes on tables, allowing queries on the table to use those indexes. A secondary index -is identified by a name defined by: - -.. productionlist:: - index_name: re('[a-zA-Z_0-9]+') - - - -.. _create-index-statement: - -CREATE INDEX -^^^^^^^^^^^^ - -Creating a secondary index on a table uses the ``CREATE INDEX`` statement: - -.. productionlist:: - create_index_statement: CREATE [ CUSTOM ] INDEX [ IF NOT EXISTS ] [ `index_name` ] - : ON `table_name` '(' `index_identifier` ')' - : [ USING `string` [ WITH OPTIONS = `map_literal` ] ] - index_identifier: `column_name` - :| ( KEYS | VALUES | ENTRIES | FULL ) '(' `column_name` ')' - -For instance:: - - CREATE INDEX userIndex ON NerdMovies (user); - CREATE INDEX ON Mutants (abilityId); - CREATE INDEX ON users (keys(favs)); - CREATE CUSTOM INDEX ON users (email) USING 'path.to.the.IndexClass'; - CREATE CUSTOM INDEX ON users (email) USING 'path.to.the.IndexClass' WITH OPTIONS = {'storage': '/mnt/ssd/indexes/'}; - -The ``CREATE INDEX`` statement is used to create a new (automatic) secondary index for a given (existing) column in a -given table. A name for the index itself can be specified before the ``ON`` keyword, if desired. If data already exists -for the column, it will be indexed asynchronously. After the index is created, new data for the column is indexed -automatically at insertion time. - -Attempting to create an already existing index will return an error unless the ``IF NOT EXISTS`` option is used. If it -is used, the statement will be a no-op if the index already exists. - -Indexes on Map Keys -~~~~~~~~~~~~~~~~~~~ - -When creating an index on a :ref:`maps `, you may index either the keys or the values. If the column identifier is -placed within the ``keys()`` function, the index will be on the map keys, allowing you to use ``CONTAINS KEY`` in -``WHERE`` clauses. Otherwise, the index will be on the map values. - -.. _drop-index-statement: - -DROP INDEX -^^^^^^^^^^ - -Dropping a secondary index uses the ``DROP INDEX`` statement: - -.. productionlist:: - drop_index_statement: DROP INDEX [ IF EXISTS ] `index_name` - -The ``DROP INDEX`` statement is used to drop an existing secondary index. The argument of the statement is the index -name, which may optionally specify the keyspace of the index. - -If the index does not exists, the statement will return an error, unless ``IF EXISTS`` is used in which case the -operation is a no-op. diff --git a/doc/source/cql/json.rst b/doc/source/cql/json.rst deleted file mode 100644 index 539180aedda8..000000000000 --- a/doc/source/cql/json.rst +++ /dev/null @@ -1,115 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. 
http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: cql - -.. _cql-json: - -JSON Support ------------- - -Cassandra 2.2 introduces JSON support to :ref:`SELECT ` and :ref:`INSERT ` -statements. This support does not fundamentally alter the CQL API (for example, the schema is still enforced), it simply -provides a convenient way to work with JSON documents. - -SELECT JSON -^^^^^^^^^^^ - -With ``SELECT`` statements, the ``JSON`` keyword can be used to return each row as a single ``JSON`` encoded map. The -remainder of the ``SELECT`` statement behavior is the same. - -The result map keys are the same as the column names in a normal result set. For example, a statement like ``SELECT JSON -a, ttl(b) FROM ...`` would result in a map with keys ``"a"`` and ``"ttl(b)"``. However, this is one notable exception: -for symmetry with ``INSERT JSON`` behavior, case-sensitive column names with upper-case letters will be surrounded with -double quotes. For example, ``SELECT JSON myColumn FROM ...`` would result in a map key ``"\"myColumn\""`` (note the -escaped quotes). - -The map values will ``JSON``-encoded representations (as described below) of the result set values. - -INSERT JSON -^^^^^^^^^^^ - -With ``INSERT`` statements, the new ``JSON`` keyword can be used to enable inserting a ``JSON`` encoded map as a single -row. The format of the ``JSON`` map should generally match that returned by a ``SELECT JSON`` statement on the same -table. In particular, case-sensitive column names should be surrounded with double quotes. For example, to insert into a -table with two columns named "myKey" and "value", you would do the following:: - - INSERT INTO mytable JSON '{ "\"myKey\"": 0, "value": 0}' - -By default (or if ``DEFAULT NULL`` is explicitly used), a column omitted from the ``JSON`` map will be set to ``NULL``, -meaning that any pre-existing value for that column will be removed (resulting in a tombstone being created). -Alternatively, if the ``DEFAULT UNSET`` directive is used after the value, omitted column values will be left unset, -meaning that pre-existing values for those column will be preserved. - - -JSON Encoding of Cassandra Data Types -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Where possible, Cassandra will represent and accept data types in their native ``JSON`` representation. Cassandra will -also accept string representations matching the CQL literal format for all single-field types. For example, floats, -ints, UUIDs, and dates can be represented by CQL literal strings. However, compound types, such as collections, tuples, -and user-defined types must be represented by native ``JSON`` collections (maps and lists) or a JSON-encoded string -representation of the collection. 
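As a sketch of that rule (the ``address`` type and ``users_json`` table below are hypothetical, introduced only for illustration), a column of a compound type can be given either a native JSON collection or a JSON-encoded string of the same collection::

    CREATE TYPE address (street text, city text);
    CREATE TABLE users_json (
        id int PRIMARY KEY,
        emails list<text>,
        home frozen<address>
    );

    -- Native JSON collections: a JSON list for the CQL list, a JSON map for the UDT
    INSERT INTO users_json JSON '{"id": 1, "emails": ["a@example.org"], "home": {"street": "1 Main St", "city": "Springfield"}}';

    -- The same values supplied as JSON-encoded strings
    INSERT INTO users_json JSON '{"id": 2, "emails": "[\"b@example.org\"]", "home": "{\"street\": \"2 Main St\", \"city\": \"Springfield\"}"}';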
- -The following table describes the encodings that Cassandra will accept in ``INSERT JSON`` values (and ``fromJson()`` -arguments) as well as the format Cassandra will use when returning data for ``SELECT JSON`` statements (and -``fromJson()``): - -=============== ======================== =============== ============================================================== - Type Formats accepted Return format Notes -=============== ======================== =============== ============================================================== - ``ascii`` string string Uses JSON's ``\u`` character escape - ``bigint`` integer, string integer String must be valid 64 bit integer - ``blob`` string string String should be 0x followed by an even number of hex digits - ``boolean`` boolean, string boolean String must be "true" or "false" - ``date`` string string Date in format ``YYYY-MM-DD``, timezone UTC - ``decimal`` integer, float, string float May exceed 32 or 64-bit IEEE-754 floating point precision in - client-side decoder - ``double`` integer, float, string float String must be valid integer or float - ``float`` integer, float, string float String must be valid integer or float - ``inet`` string string IPv4 or IPv6 address - ``int`` integer, string integer String must be valid 32 bit integer - ``list`` list, string list Uses JSON's native list representation - ``map`` map, string map Uses JSON's native map representation - ``smallint`` integer, string integer String must be valid 16 bit integer - ``set`` list, string list Uses JSON's native list representation - ``text`` string string Uses JSON's ``\u`` character escape - ``time`` string string Time of day in format ``HH-MM-SS[.fffffffff]`` - ``timestamp`` integer, string string A timestamp. Strings constant allows to input :ref:`timestamps - as dates `. Datestamps with format ``YYYY-MM-DD - HH:MM:SS.SSS`` are returned. - ``timeuuid`` string string Type 1 UUID. See :token:`constant` for the UUID format - ``tinyint`` integer, string integer String must be valid 8 bit integer - ``tuple`` list, string list Uses JSON's native list representation - ``UDT`` map, string map Uses JSON's native map representation with field names as keys - ``uuid`` string string See :token:`constant` for the UUID format - ``varchar`` string string Uses JSON's ``\u`` character escape - ``varint`` integer, string integer Variable length; may overflow 32 or 64 bit integers in - client-side decoder -=============== ======================== =============== ============================================================== - -The fromJson() Function -^^^^^^^^^^^^^^^^^^^^^^^ - -The ``fromJson()`` function may be used similarly to ``INSERT JSON``, but for a single column value. It may only be used -in the ``VALUES`` clause of an ``INSERT`` statement or as one of the column values in an ``UPDATE``, ``DELETE``, or -``SELECT`` statement. For example, it cannot be used in the selection clause of a ``SELECT`` statement. - -The toJson() Function -^^^^^^^^^^^^^^^^^^^^^ - -The ``toJson()`` function may be used similarly to ``SELECT JSON``, but for a single column value. It may only be used -in the selection clause of a ``SELECT`` statement. diff --git a/doc/source/cql/mvs.rst b/doc/source/cql/mvs.rst deleted file mode 100644 index aabea10d8fb5..000000000000 --- a/doc/source/cql/mvs.rst +++ /dev/null @@ -1,166 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. 
distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: cql - -.. _materialized-views: - -Materialized Views ------------------- - -Materialized views names are defined by: - -.. productionlist:: - view_name: re('[a-zA-Z_0-9]+') - - -.. _create-materialized-view-statement: - -CREATE MATERIALIZED VIEW -^^^^^^^^^^^^^^^^^^^^^^^^ - -You can create a materialized view on a table using a ``CREATE MATERIALIZED VIEW`` statement: - -.. productionlist:: - create_materialized_view_statement: CREATE MATERIALIZED VIEW [ IF NOT EXISTS ] `view_name` AS - : `select_statement` - : PRIMARY KEY '(' `primary_key` ')' - : WITH `table_options` - -For instance:: - - CREATE MATERIALIZED VIEW monkeySpecies_by_population AS - SELECT * FROM monkeySpecies - WHERE population IS NOT NULL AND species IS NOT NULL - PRIMARY KEY (population, species) - WITH comment='Allow query by population instead of species'; - -The ``CREATE MATERIALIZED VIEW`` statement creates a new materialized view. Each such view is a set of *rows* which -corresponds to rows which are present in the underlying, or base, table specified in the ``SELECT`` statement. A -materialized view cannot be directly updated, but updates to the base table will cause corresponding updates in the -view. - -Creating a materialized view has 3 main parts: - -- The :ref:`select statement ` that restrict the data included in the view. -- The :ref:`primary key ` definition for the view. -- The :ref:`options ` for the view. - -Attempting to create an already existing materialized view will return an error unless the ``IF NOT EXISTS`` option is -used. If it is used, the statement will be a no-op if the materialized view already exists. - -.. _mv-select: - -MV select statement -``````````````````` - -The select statement of a materialized view creation defines which of the base table is included in the view. That -statement is limited in a number of ways: - -- the :ref:`selection ` is limited to those that only select columns of the base table. In other - words, you can't use any function (aggregate or not), casting, term, etc. Aliases are also not supported. You can - however use `*` as a shortcut of selecting all columns. Further, :ref:`static columns ` cannot be - included in a materialized view (which means ``SELECT *`` isn't allowed if the base table has static columns). -- the ``WHERE`` clause have the following restrictions: - - - it cannot include any :token:`bind_marker`. - - the columns that are not part of the *base table* primary key can only be restricted by an ``IS NOT NULL`` - restriction. No other restriction is allowed. - - as the columns that are part of the *view* primary key cannot be null, they must always be at least restricted by a - ``IS NOT NULL`` restriction (or any other restriction, but they must have one). - -- it cannot have neither an :ref:`ordering clause `, nor a :ref:`limit `, nor :ref:`ALLOW - FILTERING `. - -.. 
_mv-primary-key: - -MV primary key -`````````````` - -A view must have a primary key and that primary key must conform to the following restrictions: - -- it must contain all the primary key columns of the base table. This ensures that every row of the view correspond to - exactly one row of the base table. -- it can only contain a single column that is not a primary key column in the base table. - -So for instance, give the following base table definition:: - - CREATE TABLE t ( - k int, - c1 int, - c2 int, - v1 int, - v2 int, - PRIMARY KEY (k, c1, c2) - ) - -then the following view definitions are allowed:: - - CREATE MATERIALIZED VIEW mv1 AS - SELECT * FROM t WHERE k IS NOT NULL AND c1 IS NOT NULL AND c2 IS NOT NULL - PRIMARY KEY (c1, k, c2) - - CREATE MATERIALIZED VIEW mv1 AS - SELECT * FROM t WHERE k IS NOT NULL AND c1 IS NOT NULL AND c2 IS NOT NULL - PRIMARY KEY (v1, k, c1, c2) - -but the following ones are **not** allowed:: - - // Error: cannot include both v1 and v2 in the primary key as both are not in the base table primary key - CREATE MATERIALIZED VIEW mv1 AS - SELECT * FROM t WHERE k IS NOT NULL AND c1 IS NOT NULL AND c2 IS NOT NULL AND v1 IS NOT NULL - PRIMARY KEY (v1, v2, k, c1, c2) - - // Error: must include k in the primary as it's a base table primary key column - CREATE MATERIALIZED VIEW mv1 AS - SELECT * FROM t WHERE c1 IS NOT NULL AND c2 IS NOT NULL - PRIMARY KEY (c1, c2) - - -.. _mv-options: - -MV options -`````````` - -A materialized view is internally implemented by a table and as such, creating a MV allows the :ref:`same options than -creating a table `. - - -.. _alter-materialized-view-statement: - -ALTER MATERIALIZED VIEW -^^^^^^^^^^^^^^^^^^^^^^^ - -After creation, you can alter the options of a materialized view using the ``ALTER MATERIALIZED VIEW`` statement: - -.. productionlist:: - alter_materialized_view_statement: ALTER MATERIALIZED VIEW `view_name` WITH `table_options` - -The options that can be updated are the same than at creation time and thus the :ref:`same than for tables -`. - -.. _drop-materialized-view-statement: - -DROP MATERIALIZED VIEW -^^^^^^^^^^^^^^^^^^^^^^ - -Dropping a materialized view users the ``DROP MATERIALIZED VIEW`` statement: - -.. productionlist:: - drop_materialized_view_statement: DROP MATERIALIZED VIEW [ IF EXISTS ] `view_name`; - -If the materialized view does not exists, the statement will return an error, unless ``IF EXISTS`` is used in which case -the operation is a no-op. diff --git a/doc/source/cql/security.rst b/doc/source/cql/security.rst deleted file mode 100644 index 099fcc48e013..000000000000 --- a/doc/source/cql/security.rst +++ /dev/null @@ -1,502 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: cql - -.. _cql-security: - -Security --------- - -.. 
_cql-roles: - -Database Roles -^^^^^^^^^^^^^^ - -CQL uses database roles to represent users and group of users. Syntactically, a role is defined by: - -.. productionlist:: - role_name: `identifier` | `string` - -.. _create-role-statement: - -CREATE ROLE -~~~~~~~~~~~ - -Creating a role uses the ``CREATE ROLE`` statement: - -.. productionlist:: - create_role_statement: CREATE ROLE [ IF NOT EXISTS ] `role_name` - : [ WITH `role_options` ] - role_options: `role_option` ( AND `role_option` )* - role_option: PASSWORD '=' `string` - :| LOGIN '=' `boolean` - :| SUPERUSER '=' `boolean` - :| OPTIONS '=' `map_literal` - -For instance:: - - CREATE ROLE new_role; - CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true; - CREATE ROLE bob WITH PASSWORD = 'password_b' AND LOGIN = true AND SUPERUSER = true; - CREATE ROLE carlos WITH OPTIONS = { 'custom_option1' : 'option1_value', 'custom_option2' : 99 }; - -By default roles do not possess ``LOGIN`` privileges or ``SUPERUSER`` status. - -:ref:`Permissions ` on database resources are granted to roles; types of resources include keyspaces, -tables, functions and roles themselves. Roles may be granted to other roles to create hierarchical permissions -structures; in these hierarchies, permissions and ``SUPERUSER`` status are inherited, but the ``LOGIN`` privilege is -not. - -If a role has the ``LOGIN`` privilege, clients may identify as that role when connecting. For the duration of that -connection, the client will acquire any roles and privileges granted to that role. - -Only a client with with the ``CREATE`` permission on the database roles resource may issue ``CREATE ROLE`` requests (see -the :ref:`relevant section ` below), unless the client is a ``SUPERUSER``. Role management in Cassandra -is pluggable and custom implementations may support only a subset of the listed options. - -Role names should be quoted if they contain non-alphanumeric characters. - -.. _setting-credentials-for-internal-authentication: - -Setting credentials for internal authentication -``````````````````````````````````````````````` - -Use the ``WITH PASSWORD`` clause to set a password for internal authentication, enclosing the password in single -quotation marks. - -If internal authentication has not been set up or the role does not have ``LOGIN`` privileges, the ``WITH PASSWORD`` -clause is not necessary. - -Creating a role conditionally -````````````````````````````` - -Attempting to create an existing role results in an invalid query condition unless the ``IF NOT EXISTS`` option is used. -If the option is used and the role exists, the statement is a no-op:: - - CREATE ROLE other_role; - CREATE ROLE IF NOT EXISTS other_role; - - -.. _alter-role-statement: - -ALTER ROLE -~~~~~~~~~~ - -Altering a role options uses the ``ALTER ROLE`` statement: - -.. productionlist:: - alter_role_statement: ALTER ROLE `role_name` WITH `role_options` - -For instance:: - - ALTER ROLE bob WITH PASSWORD = 'PASSWORD_B' AND SUPERUSER = false; - -Conditions on executing ``ALTER ROLE`` statements: - -- A client must have ``SUPERUSER`` status to alter the ``SUPERUSER`` status of another role -- A client cannot alter the ``SUPERUSER`` status of any role it currently holds -- A client can only modify certain properties of the role with which it identified at login (e.g. ``PASSWORD``) -- To modify properties of a role, the client must be granted ``ALTER`` :ref:`permission ` on that role - -.. _drop-role-statement: - -DROP ROLE -~~~~~~~~~ - -Dropping a role uses the ``DROP ROLE`` statement: - -.. 
productionlist:: - drop_role_statement: DROP ROLE [ IF EXISTS ] `role_name` - -``DROP ROLE`` requires the client to have ``DROP`` :ref:`permission ` on the role in question. In -addition, client may not ``DROP`` the role with which it identified at login. Finally, only a client with ``SUPERUSER`` -status may ``DROP`` another ``SUPERUSER`` role. - -Attempting to drop a role which does not exist results in an invalid query condition unless the ``IF EXISTS`` option is -used. If the option is used and the role does not exist the statement is a no-op. - -.. _grant-role-statement: - -GRANT ROLE -~~~~~~~~~~ - -Granting a role to another uses the ``GRANT ROLE`` statement: - -.. productionlist:: - grant_role_statement: GRANT `role_name` TO `role_name` - -For instance:: - - GRANT report_writer TO alice; - -This statement grants the ``report_writer`` role to ``alice``. Any permissions granted to ``report_writer`` are also -acquired by ``alice``. - -Roles are modelled as a directed acyclic graph, so circular grants are not permitted. The following examples result in -error conditions:: - - GRANT role_a TO role_b; - GRANT role_b TO role_a; - - GRANT role_a TO role_b; - GRANT role_b TO role_c; - GRANT role_c TO role_a; - -.. _revoke-role-statement: - -REVOKE ROLE -~~~~~~~~~~~ - -Revoking a role uses the ``REVOKE ROLE`` statement: - -.. productionlist:: - revoke_role_statement: REVOKE `role_name` FROM `role_name` - -For instance:: - - REVOKE report_writer FROM alice; - -This statement revokes the ``report_writer`` role from ``alice``. Any permissions that ``alice`` has acquired via the -``report_writer`` role are also revoked. - -.. _list-roles-statement: - -LIST ROLES -~~~~~~~~~~ - -All the known roles (in the system or granted to specific role) can be listed using the ``LIST ROLES`` statement: - -.. productionlist:: - list_roles_statement: LIST ROLES [ OF `role_name` ] [ NORECURSIVE ] - -For instance:: - - LIST ROLES; - -returns all known roles in the system, this requires ``DESCRIBE`` permission on the database roles resource. And:: - - LIST ROLES OF alice; - -enumerates all roles granted to ``alice``, including those transitively acquired. But:: - - LIST ROLES OF bob NORECURSIVE - -lists all roles directly granted to ``bob`` without including any of the transitively acquired ones. - -Users -^^^^^ - -Prior to the introduction of roles in Cassandra 2.2, authentication and authorization were based around the concept of a -``USER``. For backward compatibility, the legacy syntax has been preserved with ``USER`` centric statements becoming -synonyms for the ``ROLE`` based equivalents. In other words, creating/updating a user is just a different syntax for -creating/updating a role. - -.. _create-user-statement: - -CREATE USER -~~~~~~~~~~~ - -Creating a user uses the ``CREATE USER`` statement: - -.. productionlist:: - create_user_statement: CREATE USER [ IF NOT EXISTS ] `role_name` [ WITH PASSWORD `string` ] [ `user_option` ] - user_option: SUPERUSER | NOSUPERUSER - -For instance:: - - CREATE USER alice WITH PASSWORD 'password_a' SUPERUSER; - CREATE USER bob WITH PASSWORD 'password_b' NOSUPERUSER; - -``CREATE USER`` is equivalent to ``CREATE ROLE`` where the ``LOGIN`` option is ``true``. 
So, the following pairs of -statements are equivalent:: - - CREATE USER alice WITH PASSWORD 'password_a' SUPERUSER; - CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true AND SUPERUSER = true; - - CREATE USER IF NOT EXISTS alice WITH PASSWORD 'password_a' SUPERUSER; - CREATE ROLE IF NOT EXISTS alice WITH PASSWORD = 'password_a' AND LOGIN = true AND SUPERUSER = true; - - CREATE USER alice WITH PASSWORD 'password_a' NOSUPERUSER; - CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true AND SUPERUSER = false; - - CREATE USER alice WITH PASSWORD 'password_a' NOSUPERUSER; - CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true; - - CREATE USER alice WITH PASSWORD 'password_a'; - CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true; - -.. _alter-user-statement: - -ALTER USER -~~~~~~~~~~ - -Altering the options of a user uses the ``ALTER USER`` statement: - -.. productionlist:: - alter_user_statement: ALTER USER `role_name` [ WITH PASSWORD `string` ] [ `user_option` ] - -For instance:: - - ALTER USER alice WITH PASSWORD 'PASSWORD_A'; - ALTER USER bob SUPERUSER; - -.. _drop-user-statement: - -DROP USER -~~~~~~~~~ - -Dropping a user uses the ``DROP USER`` statement: - -.. productionlist:: - drop_user_statement: DROP USER [ IF EXISTS ] `role_name` - -.. _list-users-statement: - -LIST USERS -~~~~~~~~~~ - -Existing users can be listed using the ``LIST USERS`` statement: - -.. productionlist:: - list_users_statement: LIST USERS - -Note that this statement is equivalent to:: - - LIST ROLES; - -but only roles with the ``LOGIN`` privilege are included in the output. - -Data Control -^^^^^^^^^^^^ - -.. _cql-permissions: - -Permissions -~~~~~~~~~~~ - -Permissions on resources are granted to roles; there are several different types of resources in Cassandra and each type -is modelled hierarchically: - -- The hierarchy of Data resources, Keyspaces and Tables has the structure ``ALL KEYSPACES`` -> ``KEYSPACE`` -> - ``TABLE``. -- Function resources have the structure ``ALL FUNCTIONS`` -> ``KEYSPACE`` -> ``FUNCTION`` -- Resources representing roles have the structure ``ALL ROLES`` -> ``ROLE`` -- Resources representing JMX ObjectNames, which map to sets of MBeans/MXBeans, have the structure ``ALL MBEANS`` -> - ``MBEAN`` - -Permissions can be granted at any level of these hierarchies and they flow downwards. So granting a permission on a -resource higher up the chain automatically grants that same permission on all resources lower down. For example, -granting ``SELECT`` on a ``KEYSPACE`` automatically grants it on all ``TABLES`` in that ``KEYSPACE``. Likewise, granting -a permission on ``ALL FUNCTIONS`` grants it on every defined function, regardless of which keyspace it is scoped in. It -is also possible to grant permissions on all functions scoped to a particular keyspace. - -Modifications to permissions are visible to existing client sessions; that is, connections need not be re-established -following permissions changes. - -The full set of available permissions is: - -- ``CREATE`` -- ``ALTER`` -- ``DROP`` -- ``SELECT`` -- ``MODIFY`` -- ``AUTHORIZE`` -- ``DESCRIBE`` -- ``EXECUTE`` - -Not all permissions are applicable to every type of resource. For instance, ``EXECUTE`` is only relevant in the context -of functions or mbeans; granting ``EXECUTE`` on a resource representing a table is nonsensical. Attempting to ``GRANT`` -a permission on resource to which it cannot be applied results in an error response. 
The following illustrates which -permissions can be granted on which types of resource, and which statements are enabled by that permission. - -=============== =============================== ======================================================================= - Permission Resource Operations -=============== =============================== ======================================================================= - ``CREATE`` ``ALL KEYSPACES`` ``CREATE KEYSPACE`` and ``CREATE TABLE`` in any keyspace - ``CREATE`` ``KEYSPACE`` ``CREATE TABLE`` in specified keyspace - ``CREATE`` ``ALL FUNCTIONS`` ``CREATE FUNCTION`` in any keyspace and ``CREATE AGGREGATE`` in any - keyspace - ``CREATE`` ``ALL FUNCTIONS IN KEYSPACE`` ``CREATE FUNCTION`` and ``CREATE AGGREGATE`` in specified keyspace - ``CREATE`` ``ALL ROLES`` ``CREATE ROLE`` - ``ALTER`` ``ALL KEYSPACES`` ``ALTER KEYSPACE`` and ``ALTER TABLE`` in any keyspace - ``ALTER`` ``KEYSPACE`` ``ALTER KEYSPACE`` and ``ALTER TABLE`` in specified keyspace - ``ALTER`` ``TABLE`` ``ALTER TABLE`` - ``ALTER`` ``ALL FUNCTIONS`` ``CREATE FUNCTION`` and ``CREATE AGGREGATE``: replacing any existing - ``ALTER`` ``ALL FUNCTIONS IN KEYSPACE`` ``CREATE FUNCTION`` and ``CREATE AGGREGATE``: replacing existing in - specified keyspace - ``ALTER`` ``FUNCTION`` ``CREATE FUNCTION`` and ``CREATE AGGREGATE``: replacing existing - ``ALTER`` ``ALL ROLES`` ``ALTER ROLE`` on any role - ``ALTER`` ``ROLE`` ``ALTER ROLE`` - ``DROP`` ``ALL KEYSPACES`` ``DROP KEYSPACE`` and ``DROP TABLE`` in any keyspace - ``DROP`` ``KEYSPACE`` ``DROP TABLE`` in specified keyspace - ``DROP`` ``TABLE`` ``DROP TABLE`` - ``DROP`` ``ALL FUNCTIONS`` ``DROP FUNCTION`` and ``DROP AGGREGATE`` in any keyspace - ``DROP`` ``ALL FUNCTIONS IN KEYSPACE`` ``DROP FUNCTION`` and ``DROP AGGREGATE`` in specified keyspace - ``DROP`` ``FUNCTION`` ``DROP FUNCTION`` - ``DROP`` ``ALL ROLES`` ``DROP ROLE`` on any role - ``DROP`` ``ROLE`` ``DROP ROLE`` - ``SELECT`` ``ALL KEYSPACES`` ``SELECT`` on any table - ``SELECT`` ``KEYSPACE`` ``SELECT`` on any table in specified keyspace - ``SELECT`` ``TABLE`` ``SELECT`` on specified table - ``SELECT`` ``ALL MBEANS`` Call getter methods on any mbean - ``SELECT`` ``MBEANS`` Call getter methods on any mbean matching a wildcard pattern - ``SELECT`` ``MBEAN`` Call getter methods on named mbean - ``MODIFY`` ``ALL KEYSPACES`` ``INSERT``, ``UPDATE``, ``DELETE`` and ``TRUNCATE`` on any table - ``MODIFY`` ``KEYSPACE`` ``INSERT``, ``UPDATE``, ``DELETE`` and ``TRUNCATE`` on any table in - specified keyspace - ``MODIFY`` ``TABLE`` ``INSERT``, ``UPDATE``, ``DELETE`` and ``TRUNCATE`` on specified table - ``MODIFY`` ``ALL MBEANS`` Call setter methods on any mbean - ``MODIFY`` ``MBEANS`` Call setter methods on any mbean matching a wildcard pattern - ``MODIFY`` ``MBEAN`` Call setter methods on named mbean - ``AUTHORIZE`` ``ALL KEYSPACES`` ``GRANT PERMISSION`` and ``REVOKE PERMISSION`` on any table - ``AUTHORIZE`` ``KEYSPACE`` ``GRANT PERMISSION`` and ``REVOKE PERMISSION`` on any table in - specified keyspace - ``AUTHORIZE`` ``TABLE`` ``GRANT PERMISSION`` and ``REVOKE PERMISSION`` on specified table - ``AUTHORIZE`` ``ALL FUNCTIONS`` ``GRANT PERMISSION`` and ``REVOKE PERMISSION`` on any function - ``AUTHORIZE`` ``ALL FUNCTIONS IN KEYSPACE`` ``GRANT PERMISSION`` and ``REVOKE PERMISSION`` in specified keyspace - ``AUTHORIZE`` ``FUNCTION`` ``GRANT PERMISSION`` and ``REVOKE PERMISSION`` on specified function - ``AUTHORIZE`` ``ALL MBEANS`` ``GRANT PERMISSION`` and ``REVOKE PERMISSION`` on any mbean - 
``AUTHORIZE`` ``MBEANS`` ``GRANT PERMISSION`` and ``REVOKE PERMISSION`` on any mbean matching - a wildcard pattern - ``AUTHORIZE`` ``MBEAN`` ``GRANT PERMISSION`` and ``REVOKE PERMISSION`` on named mbean - ``AUTHORIZE`` ``ALL ROLES`` ``GRANT ROLE`` and ``REVOKE ROLE`` on any role - ``AUTHORIZE`` ``ROLES`` ``GRANT ROLE`` and ``REVOKE ROLE`` on specified roles - ``DESCRIBE`` ``ALL ROLES`` ``LIST ROLES`` on all roles or only roles granted to another, - specified role - ``DESCRIBE`` ``ALL MBEANS`` Retrieve metadata about any mbean from the platform's MBeanServer - ``DESCRIBE`` ``MBEANS`` Retrieve metadata about any mbean matching a wildcard patter from the - platform's MBeanServer - ``DESCRIBE`` ``MBEAN`` Retrieve metadata about a named mbean from the platform's MBeanServer - ``EXECUTE`` ``ALL FUNCTIONS`` ``SELECT``, ``INSERT`` and ``UPDATE`` using any function, and use of - any function in ``CREATE AGGREGATE`` - ``EXECUTE`` ``ALL FUNCTIONS IN KEYSPACE`` ``SELECT``, ``INSERT`` and ``UPDATE`` using any function in specified - keyspace and use of any function in keyspace in ``CREATE AGGREGATE`` - ``EXECUTE`` ``FUNCTION`` ``SELECT``, ``INSERT`` and ``UPDATE`` using specified function and use - of the function in ``CREATE AGGREGATE`` - ``EXECUTE`` ``ALL MBEANS`` Execute operations on any mbean - ``EXECUTE`` ``MBEANS`` Execute operations on any mbean matching a wildcard pattern - ``EXECUTE`` ``MBEAN`` Execute operations on named mbean -=============== =============================== ======================================================================= - -.. _grant-permission-statement: - -GRANT PERMISSION -~~~~~~~~~~~~~~~~ - -Granting a permission uses the ``GRANT PERMISSION`` statement: - -.. productionlist:: - grant_permission_statement: GRANT `permissions` ON `resource` TO `role_name` - permissions: ALL [ PERMISSIONS ] | `permission` [ PERMISSION ] - permission: CREATE | ALTER | DROP | SELECT | MODIFY | AUTHORIZE | DESCRIBE | EXECUTE - resource: ALL KEYSPACES - :| KEYSPACE `keyspace_name` - :| [ TABLE ] `table_name` - :| ALL ROLES - :| ROLE `role_name` - :| ALL FUNCTIONS [ IN KEYSPACE `keyspace_name` ] - :| FUNCTION `function_name` '(' [ `cql_type` ( ',' `cql_type` )* ] ')' - :| ALL MBEANS - :| ( MBEAN | MBEANS ) `string` - -For instance:: - - GRANT SELECT ON ALL KEYSPACES TO data_reader; - -This gives any user with the role ``data_reader`` permission to execute ``SELECT`` statements on any table across all -keyspaces:: - - GRANT MODIFY ON KEYSPACE keyspace1 TO data_writer; - -This give any user with the role ``data_writer`` permission to perform ``UPDATE``, ``INSERT``, ``UPDATE``, ``DELETE`` -and ``TRUNCATE`` queries on all tables in the ``keyspace1`` keyspace:: - - GRANT DROP ON keyspace1.table1 TO schema_owner; - -This gives any user with the ``schema_owner`` role permissions to ``DROP`` ``keyspace1.table1``:: - - GRANT EXECUTE ON FUNCTION keyspace1.user_function( int ) TO report_writer; - -This grants any user with the ``report_writer`` role permission to execute ``SELECT``, ``INSERT`` and ``UPDATE`` queries -which use the function ``keyspace1.user_function( int )``:: - - GRANT DESCRIBE ON ALL ROLES TO role_admin; - -This grants any user with the ``role_admin`` role permission to view any and all roles in the system with a ``LIST -ROLES`` statement - -.. _grant-all: - -GRANT ALL -````````` - -When the ``GRANT ALL`` form is used, the appropriate set of permissions is determined automatically based on the target -resource. 
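As a hypothetical illustration (the role and keyspace names are not from the original examples), everything applicable to a keyspace can be granted in a single statement::

    GRANT ALL PERMISSIONS ON KEYSPACE keyspace1 TO ops_admin;

Because the target is a data resource, this amounts to granting ``CREATE``, ``ALTER``, ``DROP``, ``SELECT``, ``MODIFY`` and ``AUTHORIZE`` on ``keyspace1`` individually; permissions such as ``EXECUTE`` or ``DESCRIBE``, which do not apply to keyspaces, are not included.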
- -Automatic Granting -`````````````````` - -When a resource is created, via a ``CREATE KEYSPACE``, ``CREATE TABLE``, ``CREATE FUNCTION``, ``CREATE AGGREGATE`` or -``CREATE ROLE`` statement, the creator (the role the database user who issues the statement is identified as), is -automatically granted all applicable permissions on the new resource. - -.. _revoke-permission-statement: - -REVOKE PERMISSION -~~~~~~~~~~~~~~~~~ - -Revoking a permission from a role uses the ``REVOKE PERMISSION`` statement: - -.. productionlist:: - revoke_permission_statement: REVOKE `permissions` ON `resource` FROM `role_name` - -For instance:: - - REVOKE SELECT ON ALL KEYSPACES FROM data_reader; - REVOKE MODIFY ON KEYSPACE keyspace1 FROM data_writer; - REVOKE DROP ON keyspace1.table1 FROM schema_owner; - REVOKE EXECUTE ON FUNCTION keyspace1.user_function( int ) FROM report_writer; - REVOKE DESCRIBE ON ALL ROLES FROM role_admin; - -.. _list-permissions-statement: - -LIST PERMISSIONS -~~~~~~~~~~~~~~~~ - -Listing granted permissions uses the ``LIST PERMISSIONS`` statement: - -.. productionlist:: - list_permissions_statement: LIST `permissions` [ ON `resource` ] [ OF `role_name` [ NORECURSIVE ] ] - -For instance:: - - LIST ALL PERMISSIONS OF alice; - -Show all permissions granted to ``alice``, including those acquired transitively from any other roles:: - - LIST ALL PERMISSIONS ON keyspace1.table1 OF bob; - -Show all permissions on ``keyspace1.table1`` granted to ``bob``, including those acquired transitively from any other -roles. This also includes any permissions higher up the resource hierarchy which can be applied to ``keyspace1.table1``. -For example, should ``bob`` have ``ALTER`` permission on ``keyspace1``, that would be included in the results of this -query. Adding the ``NORECURSIVE`` switch restricts the results to only those permissions which were directly granted to -``bob`` or one of ``bob``'s roles:: - - LIST SELECT PERMISSIONS OF carlos; - -Show any permissions granted to ``carlos`` or any of ``carlos``'s roles, limited to ``SELECT`` permissions on any -resource. diff --git a/doc/source/cql/triggers.rst b/doc/source/cql/triggers.rst deleted file mode 100644 index db3f53e3869c..000000000000 --- a/doc/source/cql/triggers.rst +++ /dev/null @@ -1,63 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: cql - -.. _cql-triggers: - -Triggers --------- - -Triggers are identified by a name defined by: - -.. productionlist:: - trigger_name: `identifier` - - -.. _create-trigger-statement: - -CREATE TRIGGER -^^^^^^^^^^^^^^ - -Creating a new trigger uses the ``CREATE TRIGGER`` statement: - -.. 
productionlist:: - create_trigger_statement: CREATE TRIGGER [ IF NOT EXISTS ] `trigger_name` - : ON `table_name` - : USING `string` - -For instance:: - - CREATE TRIGGER myTrigger ON myTable USING 'org.apache.cassandra.triggers.InvertedIndex'; - -The actual logic that makes up the trigger can be written in any Java (JVM) language and exists outside the database. -You place the trigger code in a ``lib/triggers`` subdirectory of the Cassandra installation directory, it loads during -cluster startup, and exists on every node that participates in a cluster. The trigger defined on a table fires before a -requested DML statement occurs, which ensures the atomicity of the transaction. - -.. _drop-trigger-statement: - -DROP TRIGGER -^^^^^^^^^^^^ - -Dropping a trigger uses the ``DROP TRIGGER`` statement: - -.. productionlist:: - drop_trigger_statement: DROP TRIGGER [ IF EXISTS ] `trigger_name` ON `table_name` - -For instance:: - - DROP TRIGGER myTrigger ON myTable; diff --git a/doc/source/cql/types.rst b/doc/source/cql/types.rst deleted file mode 100644 index 509a7565e791..000000000000 --- a/doc/source/cql/types.rst +++ /dev/null @@ -1,559 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: cql - -.. _UUID: https://en.wikipedia.org/wiki/Universally_unique_identifier - -.. _data-types: - -Data Types ----------- - -CQL is a typed language and supports a rich set of data types, including :ref:`native types `, -:ref:`collection types `, :ref:`user-defined types `, :ref:`tuple types ` and :ref:`custom -types `: - -.. productionlist:: - cql_type: `native_type` | `collection_type` | `user_defined_type` | `tuple_type` | `custom_type` - - -.. _native-types: - -Native Types -^^^^^^^^^^^^ - -The native types supported by CQL are: - -.. productionlist:: - native_type: ASCII - : | BIGINT - : | BLOB - : | BOOLEAN - : | COUNTER - : | DATE - : | DECIMAL - : | DOUBLE - : | DURATION - : | FLOAT - : | INET - : | INT - : | SMALLINT - : | TEXT - : | TIME - : | TIMESTAMP - : | TIMEUUID - : | TINYINT - : | UUID - : | VARCHAR - : | VARINT - -The following table gives additional informations on the native data types, and on which kind of :ref:`constants -` each type supports: - -=============== ===================== ================================================================================== - type constants supported description -=============== ===================== ================================================================================== - ``ascii`` :token:`string` ASCII character string - ``bigint`` :token:`integer` 64-bit signed long - ``blob`` :token:`blob` Arbitrary bytes (no validation) - ``boolean`` :token:`boolean` Either ``true`` or ``false`` - ``counter`` :token:`integer` Counter column (64-bit signed value). 
See :ref:`counters` for details - ``date`` :token:`integer`, A date (with no corresponding time value). See :ref:`dates` below for details - :token:`string` - ``decimal`` :token:`integer`, Variable-precision decimal - :token:`float` - ``double`` :token:`integer` 64-bit IEEE-754 floating point - :token:`float` - ``duration`` :token:`duration`, A duration with nanosecond precision. See :ref:`durations` below for details - ``float`` :token:`integer`, 32-bit IEEE-754 floating point - :token:`float` - ``inet`` :token:`string` An IP address, either IPv4 (4 bytes long) or IPv6 (16 bytes long). Note that - there is no ``inet`` constant, IP address should be input as strings - ``int`` :token:`integer` 32-bit signed int - ``smallint`` :token:`integer` 16-bit signed int - ``text`` :token:`string` UTF8 encoded string - ``time`` :token:`integer`, A time (with no corresponding date value) with nanosecond precision. See - :token:`string` :ref:`times` below for details - ``timestamp`` :token:`integer`, A timestamp (date and time) with millisecond precision. See :ref:`timestamps` - :token:`string` below for details - ``timeuuid`` :token:`uuid` Version 1 UUID_, generally used as a “conflict-free” timestamp. Also see - :ref:`timeuuid-functions` - ``tinyint`` :token:`integer` 8-bit signed int - ``uuid`` :token:`uuid` A UUID_ (of any version) - ``varchar`` :token:`string` UTF8 encoded string - ``varint`` :token:`integer` Arbitrary-precision integer -=============== ===================== ================================================================================== - -.. _counters: - -Counters -~~~~~~~~ - -The ``counter`` type is used to define *counter columns*. A counter column is a column whose value is a 64-bit signed -integer and on which 2 operations are supported: incrementing and decrementing (see the :ref:`UPDATE statement -` for syntax). Note that the value of a counter cannot be set: a counter does not exist until first -incremented/decremented, and that first increment/decrement is made as if the prior value was 0. - -.. _counter-limitations: - -Counters have a number of important limitations: - -- They cannot be used for columns part of the ``PRIMARY KEY`` of a table. -- A table that contains a counter can only contain counters. In other words, either all the columns of a table outside - the ``PRIMARY KEY`` have the ``counter`` type, or none of them have it. -- Counters do not support :ref:`expiration `. -- The deletion of counters is supported, but is only guaranteed to work the first time you delete a counter. In other - words, you should not re-update a counter that you have deleted (if you do, proper behavior is not guaranteed). -- Counter updates are, by nature, not `idemptotent `__. An important - consequence is that if a counter update fails unexpectedly (timeout or loss of connection to the coordinator node), - the client has no way to know if the update has been applied or not. In particular, replaying the update may or may - not lead to an over count. - -.. _timestamps: - -Working with timestamps -^^^^^^^^^^^^^^^^^^^^^^^ - -Values of the ``timestamp`` type are encoded as 64-bit signed integers representing a number of milliseconds since the -standard base time known as `the epoch `__: January 1 1970 at 00:00:00 GMT. - -Timestamps can be input in CQL either using their value as an :token:`integer`, or using a :token:`string` that -represents an `ISO 8601 `__ date. 
For instance, all of the values below are -valid ``timestamp`` values for Mar 2, 2011, at 04:05:00 AM, GMT: - -- ``1299038700000`` -- ``'2011-02-03 04:05+0000'`` -- ``'2011-02-03 04:05:00+0000'`` -- ``'2011-02-03 04:05:00.000+0000'`` -- ``'2011-02-03T04:05+0000'`` -- ``'2011-02-03T04:05:00+0000'`` -- ``'2011-02-03T04:05:00.000+0000'`` - -The ``+0000`` above is an RFC 822 4-digit time zone specification; ``+0000`` refers to GMT. US Pacific Standard Time is -``-0800``. The time zone may be omitted if desired (``'2011-02-03 04:05:00'``), and if so, the date will be interpreted -as being in the time zone under which the coordinating Cassandra node is configured. There are however difficulties -inherent in relying on the time zone configuration being as expected, so it is recommended that the time zone always be -specified for timestamps when feasible. - -The time of day may also be omitted (``'2011-02-03'`` or ``'2011-02-03+0000'``), in which case the time of day will -default to 00:00:00 in the specified or default time zone. However, if only the date part is relevant, consider using -the :ref:`date ` type. - -.. _dates: - -Working with dates -^^^^^^^^^^^^^^^^^^ - -Values of the ``date`` type are encoded as 32-bit unsigned integers representing a number of days with “the epoch” at -the center of the range (2^31). Epoch is January 1st, 1970 - -As for :ref:`timestamp `, a date can be input either as an :token:`integer` or using a date -:token:`string`. In the later case, the format should be ``yyyy-mm-dd`` (so ``'2011-02-03'`` for instance). - -.. _times: - -Working with times -^^^^^^^^^^^^^^^^^^ - -Values of the ``time`` type are encoded as 64-bit signed integers representing the number of nanoseconds since midnight. - -As for :ref:`timestamp `, a time can be input either as an :token:`integer` or using a :token:`string` -representing the time. In the later case, the format should be ``hh:mm:ss[.fffffffff]`` (where the sub-second precision -is optional and if provided, can be less than the nanosecond). So for instance, the following are valid inputs for a -time: - -- ``'08:12:54'`` -- ``'08:12:54.123'`` -- ``'08:12:54.123456'`` -- ``'08:12:54.123456789'`` - -.. _durations: - -Working with durations -^^^^^^^^^^^^^^^^^^^^^^ - -Values of the ``duration`` type are encoded as 3 signed integer of variable lengths. The first integer represents the -number of months, the second the number of days and the third the number of nanoseconds. This is due to the fact that -the number of days in a month can change, and a day can have 23 or 25 hours depending on the daylight saving. -Internally, the number of months and days are decoded as 32 bits integers whereas the number of nanoseconds is decoded -as a 64 bits integer. - -A duration can be input as: - - #. ``(quantity unit)+`` like ``12h30m`` where the unit can be: - - * ``y``: years (12 months) - * ``mo``: months (1 month) - * ``w``: weeks (7 days) - * ``d``: days (1 day) - * ``h``: hours (3,600,000,000,000 nanoseconds) - * ``m``: minutes (60,000,000,000 nanoseconds) - * ``s``: seconds (1,000,000,000 nanoseconds) - * ``ms``: milliseconds (1,000,000 nanoseconds) - * ``us`` or ``µs`` : microseconds (1000 nanoseconds) - * ``ns``: nanoseconds (1 nanosecond) - #. ISO 8601 format: ``P[n]Y[n]M[n]DT[n]H[n]M[n]S or P[n]W`` - #. 
ISO 8601 alternative format: ``P[YYYY]-[MM]-[DD]T[hh]:[mm]:[ss]`` - -For example:: - - INSERT INTO RiderResults (rider, race, result) VALUES ('Christopher Froome', 'Tour de France', 89h4m48s); - INSERT INTO RiderResults (rider, race, result) VALUES ('BARDET Romain', 'Tour de France', PT89H8M53S); - INSERT INTO RiderResults (rider, race, result) VALUES ('QUINTANA Nairo', 'Tour de France', P0000-00-00T89:09:09); - -.. _duration-limitation: - -Duration columns cannot be used in a table's ``PRIMARY KEY``. This limitation is due to the fact that -durations cannot be ordered. It is effectively not possible to know if ``1mo`` is greater than ``29d`` without a date -context. - -A ``1d`` duration is not equals to a ``24h`` one as the duration type has been created to be able to support daylight -saving. - -.. _collections: - -Collections -^^^^^^^^^^^ - -CQL supports 3 kind of collections: :ref:`maps`, :ref:`sets` and :ref:`lists`. The types of those collections is defined -by: - -.. productionlist:: - collection_type: MAP '<' `cql_type` ',' `cql_type` '>' - : | SET '<' `cql_type` '>' - : | LIST '<' `cql_type` '>' - -and their values can be inputd using collection literals: - -.. productionlist:: - collection_literal: `map_literal` | `set_literal` | `list_literal` - map_literal: '{' [ `term` ':' `term` (',' `term` : `term`)* ] '}' - set_literal: '{' [ `term` (',' `term`)* ] '}' - list_literal: '[' [ `term` (',' `term`)* ] ']' - -Note however that neither :token:`bind_marker` nor ``NULL`` are supported inside collection literals. - -Noteworthy characteristics -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Collections are meant for storing/denormalizing relatively small amount of data. They work well for things like “the -phone numbers of a given user”, “labels applied to an email”, etc. But when items are expected to grow unbounded (“all -messages sent by a user”, “events registered by a sensor”...), then collections are not appropriate and a specific table -(with clustering columns) should be used. Concretely, (non-frozen) collections have the following noteworthy -characteristics and limitations: - -- Individual collections are not indexed internally. Which means that even to access a single element of a collection, - the while collection has to be read (and reading one is not paged internally). -- While insertion operations on sets and maps never incur a read-before-write internally, some operations on lists do. - Further, some lists operations are not idempotent by nature (see the section on :ref:`lists ` below for - details), making their retry in case of timeout problematic. It is thus advised to prefer sets over lists when - possible. - -Please note that while some of those limitations may or may not be removed/improved upon in the future, it is a -anti-pattern to use a (single) collection to store large amounts of data. - -.. _maps: - -Maps -~~~~ - -A ``map`` is a (sorted) set of key-value pairs, where keys are unique and the map is sorted by its keys. You can define -and insert a map with:: - - CREATE TABLE users ( - id text PRIMARY KEY, - name text, - favs map // A map of text keys, and text values - ); - - INSERT INTO users (id, name, favs) - VALUES ('jsmith', 'John Smith', { 'fruit' : 'Apple', 'band' : 'Beatles' }); - - // Replace the existing map entirely. 
- UPDATE users SET favs = { 'fruit' : 'Banana' } WHERE id = 'jsmith'; - -Further, maps support: - -- Updating or inserting one or more elements:: - - UPDATE users SET favs['author'] = 'Ed Poe' WHERE id = 'jsmith'; - UPDATE users SET favs = favs + { 'movie' : 'Cassablanca', 'band' : 'ZZ Top' } WHERE id = 'jsmith'; - -- Removing one or more elements (if an element doesn't exist, removing it is a no-op but no error is thrown):: - - DELETE favs['author'] FROM users WHERE id = 'jsmith'; - UPDATE users SET favs = favs - { 'movie', 'band' } WHERE id = 'jsmith'; - - Note that for removing multiple elements in a ``map``, you remove from it a ``set`` of keys. - -Lastly, TTLs are allowed for both ``INSERT`` and ``UPDATE``, but in both cases the TTL set only applies to the newly -inserted/updated elements. In other words:: - - UPDATE users USING TTL 10 SET favs['color'] = 'green' WHERE id = 'jsmith'; - -will only apply the TTL to the ``{ 'color' : 'green' }`` record, the rest of the map remaining unaffected. - - -.. _sets: - -Sets -~~~~ - -A ``set`` is a (sorted) collection of unique values. You can define and insert a set with:: - - CREATE TABLE images ( - name text PRIMARY KEY, - owner text, - tags set<text> // A set of text values - ); - - INSERT INTO images (name, owner, tags) - VALUES ('cat.jpg', 'jsmith', { 'pet', 'cute' }); - - // Replace the existing set entirely - UPDATE images SET tags = { 'kitten', 'cat', 'lol' } WHERE name = 'cat.jpg'; - -Further, sets support: - -- Adding one or multiple elements (as this is a set, inserting an already existing element is a no-op):: - - UPDATE images SET tags = tags + { 'gray', 'cuddly' } WHERE name = 'cat.jpg'; - -- Removing one or multiple elements (if an element doesn't exist, removing it is a no-op but no error is thrown):: - - UPDATE images SET tags = tags - { 'cat' } WHERE name = 'cat.jpg'; - -Lastly, as for :ref:`maps <maps>`, TTLs, if used, only apply to the newly inserted values. - -.. _lists: - -Lists -~~~~~ - -.. note:: As mentioned above and further discussed at the end of this section, lists have limitations and specific - performance considerations that you should take into account before using them. In general, if you can use a - :ref:`set <sets>` instead of a list, always prefer a set. - -A ``list`` is a (sorted) collection of non-unique values where elements are ordered by their position in the list. You -can define and insert a list with:: - - CREATE TABLE plays ( - id text PRIMARY KEY, - game text, - players int, - scores list<int> // A list of integers - ) - - INSERT INTO plays (id, game, players, scores) - VALUES ('123-afde', 'quake', 3, [17, 4, 2]); - - // Replace the existing list entirely - UPDATE plays SET scores = [ 3, 9, 4 ] WHERE id = '123-afde'; - -Further, lists support: - -- Appending and prepending values to a list:: - - UPDATE plays SET players = 5, scores = scores + [ 14, 21 ] WHERE id = '123-afde'; - UPDATE plays SET players = 6, scores = [ 3 ] + scores WHERE id = '123-afde'; - -- Setting the value at a particular position in the list. This implies that the list has a pre-existing element at that - position, or an error stating that the list is too small will be thrown:: - - UPDATE plays SET scores[1] = 7 WHERE id = '123-afde'; - -- Removing an element by its position in the list. This implies that the list has a pre-existing element at that position, - or an error stating that the list is too small will be thrown. 
Further, as the operation removes an element from the list, the - list size will be diminished by 1, shifting the position of all the elements following the one deleted:: - - DELETE scores[1] FROM plays WHERE id = '123-afde'; - -- Deleting *all* the occurrences of particular values in the list (if a particular element doesn't occur at all in the - list, it is simply ignored and no error is thrown):: - - UPDATE plays SET scores = scores - [ 12, 21 ] WHERE id = '123-afde'; - -.. warning:: The append and prepend operations are not idempotent by nature. So in particular, if one of these operation - timeout, then retrying the operation is not safe and it may (or may not) lead to appending/prepending the value - twice. - -.. warning:: Setting and removing an element by position and removing occurences of particular values incur an internal - *read-before-write*. They will thus run more slowly and take more ressources than usual updates (with the exclusion - of conditional write that have their own cost). - -Lastly, as for :ref:`maps `, TTLs when used only apply to the newly inserted values. - -.. _udts: - -User-Defined Types -^^^^^^^^^^^^^^^^^^ - -CQL support the definition of user-defined types (UDT for short). Such a type can be created, modified and removed using -the :token:`create_type_statement`, :token:`alter_type_statement` and :token:`drop_type_statement` described below. But -once created, a UDT is simply referred to by its name: - -.. productionlist:: - user_defined_type: `udt_name` - udt_name: [ `keyspace_name` '.' ] `identifier` - - -Creating a UDT -~~~~~~~~~~~~~~ - -Creating a new user-defined type is done using a ``CREATE TYPE`` statement defined by: - -.. productionlist:: - create_type_statement: CREATE TYPE [ IF NOT EXISTS ] `udt_name` - : '(' `field_definition` ( ',' `field_definition` )* ')' - field_definition: `identifier` `cql_type` - -A UDT has a name (used to declared columns of that type) and is a set of named and typed fields. Fields name can be any -type, including collections or other UDT. For instance:: - - CREATE TYPE phone ( - country_code int, - number text, - ) - - CREATE TYPE address ( - street text, - city text, - zip text, - phones map - ) - - CREATE TABLE user ( - name text PRIMARY KEY, - addresses map> - ) - -Note that: - -- Attempting to create an already existing type will result in an error unless the ``IF NOT EXISTS`` option is used. If - it is used, the statement will be a no-op if the type already exists. -- A type is intrinsically bound to the keyspace in which it is created, and can only be used in that keyspace. At - creation, if the type name is prefixed by a keyspace name, it is created in that keyspace. Otherwise, it is created in - the current keyspace. -- As of Cassandra |version|, UDT have to be frozen in most cases, hence the ``frozen
`` in the table definition - above. Please see the section on :ref:`frozen ` for more details. - -UDT literals -~~~~~~~~~~~~ - -Once a user-defined type has been created, values can be input using a UDT literal: - -.. productionlist:: - udt_literal: '{' `identifier` ':' `term` ( ',' `identifier` ':' `term` )* '}' - -In other words, a UDT literal is like a :ref:`map <maps>` literal but its keys are the names of the fields of the type. -For instance, one could insert into the table defined in the previous section using:: - - INSERT INTO user (name, addresses) - VALUES ('z3 Pr3z1den7', { - 'home' : { - street: '1600 Pennsylvania Ave NW', - city: 'Washington', - zip: '20500', - phones: { 'cell' : { country_code: 1, number: '202 456-1111' }, - 'landline' : { country_code: 1, number: '...' } } - }, - 'work' : { - street: '1600 Pennsylvania Ave NW', - city: 'Washington', - zip: '20500', - phones: { 'fax' : { country_code: 1, number: '...' } } - } - }) - -To be valid, a UDT literal should only include fields defined by the type it is a literal of, but it can omit some fields -(in which case those will be ``null``). - -Altering a UDT -~~~~~~~~~~~~~~ - -An existing user-defined type can be modified using an ``ALTER TYPE`` statement: - -.. productionlist:: - alter_type_statement: ALTER TYPE `udt_name` `alter_type_modification` - alter_type_modification: ADD `field_definition` - : | RENAME `identifier` TO `identifier` ( `identifier` TO `identifier` )* - -You can: - -- add a new field to the type (``ALTER TYPE address ADD country text``). That new field will be ``null`` for any values - of the type created before the addition. -- rename the fields of the type (``ALTER TYPE address RENAME zip TO zipcode``). - -Dropping a UDT -~~~~~~~~~~~~~~ - -You can drop an existing user-defined type using a ``DROP TYPE`` statement: - -.. productionlist:: - drop_type_statement: DROP TYPE [ IF EXISTS ] `udt_name` - -Dropping a type results in the immediate, irreversible removal of that type. However, attempting to drop a type that is -still in use by another type, table or function will result in an error. - -If the type dropped does not exist, an error will be returned unless ``IF EXISTS`` is used, in which case the operation -is a no-op. - -.. _tuples: - -Tuples -^^^^^^ - -CQL also supports tuples and tuple types (where the elements can be of different types). Functionally, tuples can be -thought of as anonymous UDTs with anonymous fields. Tuple types and tuple literals are defined by: - -.. productionlist:: - tuple_type: TUPLE '<' `cql_type` ( ',' `cql_type` )* '>' - tuple_literal: '(' `term` ( ',' `term` )* ')' - -and can be used thusly:: - - CREATE TABLE durations ( - event text, - duration tuple<int, text>, - ) - - INSERT INTO durations (event, duration) VALUES ('ev1', (3, 'hours')); - -Unlike other "composed" types (collections and UDTs), a tuple is always :ref:`frozen ` (without the need for the -``frozen`` keyword) and it is not possible to update only some elements of a tuple (without updating the whole tuple). -Also, a tuple literal should always have the same number of values as declared in the type it is a tuple of (some of -those values can be null, but they need to be explicitly declared as such). - -.. _custom-types: - -Custom Types -^^^^^^^^^^^^ - -.. note:: Custom types exist mostly for backward compatibility purposes and their usage is discouraged. Their usage is - complex, not user friendly, and the other provided types, particularly :ref:`user-defined types <udts>`, should almost - always be enough. - -A custom type is defined by: - -.. 
productionlist:: - custom_type: `string` - -A custom type is a :token:`string` that contains the name of Java class that extends the server side ``AbstractType`` -class and that can be loaded by Cassandra (it should thus be in the ``CLASSPATH`` of every node running Cassandra). That -class will define what values are valid for the type and how the time sorts when used for a clustering column. For any -other purpose, a value of a custom type is the same than that of a ``blob``, and can in particular be input using the -:token:`blob` literal syntax. diff --git a/doc/source/data_modeling/index.rst b/doc/source/data_modeling/index.rst deleted file mode 100644 index dde031a19b53..000000000000 --- a/doc/source/data_modeling/index.rst +++ /dev/null @@ -1,20 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Data Modeling -============= - -.. todo:: TODO diff --git a/doc/source/development/code_style.rst b/doc/source/development/code_style.rst deleted file mode 100644 index 5a486a4a38e3..000000000000 --- a/doc/source/development/code_style.rst +++ /dev/null @@ -1,94 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Code Style -========== - -General Code Conventions ------------------------- - - - The Cassandra project follows `Sun's Java coding conventions `_ with an important exception: ``{`` and ``}`` are always placed on a new line - -Exception handling ------------------- - - - Never ever write ``catch (...) {}`` or ``catch (...) { logger.error() }`` merely to satisfy Java's compile-time exception checking. Always propagate the exception up or throw ``RuntimeException`` (or, if it "can't happen," ``AssertionError``). This makes the exceptions visible to automated tests. - - Avoid propagating up checked exceptions that no caller handles. Rethrow as ``RuntimeException`` (or ``IOError``, if that is more applicable). - - Similarly, logger.warn() is often a cop-out: is this an error or not? If it is don't hide it behind a warn; if it isn't, no need for the warning. 
- - If you genuinely know an exception indicates an expected condition, it's okay to ignore it BUT this must be explicitly explained in a comment. - -Boilerplate ------------ - - - Avoid redundant ``@Override`` annotations when implementing abstract or interface methods. - - Do not implement equals or hashcode methods unless they are actually needed. - - Prefer public final fields to private fields with getters. (But prefer encapsulating behavior in "real" methods to either.) - - Prefer requiring initialization in the constructor to setters. - - Avoid redundant ``this`` references to member fields or methods. - - Do not extract interfaces (or abstract classes) unless you actually need multiple implementations of it. - - Always include braces for nested levels of conditionals and loops. Only avoid braces for single level. - -Multiline statements --------------------- - - - Try to keep lines under 120 characters, but use good judgement -- it's better to exceed 120 by a little, than split a line that has no natural splitting points. - - When splitting inside a method call, use one line per parameter and align them, like this: - - :: - - SSTableWriter writer = new SSTableWriter(cfs.getTempSSTablePath(), - columnFamilies.size(), - StorageService.getPartitioner()); - - - When splitting a ternary, use one line per clause, carry the operator, and align like this: - - :: - - var = bar == null - ? doFoo() - : doBar(); - -Whitespace ----------- - - - Please make sure to use 4 spaces instead of the tab character for all your indentation. - - Many lines in many files have a bunch of trailing whitespace... Please either clean these up in a separate patch, or leave them alone, so that reviewers now and anyone reading code history later doesn't have to pay attention to whitespace diffs. - -Imports -------- - -Please observe the following order for your imports:: - - java - [blank line] - com.google.common - org.apache.commons - org.junit - org.slf4j - [blank line] - everything else alphabetically - -Format files for IDEs ---------------------- - - - IntelliJ: `intellij-codestyle.jar `_ - - IntelliJ 13: `gist for IntelliJ 13 `_ (this is a work in progress, still working on javadoc, ternary style, line continuations, etc) - - Eclipse (https://github.com/tjake/cassandra-style-eclipse) - - - diff --git a/doc/source/development/how_to_commit.rst b/doc/source/development/how_to_commit.rst deleted file mode 100644 index d3de9e511c24..000000000000 --- a/doc/source/development/how_to_commit.rst +++ /dev/null @@ -1,151 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -How-to Commit -============= - -If you are a committer, feel free to pick any process that works for you - so long as you are planning to commit the work yourself. 
- -Patch based Contribution ------------------------- - -Here is how committing and merging will usually look for merging and pushing for tickets that follow the convention (if patch-based): - -Hypothetical CASSANDRA-12345 ticket is a cassandra-3.0 based bug fix that requires different code for cassandra-3.11, cassandra-4.0, and trunk. Contributor Jackie supplied a patch for the root branch (12345-3.0.patch), and patches for the remaining branches (12345-3.11.patch, 12345-4.0.patch, 12345-trunk.patch). - -On cassandra-3.0: - #. ``git am -3 12345-3.0.patch`` (any problem b/c of CHANGES.txt not merging anymore, fix it in place) - #. ``ant realclean && ant jar build-test`` (rebuild to make sure code compiles) - -On cassandra-3.11: - #. ``git merge cassandra-3.0 -s ours`` - #. ``git apply -3 12345-3.11.patch`` (any issue with CHANGES.txt : fix and `git add CHANGES.txt`) - #. ``ant realclean && ant jar build-test`` (rebuild to make sure code compiles) - #. ``git commit --amend`` (Notice this will squash the 3.11 applied patch into the forward merge commit) - -On cassandra-4.0: - #. ``git merge cassandra-3.11 -s ours`` - #. ``git apply -3 12345-4.0.patch`` (any issue with CHANGES.txt : fix and `git add CHANGES.txt`) - #. ``ant realclean && ant jar build-test`` (rebuild to make sure code compiles) - #. ``git commit --amend`` (Notice this will squash the 4.0 applied patch into the forward merge commit) - -On trunk: - #. ``git merge cassandra-4.0 -s ours`` - #. ``git apply -3 12345-trunk.patch`` (any issue with CHANGES.txt : fix and `git add CHANGES.txt`) - #. ``ant realclean && ant jar build-test`` (rebuild to make sure code compiles) - #. ``git commit --amend`` (Notice this will squash the trunk applied patch into the forward merge commit) - -On any branch: - #. ``git push origin cassandra-3.0 cassandra-3.11 cassandra-4.0 trunk --atomic -n`` (dryrun check) - #. ``git push origin cassandra-3.0 cassandra-3.11 cassandra-4.0 trunk --atomic`` - - -Git branch based Contribution ------------------------------ - -Same scenario, but a branch-based contribution: - -On cassandra-3.0: - #. ``git cherry-pick `` (any problem b/c of CHANGES.txt not merging anymore, fix it in place) - #. ``ant realclean && ant jar build-test`` (rebuild to make sure code compiles) - -On cassandra-3.11: - #. ``git merge cassandra-3.0 -s ours`` - #. ``git format-patch -1 `` (alternative to format-patch and apply is `cherry-pick -n`) - #. ``git apply -3 .patch`` (any issue with CHANGES.txt : fix and `git add CHANGES.txt`) - #. ``ant realclean && ant jar build-test`` (rebuild to make sure code compiles) - #. ``git commit --amend`` (Notice this will squash the 3.11 applied patch into the forward merge commit) - -On cassandra-4.0: - #. ``git merge cassandra-3.11 -s ours`` - #. ``git format-patch -1 `` (alternative to format-patch and apply is `cherry-pick -n`) - #. ``git apply -3 .patch`` (any issue with CHANGES.txt : fix and `git add CHANGES.txt`) - #. ``ant realclean && ant jar build-test`` (rebuild to make sure code compiles) - #. ``git commit --amend`` (Notice this will squash the 4.0 applied patch into the forward merge commit) - -On trunk: - #. ``git merge cassandra-4.0 -s ours`` - #. ``git format-patch -1 `` (alternative to format-patch and apply is `cherry-pick -n`) - #. ``git apply -3 .patch`` (any issue with CHANGES.txt : fix and `git add CHANGES.txt`) - #. ``ant realclean && ant jar build-test`` (rebuild to make sure code compiles) - #. 
``git commit --amend`` (Notice this will squash the trunk applied patch into the forward merge commit) - -On any branch: - #. ``git push origin cassandra-3.0 cassandra-3.11 cassandra-4.0 trunk --atomic -n`` (dryrun check) - #. ``git push origin cassandra-3.0 cassandra-3.11 cassandra-4.0 trunk --atomic`` - - -Contributions only for release branches ---------------------------------------- - -If the patch is for an older branch, and doesn't impact later branches (such as trunk), we still need to merge up. - -On cassandra-3.0: - #. ``git cherry-pick `` (any problem b/c of CHANGES.txt not merging anymore, fix it in place) - #. ``ant realclean && ant jar build-test`` (rebuild to make sure code compiles) - -On cassandra-3.11: - #. ``git merge cassandra-3.0 -s ours`` - #. ``ant realclean && ant jar build-test`` (rebuild to make sure code compiles) - -On cassandra-4.0: - #. ``git merge cassandra-3.11 -s ours`` - #. ``ant realclean && ant jar build-test`` (rebuild to make sure code compiles) - -On trunk: - #. ``git merge cassandra-4.0 -s ours`` - #. ``ant realclean && ant jar build-test`` (rebuild to make sure code compiles) - -On any branch: - #. ``git push origin cassandra-3.0 cassandra-3.11 cassandra-4.0 trunk --atomic -n`` (dryrun check) - #. ``git push origin cassandra-3.0 cassandra-3.11 cassandra-4.0 trunk --atomic`` - - -Tips ----- - -.. tip:: - - A template for commit messages: - - :: - - - - - patch by ; reviewed by for CASSANDRA-##### - - - Co-authored-by: Name1 - Co-authored-by: Name2 - -.. tip:: - - Notes on git flags: - ``-3`` flag to am and apply will instruct git to perform a 3-way merge for you. If a conflict is detected, you can either resolve it manually or invoke git mergetool - for both am and apply. - - ``--atomic`` flag to git push does the obvious thing: pushes all or nothing. Without the flag, the command is equivalent to running git push once per each branch. This is nifty in case a race condition happens - you won’t push half the branches, blocking other committers’ progress while you are resolving the issue. - -.. tip:: - - The fastest way to get a patch from someone’s commit in a branch on GH - if you don’t have their repo in remotes - is to append .patch to the commit url, e.g. - curl -O https://github.com/apache/cassandra/commit/7374e9b5ab08c1f1e612bf72293ea14c959b0c3c.patch - -.. tip:: - - ``git cherry-pick -n `` can be used in place of the ``git format-patch -1 ; git apply -3 .patch`` steps. diff --git a/doc/source/development/how_to_review.rst b/doc/source/development/how_to_review.rst deleted file mode 100644 index dc97743625ed..000000000000 --- a/doc/source/development/how_to_review.rst +++ /dev/null @@ -1,71 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. 
- -Review Checklist -**************** - -When reviewing tickets in Apache JIRA, the following items should be covered as part of the review process: - -**General** - - * Does it conform to the :doc:`code_style` guidelines? - * Is there any redundant or duplicate code? - * Is the code as modular as possible? - * Can any singletons be avoided? - * Can any of the code be replaced with library functions? - * Are units of measurement used in the code consistent, both internally and with the rest of the ecosystem? - -**Error-Handling** - - * Are all data inputs and outputs checked (for the correct type, length, format, and range) and encoded? - * Where third-party utilities are used, are returning errors being caught? - * Are invalid parameter values handled? - * Are any Throwable/Exceptions passed to the JVMStabilityInspector? - * Are errors well-documented? Does the error message tell the user how to proceed? - * Do exceptions propagate to the appropriate level in the code? - -**Documentation** - - * Do comments exist and describe the intent of the code (the "why", not the "how")? - * Are javadocs added where appropriate? - * Is any unusual behavior or edge-case handling described? - * Are data structures and units of measurement explained? - * Is there any incomplete code? If so, should it be removed or flagged with a suitable marker like ‘TODO’? - * Does the code self-document via clear naming, abstractions, and flow control? - * Have NEWS.txt, the cql3 docs, and the native protocol spec been updated if needed? - * Is the ticket tagged with "client-impacting" and "doc-impacting", where appropriate? - * Has lib/licences been updated for third-party libs? Are they Apache License compatible? - * Is the Component on the JIRA ticket set appropriately? - -**Testing** - - * Is the code testable? i.e. don’t add too many or hide dependencies, unable to initialize objects, test frameworks can use methods etc. - * Do tests exist and are they comprehensive? - * Do unit tests actually test that the code is performing the intended functionality? - * Could any test code use common functionality (e.g. ccm, dtest, or CqlTester methods) or abstract it there for reuse? - * If the code may be affected by multi-node clusters, are there dtests? - * If the code may take a long time to test properly, are there CVH tests? - * Is the test passing on CI for all affected branches (up to trunk, if applicable)? Are there any regressions? - * If patch affects read/write path, did we test for performance regressions w/multiple workloads? - * If adding a new feature, were tests added and performed confirming it meets the expected SLA/use-case requirements for the feature? - -**Logging** - - * Are logging statements logged at the correct level? - * Are there logs in the critical path that could affect performance? - * Is there any log that could be added to communicate status or troubleshoot potential problems in this feature? - * Can any unnecessary logging statement be removed? - diff --git a/doc/source/development/ide.rst b/doc/source/development/ide.rst deleted file mode 100644 index 298649576057..000000000000 --- a/doc/source/development/ide.rst +++ /dev/null @@ -1,161 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. 
"License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Building and IDE Integration -**************************** - -Building From Source -==================== - -Getting started with Cassandra and IntelliJ IDEA or Eclipse is simple, once you manage to build Cassandra from source using `Java 8 `_, `Git `_ and `Ant `_. - -The source code for Cassandra is shared through the central Apache Git repository and organized by different branches. You can access the code for the current development branch through git as follows:: - - git clone http://git-wip-us.apache.org/repos/asf/cassandra.git cassandra-trunk - -Other branches will point to different versions of Cassandra. Switching to a different branch requires checking out the branch by its name:: - - git checkout cassandra-3.0 - -You can get a list of available branches with ``git branch``. - -Finally build Cassandra using ant:: - - ant - -This may take a significant amount of time depending on whether artifacts have to be downloaded and the number of classes that need to be compiled. - -.. hint:: - - You can setup multiple working trees for different Cassandra versions from the same repository using `git-worktree `_. - -.. note:: - - `Bleeding edge development snapshots `_ of Cassandra are available from Jenkins continuous integration. - -Setting up Cassandra in IntelliJ IDEA -===================================== - -`IntelliJ IDEA `_ by JetBrains is one of the most popular IDEs for Cassandra and Java development in general. The Community Edition is provided as a free download with all features needed to get started developing Cassandra. - -Setup Cassandra as a Project (C* 2.1 and newer) ------------------------------------------------ - -Since 2.1.5, there is a new ant target: ``generate-idea-files``. Please see our `wiki `_ for instructions for older Cassandra versions. - -Please clone and build Cassandra as described above and execute the following steps: - -1. Once Cassandra is built, generate the IDEA files using ant: - -:: - - ant generate-idea-files - -2. Start IDEA - -3. Open the IDEA project from the checked out Cassandra directory using the menu item Open in IDEA's File menu - -The project generated by the ant task ``generate-idea-files`` contains nearly everything you need to debug Cassandra and execute unit tests. - - * Run/debug defaults for JUnit - * Run/debug configuration for Cassandra daemon - * License header for Java source files - * Cassandra code style - * Inspections - -Setting up Cassandra in Eclipse -=============================== - -Eclipse is a popular open source IDE that can be used for Cassandra development. Various Eclipse environments are available from the `download page `_. The following guide was created with "Eclipse IDE for Java Developers". - -These instructions were tested on Ubuntu 16.04 with Eclipse Neon (4.6) using Cassandra 2.1, 2.2 and 3.x. - -Project Settings ----------------- - -**It is important that you generate the Eclipse files with Ant before trying to set up the Eclipse project.** - - * Clone and build Cassandra as described above. 
- * Run ``ant generate-eclipse-files`` to create the Eclipse settings. - * Start Eclipse. - * Select ``File->Import->Existing Projects into Workspace->Select git directory``. - * Make sure "cassandra-trunk" is recognized and selected as a project (assuming you checked the code out into the folder cassandra-trunk as described above). - * Confirm "Finish" to have your project imported. - -You should now be able to find the project as part of the "Package Explorer" or "Project Explorer" without having Eclipse complain about any errors after building the project automatically. - -Unit Tests ----------- - -Unit tests can be run from Eclipse by simply right-clicking the class file or method and selecting ``Run As->JUnit Test``. Tests can be debugged this way as well by defining breakpoints (double-click line number) and selecting ``Debug As->JUnit Test``. - -Alternatively all unit tests can be run from the command line as described in :doc:`testing` - -Debugging Cassandra Using Eclipse ---------------------------------- - -There are two ways how to start and debug a local Cassandra instance with Eclipse. You can either start Cassandra just as you normally would by using the ``./bin/cassandra`` script and connect to the JVM through `remotely `_ from Eclipse or start Cassandra from Eclipse right away. - -Starting Cassandra From Command Line -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - * Set environment variable to define remote debugging options for the JVM: - ``export JVM_EXTRA_OPTS="-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=1414"`` - * Start Cassandra by executing the ``./bin/cassandra`` - -Afterwards you should be able to connect to the running Cassandra process through the following steps: - -From the menu, select ``Run->Debug Configurations..`` - -.. image:: images/eclipse_debug0.png - -Create new remote application - -.. image:: images/eclipse_debug1.png - -Configure connection settings by specifying a name and port 1414 - -.. image:: images/eclipse_debug2.png - -Afterwards confirm "Debug" to connect to the JVM and start debugging Cassandra! - -Starting Cassandra From Eclipse -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Cassandra can also be started directly from Eclipse if you don't want to use the command line. - -From the menu, select ``Run->Run Configurations..`` - -.. image:: images/eclipse_debug3.png - -Create new application - -.. image:: images/eclipse_debug4.png - -Specify name, project and main class ``org.apache.cassandra.service.CassandraDaemon`` - -.. image:: images/eclipse_debug5.png - -Configure additional JVM specific parameters that will start Cassandra with some of the settings created by the regular startup script. Change heap related values as needed. - -:: - - -Xms1024M -Xmx1024M -Xmn220M -Xss256k -ea -XX:+UseThreadPriorities -XX:ThreadPriorityPolicy=42 -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+UseCondCardMark -javaagent:./lib/jamm-0.3.0.jar -Djava.net.preferIPv4Stack=true - -.. image:: images/eclipse_debug6.png - -Now just confirm "Debug" and you should see the output of Cassandra starting up in the Eclipse console and should be able to set breakpoints and start debugging! - diff --git a/doc/source/development/index.rst b/doc/source/development/index.rst deleted file mode 100644 index be3d2542c6c3..000000000000 --- a/doc/source/development/index.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. 
distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Cassandra Development -********************* - -.. toctree:: - :maxdepth: 2 - - ide - testing - patches - code_style - license_compliance - how_to_review - how_to_commit diff --git a/doc/source/development/license_compliance.rst b/doc/source/development/license_compliance.rst deleted file mode 100644 index e2eba2ab8256..000000000000 --- a/doc/source/development/license_compliance.rst +++ /dev/null @@ -1,37 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. _license_compliance: - -License Compliance -****************** - - -The target of this document is to provide an overview and guidance how the Apache Cassandra project's source code and -artifacts maintain compliance with the `ASF Licensing policy `. - -The repository contains a LICENSE file, and a NOTICE file. - -The Apache Cassandra project enforces and verifies ASF License header conformance on all source files using the Apache RAT tool. - -With a few exceptions, source files consisting of works submitted directly to the ASF by the copyright owner or owner's -agent must contain the appropriate ASF license header. Files without any degree of creativity don't require a license header. - -Currently, RAT checks all .bat, .btm, .cql, .css, .g, .hmtl, .iml, .java, .jflex, .jks, .md, .mod, .name, .pom, .py, .sh, .spec, .textile, .yml, .yaml, .xml files for a LICENSE header. - -If there is an incompliance, the build will fail with the following warning: - - Some files have missing or incorrect license information. Check RAT report in build/rat.txt for more details! diff --git a/doc/source/development/patches.rst b/doc/source/development/patches.rst deleted file mode 100644 index e3d968fab6c8..000000000000 --- a/doc/source/development/patches.rst +++ /dev/null @@ -1,125 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. 
"License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Contributing Code Changes -************************* - -Choosing What to Work on -======================== - -Submitted patches can include bug fixes, changes to the Java code base, improvements for tooling (both Java or Python), documentation, testing or any other changes that requires changing the code base. Although the process of contributing code is always the same, the amount of work and time it takes to get a patch accepted also depends on the kind of issue you're addressing. - -As a general rule of thumb: - * Major new features and significant changes to the code based will likely not going to be accepted without deeper discussion within the `developer community `_ - * Bug fixes take higher priority compared to features - * The extend to which tests are required depend on how likely your changes will effect the stability of Cassandra in production. Tooling changes requires fewer tests than storage engine changes. - * Less complex patches will be faster to review: consider breaking up an issue into individual tasks and contributions that can be reviewed separately - -.. hint:: - - Not sure what to work? Just pick an issue tagged with the `low hanging fruit label `_ in JIRA, which we use to flag issues that could turn out to be good starter tasks for beginners. - -Before You Start Coding -======================= - -Although contributions are highly appreciated, we do not guarantee that each contribution will become a part of Cassandra. Therefor it's generally a good idea to first get some feedback on the things you plan to work on, especially about any new features or major changes to the code base. You can reach out to other developers on the mailing list or IRC channel listed on our `community page `_. - -You should also - * Avoid redundant work by searching for already reported issues in `JIRA `_ - * Create a new issue early in the process describing what you're working on - not just after finishing your patch - * Link related JIRA issues with your own ticket to provide a better context - * Update your ticket from time to time by giving feedback on your progress and link a GitHub WIP branch with your current code - * Ping people who you actively like to ask for advice on JIRA by `mentioning users `_ - -There are also some fixed rules that you need to be aware: - * Patches will only be applied to branches by following the release model - * Code must be testable - * Code must follow the :doc:`code_style` convention - * Changes must not break compatibility between different Cassandra versions - * Contributions must be covered by the Apache License - -Choosing the Right Branches to Work on -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -There are currently multiple Cassandra versions maintained in individual branches: - -======= ====== -Version Policy -======= ====== -3.x Tick-tock (see below) -3.0 Bug fixes only -2.2 Bug fixes only -2.1 Critical bug fixes only -======= ====== - -Corresponding branches in git are easy to recognize as they are named ``cassandra-`` (e.g. 
``cassandra-3.0``). The ``trunk`` branch is an exception, as it contains the most recent commits from all other branches and is used for creating new branches for future tick-tock releases. - -Tick-Tock Releases -"""""""""""""""""" - -New releases created as part of the `tick-tock release process `_ will either focus on stability (odd version numbers) or introduce new features (even version numbers). Any code for new Cassandra features you should be based on the latest, unreleased 3.x branch with even version number or based on trunk. - -Bug Fixes -""""""""" - -Creating patches for bug fixes is a bit more complicated as this will depend on how many different versions of Cassandra are affected. In each case, the order for merging such changes will be ``cassandra-2.1`` -> ``cassandra-2.2`` -> ``cassandra-3.0`` -> ``cassandra-3.x`` -> ``trunk``. But don't worry, merging from 2.1 would be the worst case for bugs that affect all currently supported versions, which isn't very common. As a contributor, you're also not expected to provide a single patch for each version. What you need to do however is: - - * Be clear about which versions you could verify to be affected by the bug - * For 2.x: ask if a bug qualifies to be fixed in this release line, as this may be handled on case by case bases - * If possible, create a patch against the lowest version in the branches listed above (e.g. if you found the bug in 3.9 you should try to fix it already in 3.0) - * Test if the patch can be merged cleanly across branches in the direction listed above - * Be clear which branches may need attention by the committer or even create custom patches for those if you can - -Creating a Patch -================ - -So you've finished coding and the great moment arrives: it's time to submit your patch! - - 1. Create a branch for your changes if you haven't done already. Many contributors name their branches based on ticket number and Cassandra version, e.g. ``git checkout -b 12345-3.0`` - 2. Verify that you follow Cassandra's :doc:`code_style` - 3. Make sure all tests (including yours) pass using ant as described in :doc:`testing`. If you suspect a test failure is unrelated to your change, it may be useful to check the test's status by searching the issue tracker or looking at `CI `_ results for the relevant upstream version. Note that the full test suites take many hours to complete, so it is common to only run specific relevant tests locally before uploading a patch. Once a patch has been uploaded, the reviewer or committer can help setup CI jobs to run the full test suites. - 4. Consider going through the :doc:`how_to_review` for your code. This will help you to understand how others will consider your change for inclusion. - 5. Don’t make the committer squash commits for you in the root branch either. Multiple commits are fine - and often preferable - during review stage, especially for incremental review, but once +1d, do either: - - a. Attach a patch to JIRA with a single squashed commit in it (per branch), or - b. Squash the commits in-place in your branches into one - - 6. Include a CHANGES.txt entry (put it at the top of the list), and format the commit message appropriately in your patch ending with the following statement on the last line: ``patch by X; reviewed by Y for CASSANDRA-ZZZZZ`` - 7. When you're happy with the result, create a patch: - - :: - - git add - git commit -m '' - git format-patch HEAD~1 - mv (e.g. 
12345-trunk.txt, 12345-3.0.txt) - - Alternatively, many contributors prefer to make their branch available on GitHub. In this case, fork the Cassandra repository on GitHub and push your branch: - - :: - - git push --set-upstream origin 12345-3.0 - - 8. To make life easier for your reviewer/committer, you may want to make sure your patch applies cleanly to later branches and create additional patches/branches for later Cassandra versions to which your original patch does not apply cleanly. That said, this is not critical, and you will receive feedback on your patch regardless. - 9. Attach the newly generated patch to the ticket/add a link to your branch and click "Submit Patch" at the top of the ticket. This will move the ticket into "Patch Available" status, indicating that your submission is ready for review. - 10. Wait for other developers or committers to review it and hopefully +1 the ticket (see :doc:`how_to_review`). If your change does not receive a +1, do not be discouraged. If possible, the reviewer will give suggestions to improve your patch or explain why it is not suitable. - 11. If the reviewer has given feedback to improve the patch, make the necessary changes and move the ticket into "Patch Available" once again. - -Once the review process is complete, you will receive a +1. Wait for a committer to commit it. Do not delete your branches immediately after they’ve been committed - keep them on GitHub for a while. Alternatively, attach a patch to JIRA for historical record. It’s not that uncommon for a committer to mess up a merge. In case of that happening, access to the original code is required, or else you’ll have to redo some of the work. - - diff --git a/doc/source/development/testing.rst b/doc/source/development/testing.rst deleted file mode 100644 index a0d2ae4a53b9..000000000000 --- a/doc/source/development/testing.rst +++ /dev/null @@ -1,170 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Testing -******* - -Creating tests is one of the most important and also most difficult parts of developing Cassandra. There are different ways to test your code depending on what you're working on. - - -Unit Testing -============ - -The most simple way to test code in Cassandra is probably by writing a unit test. Cassandra uses JUnit as a testing framework and test cases can be found in the ``test/unit`` directory. Ideally you’d be able to create a unit test for your implementation that would exclusively cover the class you created (the unit under test). Unfortunately this is not always possible and Cassandra doesn’t have a very mock friendly code base. Often you’ll find yourself in a situation where you have to make use of an embedded Cassandra instance that you’ll be able to interact with in your test. 
If you want to make use of CQL in your test, you can simply extend CQLTester and use some of the convenient helper methods such as in the following example. - -.. code-block:: java - - @Test - public void testBatchAndList() throws Throwable - { - createTable("CREATE TABLE %s (k int PRIMARY KEY, l list)"); - execute("BEGIN BATCH " + - "UPDATE %1$s SET l = l +[ 1 ] WHERE k = 0; " + - "UPDATE %1$s SET l = l + [ 2 ] WHERE k = 0; " + - "UPDATE %1$s SET l = l + [ 3 ] WHERE k = 0; " + - "APPLY BATCH"); - - assertRows(execute("SELECT l FROM %s WHERE k = 0"), - row(list(1, 2, 3))); - } - -Unit tests can be run from the command line using the ``ant test`` command, ``ant test -Dtest.name=`` to execute a test suite or ``ant testsome -Dtest.name= -Dtest.methods=[,testmethod2]`` for individual tests. For example, to run all test methods in the ``org.apache.cassandra.cql3.SimpleQueryTest`` class, you would run:: - - ant test -Dtest.name=SimpleQueryTest - -To run only the ``testStaticCompactTables()`` test method from that class, you would run:: - - ant testsome -Dtest.name=org.apache.cassandra.cql3.SimpleQueryTest -Dtest.methods=testStaticCompactTables - -Long running tests ------------------- - -Test that consume a significant amount of time during execution can be found in the ``test/long`` directory and executed as a regular JUnit test or standalone program. Except for the execution time, there’s nothing really special about them. However, ant will execute tests under ``test/long`` only when using the ``ant long-test`` target. - -Flaky tests ------------ - -If a test failure is difficult to reproduce you can always use a shell loop, circle repeat strategy and similar solutions. At the JUnit level ``RepeatableRunner`` will let you run a JUnit class N times for convenience. On tests that are fast this is a much faster way to iterate than doing it at the shell level. Beware of tests that modify singleton state or similar as they won't work. - -DTests -====== - -One way of doing integration or system testing at larger scale is by using `dtest `_, which stands for “Cassandra Distributed Tests”. The idea is to automatically setup Cassandra clusters using various configurations and simulate certain use cases you want to test. This is done using Python scripts and ``ccmlib`` from the `ccm `_ project. Dtests will setup clusters using this library just as you do running ad-hoc ``ccm`` commands on your local machine. Afterwards dtests will use the `Python driver `_ to interact with the nodes, manipulate the file system, analyze logs or mess with individual nodes. - -Using dtests helps us to prevent regression bugs by continually executing tests on the `CI server `_ against new patches. For frequent contributors, this Jenkins is set up to build branches from their GitHub repositories. It is likely that your reviewer will use this Jenkins instance to run tests for your patch. Read more on the motivation behind the CI server `here `_. - -The best way to learn how to write dtests is probably by reading the introduction "`How to Write a Dtest `_" and by looking at existing, recently updated tests in the project. New tests must follow certain `style conventions `_ that are being checked before accepting contributions. In contrast to Cassandra, dtest issues and pull-requests are managed on github, therefor you should make sure to link any created dtests in your Cassandra ticket and also refer to the ticket number in your dtest PR. 
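For orientation, the sketch below shows the typical shape of a dtest. It is illustrative only and assumes the helpers commonly provided by the cassandra-dtest framework (the ``Tester`` base class, the ccm-backed ``self.cluster`` fixture and ``patient_cql_connection``); check existing tests in the dtest repository for the exact, current API.

.. code-block:: python

    # Illustrative dtest sketch: spins up a small ccm-managed cluster and checks a counter.
    # Assumes the cassandra-dtest framework supplies Tester, self.cluster and
    # patient_cql_connection; adapt the names to the current dtest code base.
    from dtest import Tester


    class TestCounterSmoke(Tester):

        def test_counter_increment(self):
            # Create and start a three-node local cluster via ccmlib.
            cluster = self.cluster
            cluster.populate(3).start()
            node1 = cluster.nodelist()[0]

            # Talk to the cluster with the Python driver through the dtest helper.
            session = self.patient_cql_connection(node1)
            session.execute("CREATE KEYSPACE ks WITH replication = "
                            "{'class': 'SimpleStrategy', 'replication_factor': 3}")
            session.execute("CREATE TABLE ks.counts (id text PRIMARY KEY, hits counter)")

            # Increment twice, then verify the counter converged to 2.
            for _ in range(2):
                session.execute("UPDATE ks.counts SET hits = hits + 1 WHERE id = 'home'")
            assert session.execute("SELECT hits FROM ks.counts WHERE id = 'home'").one().hits == 2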
- -Creating a good dtest can be tough, but it should not prevent you from submitting patches! Please ask in the corresponding JIRA ticket how to write a good dtest for the patch. In most cases a reviewer or committer will able to support you, and in some cases they may offer to write a dtest for you. - -Performance Testing -=================== - -Performance tests for Cassandra are a special breed of tests that are not part of the usual patch contribution process. In fact you can contribute tons of patches to Cassandra without ever running performance tests. They are important however when working on performance improvements, as such improvements must be measurable. - -Cassandra Stress Tool ---------------------- - -TODO: `CASSANDRA-12365 `_ - -cstar_perf ----------- - -Another tool available on github is `cstar_perf `_ that can be used for intensive performance testing in large clusters or locally. Please refer to the project page on how to set it up and how to use it. - -CircleCI -======== - -Cassandra ships with a default `CircleCI `_ configuration, to enable running tests on your branches, you need to go the CircleCI website, click "Login" and log in with your github account. Then you need to give CircleCI permission to watch your repositories. Once you have done that, you can optionally configure CircleCI to run tests in parallel - click "Projects", then your github account and then click the settings for the project. If you leave the parallelism at 1 for Cassandra, only ``ant eclipse-warnings`` and ``ant test`` will be run. If you up the parallelism to 4, it also runs ``ant long-test``, ``ant test-compression`` and ``ant stress-test``. - -The configuration for CircleCI is in the ``.circleci/config.yml`` file. This configuration file is meant to use low resources, you can find equivalent configuration files using more resources in the same ``.circleci`` directory. Please read the ``readme.md`` file in that directory for further information. Note that the higher resources are not available in the free tier of CircleCI. - -The optional ``repeated_utest``/``repeated_dtest`` CircleCI jobs run a specific JUnit/Python test repeatedly. In an analogous way, upgrade tests can be run repeatedly with the jobs ``repeated_upgrade_dtest``/``repeated_jvm_upgrade_dtest``. This is useful to verify that a certain test is stable. It's usually a good idea to run these jobs when adding or modifying a test. 
To specify what test should be run and the number of repetitions you should edit the related evironment variables in the CircleCI configuration file: - -+----------------------------------------------+---------------------------------------------------------------+ -| Variable | Description | -+==============================================+===============================================================+ -|``REPEATED_UTEST_TARGET`` | The Ant test target to run, for example: | -| | | -| | * ``testsome`` | -| | * ``test-jvm-dtest-some`` | -| | * ``test-cdc`` | -| | * ``test-compression`` | -| | * ``test-system-keyspace-directory`` | -+----------------------------------------------+---------------------------------------------------------------+ -|``REPEATED_UTEST_CLASS`` | The name of the Java test class to be run multiple times, for | -| | example: | -| | | -| | * ``org.apache.cassandra.cql3.ViewTest`` | -| | * ``org.apache.cassandra.distributed.test.PagingTest`` | -+----------------------------------------------+---------------------------------------------------------------+ -|``REPEATED_UTEST_METHODS`` | The optional specific methods within ``REPEATED_UTEST_CLASS`` | -| | to be run, for example: | -| | | -| | * ``testCompoundPartitionKey`` | -| | * ``testCompoundPartitionKey,testStaticTable`` | -+----------------------------------------------+---------------------------------------------------------------+ -|``REPEATED_UTEST_COUNT`` | The number of times that the repeated Java test should be run | -+----------------------------------------------+---------------------------------------------------------------+ -|``REPEATED_UTEST_STOP_ON_FAILURE`` | Whether the utest iteration should stop on the first failure | -+----------------------------------------------+---------------------------------------------------------------+ -|``REPEATED_DTEST_NAME`` | The Python dtest to be run multiple times, for example: | -| | | -| | * ``cqlsh_tests/test_cqlsh.py`` | -| | * ``cqlsh_tests/test_cqlsh.py::TestCqlshSmoke`` | -+----------------------------------------------+---------------------------------------------------------------+ -|``REPEATED_DTEST_VNODES`` | Whether the repeated Python dtest should use vnodes | -+----------------------------------------------+---------------------------------------------------------------+ -|``REPEATED_DTEST_COUNT`` | The number of times that the repeated Python dtest should be | -| | run | -+----------------------------------------------+---------------------------------------------------------------+ -|``REPEATED_DTEST_STOP_ON_FAILURE`` | Whether the dtest iteration should stop on the first failure | -+----------------------------------------------+---------------------------------------------------------------+ -|``REPEATED_UPGRADE_DTEST_NAME`` | A Python upgrade dtest to be run multiple times, for example: | -| | | -| | * ``upgrade_tests/cql_tests.py`` | -| | * ``upgrade_tests/repair_test.py`` | -+----------------------------------------------+---------------------------------------------------------------+ -|``REPEATED_UPGRADE_DTEST_COUNT`` | The number of times that the repeated Python upgrade dtest | -| | should be run | -+----------------------------------------------+---------------------------------------------------------------+ -|``REPEATED_UPGRADE_DTEST_STOP_ON_ | Whether the Python upgrade dtest iteration should stop on the | -|FAILURE`` | first failure | 
-+----------------------------------------------+---------------------------------------------------------------+ -|``REPEATED_JVM_UPGRADE_DTEST_CLASS`` | The name of JVM upgrade dtest class to be run multiple times, | -| | for example: | -| | | -| | * | ``org.apache.cassandra.distributed.upgrade.`` | -| | | ``MixedModeAvailabilityV30Test`` | -| | * | ``org.apache.cassandra.distributed.upgrade.`` | -| | | ``MixedModeConsistencyV3XTest`` | -+----------------------------------------------+---------------------------------------------------------------+ -|``REPEATED_JVM_UPGRADE_DTEST_METHODS`` | The optional specific methods within | -| | ``REPEATED_JVM_UPGRADE_DTEST_CLASS`` to be run, for example: | -| | | -| | * ``testAvailabilityV30ToV4`` | -| | * ``testAvailabilityV30ToV3X,testAvailabilityV30ToV4`` | -+----------------------------------------------+---------------------------------------------------------------+ -|``REPEATED_JVM_UPGRADE_DTEST_COUNT`` | The number of times that the repeated JVM upgrade dtest | -| | should be run | -+----------------------------------------------+---------------------------------------------------------------+ -|``REPEATED_JVM_UPGRADE_DTEST_STOP_ON_FAILURE``| Whether the JVM upgrade dtest iteration should stop on the | -| | first failure | -+----------------------------------------------+---------------------------------------------------------------+ - - diff --git a/doc/source/faq/index.rst b/doc/source/faq/index.rst deleted file mode 100644 index d985e37168be..000000000000 --- a/doc/source/faq/index.rst +++ /dev/null @@ -1,298 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Frequently Asked Questions -========================== - -- :ref:`why-cant-list-all` -- :ref:`what-ports` -- :ref:`what-happens-on-joins` -- :ref:`asynch-deletes` -- :ref:`one-entry-ring` -- :ref:`can-large-blob` -- :ref:`nodetool-connection-refused` -- :ref:`to-batch-or-not-to-batch` -- :ref:`selinux` -- :ref:`how-to-unsubscribe` -- :ref:`cassandra-eats-all-my-memory` -- :ref:`what-are-seeds` -- :ref:`are-seeds-SPOF` -- :ref:`why-message-dropped` -- :ref:`oom-map-failed` -- :ref:`what-on-same-timestamp-update` -- :ref:`why-bootstrapping-stream-error` - -.. _why-cant-list-all: - -Why can't I set ``listen_address`` to listen on 0.0.0.0 (all my addresses)? ---------------------------------------------------------------------------- - -Cassandra is a gossip-based distributed system and ``listen_address`` is the address a node tells other nodes to reach -it at. Telling other nodes "contact me on any of my addresses" is a bad idea; if different nodes in the cluster pick -different addresses for you, Bad Things happen. 
- -If you don't want to manually specify an IP to ``listen_address`` for each node in your cluster (understandable!), leave -it blank and Cassandra will use ``InetAddress.getLocalHost()`` to pick an address. Then it's up to you or your ops team -to make things resolve correctly (``/etc/hosts/``, DNS, etc). - -One exception to this process is JMX, which by default binds to 0.0.0.0 (Java bug 6425769). - -See :jira:`256` and :jira:`43` for more gory details. - -.. _what-ports: - -What ports does Cassandra use? ------------------------------ - -By default, Cassandra uses 7000 for cluster communication (7001 if SSL is enabled), 9042 for native protocol clients, -and 7199 for JMX (and 9160 for the deprecated Thrift interface). The internode communication and native protocol ports -are configurable in the :ref:`cassandra-yaml`. The JMX port is configurable in ``cassandra-env.sh`` (through JVM -options). All ports are TCP. - -.. _what-happens-on-joins: - -What happens to existing data in my cluster when I add new nodes? ----------------------------------------------------------------- - -When a new node joins a cluster, it will automatically contact the other nodes in the cluster and copy the right data -to itself. See :ref:`topology-changes`. - -.. _asynch-deletes: - -I delete data from Cassandra, but disk usage stays the same. What gives? ------------------------------------------------------------------------ - -Data you write to Cassandra gets persisted to SSTables. Since SSTables are immutable, the data can't actually be removed -when you perform a delete; instead, a marker (also called a "tombstone") is written to indicate the value's new status. -Never fear though, on the first compaction that occurs between the data and the tombstone, the data will be expunged -completely and the corresponding disk space recovered. See :ref:`compaction` for more detail. - -.. _one-entry-ring: - -Why does nodetool ring only show one entry, even though my nodes logged that they see each other joining the ring? ------------------------------------------------------------------------------------------------------------------- - -This happens when you have the same token assigned to each node. Don't do that. - -Most often this bites people who deploy by installing Cassandra on a VM (especially when using the Debian package, which -auto-starts Cassandra after installation, thus generating and saving a token), then cloning that VM to other nodes. - -The easiest fix is to wipe the data and commitlog directories, thus making sure that each node will generate a random -token on the next restart. - -.. _change-replication-factor: - -Can I change the replication factor (of a keyspace) on a live cluster? ----------------------------------------------------------------------- - -Yes, but it will require running repair (or cleanup) to change the replica count of existing data: - -- :ref:`Alter ` the replication factor for the desired keyspace (using cqlsh for instance). -- If you're reducing the replication factor, run ``nodetool cleanup`` on the cluster to remove surplus replicated data. - Cleanup runs on a per-node basis. -- If you're increasing the replication factor, run ``nodetool repair`` to ensure data is replicated according to the new - configuration. Repair runs on a per-replica set basis. This is an intensive process that may result in adverse cluster - performance. It's highly recommended to do rolling repairs, as an attempt to repair the entire cluster at once will - most likely swamp it. - -.. 
_can-large-blob: - -Can I Store (large) BLOBs in Cassandra? ---------------------------------------- - -Cassandra isn't optimized for large file or BLOB storage and a single ``blob`` value is always read and sent to the -client entirely. As such, storing small blobs (less than single digit MB) should not be a problem, but it is advised to -manually split large blobs into smaller chunks. - -Please note in particular that by default, any value greater than 16MB will be rejected by Cassandra due to the -``max_mutation_size_in_kb`` configuration of the :ref:`cassandra-yaml` file (which defaults to half of -``commitlog_segment_size_in_mb``, which itself defaults to 32MB). - -.. _nodetool-connection-refused: - -Nodetool says "Connection refused to host: 127.0.1.1" for any remote host. What gives? --------------------------------------------------------------------------------------- - -Nodetool relies on JMX, which in turn relies on RMI, which in turn sets up its own listeners and connectors as needed on -each end of the exchange. Normally all of this happens behind the scenes transparently, but incorrect name resolution -for either the host connecting, or the one being connected to, can result in crossed wires and confusing exceptions. - -If you are not using DNS, then make sure that your ``/etc/hosts`` files are accurate on both ends. If that fails, try -setting the ``-Djava.rmi.server.hostname=`` JVM option near the bottom of ``cassandra-env.sh`` to an -interface that you can reach from the remote machine. - -.. _to-batch-or-not-to-batch: - -Will batching my operations speed up my bulk load? -------------------------------------------------- - -No. Using batches to load data will generally just add "spikes" of latency. Use asynchronous INSERTs instead, or use -true :ref:`bulk-loading`. - -An exception is batching updates to a single partition, which can be a Good Thing (as long as the size of a single batch -stays reasonable). But never ever blindly batch everything! - -.. _selinux: - -On RHEL, nodes are unable to join the ring ------------------------------------------ - -Check if `SELinux `__ is on; if it is, turn it off. - -.. _how-to-unsubscribe: - -How do I unsubscribe from the email list? ----------------------------------------- - -Send an email to ``user-unsubscribe@cassandra.apache.org``. - -.. _cassandra-eats-all-my-memory: - -Why does top report that Cassandra is using a lot more memory than the Java heap max? -------------------------------------------------------------------------------------- - -Cassandra uses `Memory Mapped Files `__ (mmap) internally. That is, we -use the operating system's virtual memory system to map a number of on-disk files into the Cassandra process' address -space. This will "use" virtual memory; i.e. address space, and will be reported by tools like top accordingly, but on 64 -bit systems virtual address space is effectively unlimited so you should not worry about that. - -What matters from the perspective of "memory use" in the sense it is normally meant, is the amount of data allocated -on brk() or mmap'd /dev/zero, which represents real memory used. The key issue is that for a mmap'd file, there is never -a need to retain the data resident in physical memory. Thus, whatever you do keep resident in physical memory is -essentially just there as a cache, in the same way as normal I/O will cause the kernel page cache to retain data that -you read/write.
- -The difference between normal I/O and mmap() is that in the mmap() case the memory is actually mapped to the process, -thus affecting the virtual size as reported by top. The main argument for using mmap() instead of standard I/O is the -fact that reading entails just touching memory - in the case of the memory being resident, you just read it - you don't -even take a page fault (so no overhead in entering the kernel and doing a semi-context switch). This is covered in more -detail `here `__. - -.. _what-are-seeds: - -What are seeds? --------------- - -Seeds are used during startup to discover the cluster. - -If you configure your nodes to refer to some node as a seed, nodes in your ring tend to send gossip messages to seeds more -often (also see the :ref:`section on gossip `) than to non-seeds. In other words, seeds work as hubs of the -gossip network. With seeds, each node can detect status changes of other nodes quickly. - -Seeds are also referred to by new nodes on bootstrap to learn about other nodes in the ring. When you add a new node to the ring, you -need to specify at least one live seed to contact. Once a node joins the ring, it learns about the other nodes, so it -doesn't need a seed on subsequent boots. - -You can make a node a seed at any time. There is nothing special about seed nodes. If you list the node in the seed list, it -is a seed. - -Seeds do not auto bootstrap (i.e. if a node has itself in its seed list it will not automatically transfer data to itself). -If you want a node to do that, bootstrap it first and then add it to the seed list later. If you have no data (new install) you -do not have to worry about bootstrap at all. - -Recommended usage of seeds: - -- pick two (or more) nodes per data center as seed nodes. -- sync the seed list to all your nodes - -.. _are-seeds-SPOF: - -Does single seed mean single point of failure? ---------------------------------------------- - -The ring can operate or boot without a seed; however, you will not be able to add new nodes to the cluster. It is -recommended to configure multiple seeds in production systems. - -.. _cant-call-jmx-method: - -Why can't I call JMX method X on jconsole? ------------------------------------------ - -Some JMX operations use array arguments and, as jconsole doesn't support array arguments, those operations can't be -called with jconsole (the buttons are inactive for them). You need to write a JMX client to call such operations or use an -array-capable JMX monitoring tool. - -.. _why-message-dropped: - -Why do I see "... messages dropped ..." in the logs? ---------------------------------------------------- - -This is a symptom of load shedding -- Cassandra defending itself against more requests than it can handle. - -Internode messages which are received by a node, but do not get processed within their proper timeout (see -``read_request_timeout``, ``write_request_timeout``, ... in the :ref:`cassandra-yaml`), are dropped rather than -processed (since the coordinator node will no longer be waiting for a response). - -For writes, this means that the mutation was not applied to all replicas it was sent to. The inconsistency will be -repaired by read repair, hints or a manual repair. The write operation may also have timed out as a result. - -For reads, this means a read request may not have completed. - -Load shedding is part of the Cassandra architecture; if this is a persistent issue, it is generally a sign of an -overloaded node or cluster. - -.. 
_oom-map-failed: - -Cassandra dies with ``java.lang.OutOfMemoryError: Map failed`` --------------------------------------------------------------- - -If Cassandra is dying **specifically** with the "Map failed" message, it means the OS is denying java the ability to -lock more memory. In linux, this typically means memlock is limited. Check ``/proc//limits`` to verify -this and raise it (eg, via ulimit in bash). You may also need to increase ``vm.max_map_count.`` Note that the debian -package handles this for you automatically. - - -.. _what-on-same-timestamp-update: - -What happens if two updates are made with the same timestamp? -------------------------------------------------------------- - -Updates must be commutative, since they may arrive in different orders on different replicas. As long as Cassandra has a -deterministic way to pick the winner (in a timestamp tie), the one selected is as valid as any other, and the specifics -should be treated as an implementation detail. That said, in the case of a timestamp tie, Cassandra follows two rules: -first, deletes take precedence over inserts/updates. Second, if there are two updates, the one with the lexically larger -value is selected. - -.. _why-bootstrapping-stream-error: - -Why bootstrapping a new node fails with a "Stream failed" error? ----------------------------------------------------------------- - -Two main possibilities: - -#. the GC may be creating long pauses disrupting the streaming process -#. compactions happening in the background hold streaming long enough that the TCP connection fails - -In the first case, regular GC tuning advices apply. In the second case, you need to set TCP keepalive to a lower value -(default is very high on Linux). Try to just run the following:: - - $ sudo /sbin/sysctl -w net.ipv4.tcp_keepalive_time=60 net.ipv4.tcp_keepalive_intvl=60 net.ipv4.tcp_keepalive_probes=5 - -To make those settings permanent, add them to your ``/etc/sysctl.conf`` file. - -Note: `GCE `__'s firewall will always interrupt TCP connections that are inactive for -more than 10 min. Running the above command is highly recommended in that environment. - - - - - - - - - - - diff --git a/doc/source/getting_started/configuring.rst b/doc/source/getting_started/configuring.rst deleted file mode 100644 index 27fac7872808..000000000000 --- a/doc/source/getting_started/configuring.rst +++ /dev/null @@ -1,67 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Configuring Cassandra ---------------------- - -For running Cassandra on a single node, the steps above are enough, you don't really need to change any configuration. -However, when you deploy a cluster of nodes, or use clients that are not on the same host, then there are some -parameters that must be changed. 
- -The Cassandra configuration files can be found in the ``conf`` directory of tarballs. For packages, the configuration -files will be located in ``/etc/cassandra``. - -Main runtime properties -^^^^^^^^^^^^^^^^^^^^^^^ - -Most of configuration in Cassandra is done via yaml properties that can be set in ``cassandra.yaml``. At a minimum you -should consider setting the following properties: - -- ``cluster_name``: the name of your cluster. -- ``seeds``: a comma separated list of the IP addresses of your cluster seeds. -- ``storage_port``: you don't necessarily need to change this but make sure that there are no firewalls blocking this - port. -- ``listen_address``: the IP address of your node, this is what allows other nodes to communicate with this node so it - is important that you change it. Alternatively, you can set ``listen_interface`` to tell Cassandra which interface to - use, and consecutively which address to use. Set only one, not both. -- ``native_transport_port``: as for storage\_port, make sure this port is not blocked by firewalls as clients will - communicate with Cassandra on this port. - -Changing the location of directories -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The following yaml properties control the location of directories: - -- ``data_file_directories``: one or more directories where data files are located. -- ``commitlog_directory``: the directory where commitlog files are located. -- ``saved_caches_directory``: the directory where saved caches are located. -- ``hints_directory``: the directory where hints are located. - -For performance reasons, if you have multiple disks, consider putting commitlog and data files on different disks. - -Environment variables -^^^^^^^^^^^^^^^^^^^^^ - -JVM-level settings such as heap size can be set in ``cassandra-env.sh``. You can add any additional JVM command line -argument to the ``JVM_OPTS`` environment variable; when Cassandra starts these arguments will be passed to the JVM. - -Logging -^^^^^^^ - -The logger in use is logback. You can change logging properties by editing ``logback.xml``. By default it will log at -INFO level into a file called ``system.log`` and at debug level into a file called ``debug.log``. When running in the -foreground, it will also log at INFO level to the console. - diff --git a/doc/source/getting_started/drivers.rst b/doc/source/getting_started/drivers.rst deleted file mode 100644 index baec82378f3e..000000000000 --- a/doc/source/getting_started/drivers.rst +++ /dev/null @@ -1,107 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. _client-drivers: - -Client drivers --------------- - -Here are known Cassandra client drivers organized by language. 
Before choosing a driver, you should verify the Cassandra -version and functionality supported by a specific driver. - -Java -^^^^ - -- `Achilles `__ -- `Astyanax `__ -- `Casser `__ -- `Datastax Java driver `__ -- `Kundera `__ -- `PlayORM `__ - -Python -^^^^^^ - -- `Datastax Python driver `__ - -Ruby -^^^^ - -- `Datastax Ruby driver `__ - -C# / .NET -^^^^^^^^^ - -- `Cassandra Sharp `__ -- `Datastax C# driver `__ -- `Fluent Cassandra `__ - -Nodejs -^^^^^^ - -- `Datastax Nodejs driver `__ -- `Node-Cassandra-CQL `__ - -PHP -^^^ - -- `CQL \| PHP `__ -- `Datastax PHP driver `__ -- `PHP-Cassandra `__ -- `PHP Library for Cassandra `__ - -C++ -^^^ - -- `Datastax C++ driver `__ -- `libQTCassandra `__ - -Scala -^^^^^ - -- `Datastax Spark connector `__ -- `Phantom `__ -- `Quill `__ - -Clojure -^^^^^^^ - -- `Alia `__ -- `Cassaforte `__ -- `Hayt `__ - -Erlang -^^^^^^ - -- `CQerl `__ -- `Erlcass `__ - -Go -^^ - -- `CQLc `__ -- `Gocassa `__ -- `GoCQL `__ - -Haskell -^^^^^^^ - -- `Cassy `__ - -Rust -^^^^ - -- `Rust CQL `__ diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst deleted file mode 100644 index 4ca9c4d40bee..000000000000 --- a/doc/source/getting_started/index.rst +++ /dev/null @@ -1,33 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Getting Started -=============== - -This section covers how to get started using Apache Cassandra and should be the first thing to read if you are new to -Cassandra. - -.. toctree:: - :maxdepth: 2 - - installing - configuring - querying - drivers - - diff --git a/doc/source/getting_started/installing.rst b/doc/source/getting_started/installing.rst deleted file mode 100644 index 9be85e5874a9..000000000000 --- a/doc/source/getting_started/installing.rst +++ /dev/null @@ -1,106 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. 
highlight:: none - -Installing Cassandra --------------------- - -Prerequisites -^^^^^^^^^^^^^ - -- The latest version of Java 8, either the `Oracle Java Standard Edition 8 - `__ or `OpenJDK 8 `__. To - verify that you have the correct version of java installed, type ``java -version``. - -- For using cqlsh, the latest version of `Python 2.7 `__. To verify that you have - the correct version of Python installed, type ``python --version``. - -Installation from binary tarball files -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- Download the latest stable release from the `Apache Cassandra downloads website `__. - -- Untar the file somewhere, for example: - -:: - - tar -xvf apache-cassandra-3.6-bin.tar.gz cassandra - -The files will be extracted into ``apache-cassandra-3.6``, you need to substitute 3.6 with the release number that you -have downloaded. - -- Optionally add ``apache-cassandra-3.6\bin`` to your path. -- Start Cassandra in the foreground by invoking ``bin/cassandra -f`` from the command line. Press "Control-C" to stop - Cassandra. Start Cassandra in the background by invoking ``bin/cassandra`` from the command line. Invoke ``kill pid`` - or ``pkill -f CassandraDaemon`` to stop Cassandra, where pid is the Cassandra process id, which you can find for - example by invoking ``pgrep -f CassandraDaemon``. -- Verify that Cassandra is running by invoking ``bin/nodetool status`` from the command line. -- Configuration files are located in the ``conf`` sub-directory. -- Since Cassandra 2.1, log and data directories are located in the ``logs`` and ``data`` sub-directories respectively. - Older versions defaulted to ``/var/log/cassandra`` and ``/var/lib/cassandra``. Due to this, it is necessary to either - start Cassandra with root privileges or change ``conf/cassandra.yaml`` to use directories owned by the current user, - as explained below in the section on changing the location of directories. - -Installation from Debian packages -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- Add the Apache repository of Cassandra to ``/etc/apt/sources.list.d/cassandra.sources.list``, for example for version - 3.6: - -:: - - echo "deb https://downloads.apache.org/cassandra/debian 36x main" | sudo tee -a /etc/apt/sources.list.d/cassandra.sources.list - -- Add the Apache Cassandra repository keys: - -:: - - curl https://downloads.apache.org/cassandra/KEYS | sudo apt-key add - - -- Update the repositories: - -:: - - sudo apt-get update - -- If you encounter this error: - -:: - - GPG error: http://www.apache.org 36x InRelease: The following signatures couldn't be verified because the public key is not available: NO_PUBKEY A278B781FE4B2BDA - -Then add the public key A278B781FE4B2BDA as follows: - -:: - - sudo apt-key adv --keyserver pool.sks-keyservers.net --recv-key A278B781FE4B2BDA - -and repeat ``sudo apt-get update``. The actual key may be different, you get it from the error message itself. For a -full list of Apache contributors public keys, you can refer to `this link `__. - -- Install Cassandra: - -:: - - sudo apt-get install cassandra - -- You can start Cassandra with ``sudo service cassandra start`` and stop it with ``sudo service cassandra stop``. - However, normally the service will start automatically. For this reason be sure to stop it if you need to make any - configuration changes. -- Verify that Cassandra is running by invoking ``nodetool status`` from the command line. -- The default location of configuration files is ``/etc/cassandra``. 
-- The default location of log and data directories is ``/var/log/cassandra/`` and ``/var/lib/cassandra``. diff --git a/doc/source/getting_started/querying.rst b/doc/source/getting_started/querying.rst deleted file mode 100644 index 55b162bb43fc..000000000000 --- a/doc/source/getting_started/querying.rst +++ /dev/null @@ -1,52 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Inserting and querying ----------------------- - -The API to Cassandra is :ref:`CQL `, the Cassandra Query Language. To use CQL, you will need to connect to the -cluster, which can be done: - -- either using cqlsh, -- or through a client driver for Cassandra. - -CQLSH -^^^^^ - -cqlsh is a command line shell for interacting with Cassandra through CQL. It is shipped with every Cassandra package, -and can be found in the bin/ directory alongside the cassandra executable. It connects to the single node specified on -the command line. For example:: - - $ bin/cqlsh localhost - Connected to Test Cluster at localhost:9042. - [cqlsh 5.0.1 | Cassandra 3.8 | CQL spec 3.4.2 | Native protocol v4] - Use HELP for help. - cqlsh> SELECT cluster_name, listen_address FROM system.local; - - cluster_name | listen_address - --------------+---------------- - Test Cluster | 127.0.0.1 - - (1 rows) - cqlsh> - -See the :ref:`cqlsh section ` for full documentation. - -Client drivers -^^^^^^^^^^^^^^ - -A lot of client drivers are provided by the Community and a list of known drivers is provided in :ref:`the next section -`. You should refer to the documentation of each drivers for more information on how to use them. diff --git a/doc/source/index.rst b/doc/source/index.rst deleted file mode 100644 index 562603d19bf5..000000000000 --- a/doc/source/index.rst +++ /dev/null @@ -1,41 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Welcome to Apache Cassandra's documentation! -============================================ - -This is the official documentation for `Apache Cassandra `__ |version|. 
If you would like -to contribute to this documentation, you are welcome to do so by submitting your contribution like any other patch -following `these instructions `__. - -Contents: - -.. toctree:: - :maxdepth: 2 - - getting_started/index - architecture/index - data_modeling/index - cql/index - configuration/index - operating/index - tools/index - troubleshooting/index - development/index - faq/index - - bugs - contactus diff --git a/doc/source/operating/backups.rst b/doc/source/operating/backups.rst deleted file mode 100644 index c071e83b5bbe..000000000000 --- a/doc/source/operating/backups.rst +++ /dev/null @@ -1,22 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Backups -======= - -.. todo:: TODO diff --git a/doc/source/operating/bloom_filters.rst b/doc/source/operating/bloom_filters.rst deleted file mode 100644 index 0b37c18dab83..000000000000 --- a/doc/source/operating/bloom_filters.rst +++ /dev/null @@ -1,65 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Bloom Filters -------------- - -In the read path, Cassandra merges data on disk (in SSTables) with data in RAM (in memtables). To avoid checking every -SSTable data file for the partition being requested, Cassandra employs a data structure known as a bloom filter. - -Bloom filters are a probabilistic data structure that allows Cassandra to determine one of two possible states: - The -data definitely does not exist in the given file, or - The data probably exists in the given file. - -While bloom filters can not guarantee that the data exists in a given SSTable, bloom filters can be made more accurate -by allowing them to consume more RAM. Operators have the opportunity to tune this behavior per table by adjusting the -the ``bloom_filter_fp_chance`` to a float between 0 and 1. - -The default value for ``bloom_filter_fp_chance`` is 0.1 for tables using LeveledCompactionStrategy and 0.01 for all -other cases. 
- -Bloom filters are stored in RAM, but are stored offheap, so operators should not consider bloom filters when selecting -the maximum heap size. As accuracy improves (as the ``bloom_filter_fp_chance`` gets closer to 0), memory usage -increases non-linearly - the bloom filter for ``bloom_filter_fp_chance = 0.01`` will require about three times as much -memory as the same table with ``bloom_filter_fp_chance = 0.1``. - -Typical values for ``bloom_filter_fp_chance`` are usually between 0.01 (1%) to 0.1 (10%) false-positive chance, where -Cassandra may scan an SSTable for a row, only to find that it does not exist on the disk. The parameter should be tuned -by use case: - -- Users with more RAM and slower disks may benefit from setting the ``bloom_filter_fp_chance`` to a numerically lower - number (such as 0.01) to avoid excess IO operations -- Users with less RAM, more dense nodes, or very fast disks may tolerate a higher ``bloom_filter_fp_chance`` in order to - save RAM at the expense of excess IO operations -- In workloads that rarely read, or that only perform reads by scanning the entire data set (such as analytics - workloads), setting the ``bloom_filter_fp_chance`` to a much higher number is acceptable. - -Changing -^^^^^^^^ - -The bloom filter false positive chance is visible in the ``DESCRIBE TABLE`` output as the field -``bloom_filter_fp_chance``. Operators can change the value with an ``ALTER TABLE`` statement: -:: - - ALTER TABLE keyspace.table WITH bloom_filter_fp_chance=0.01 - -Operators should be aware, however, that this change is not immediate: the bloom filter is calculated when the file is -written, and persisted on disk as the Filter component of the SSTable. Upon issuing an ``ALTER TABLE`` statement, new -files on disk will be written with the new ``bloom_filter_fp_chance``, but existing sstables will not be modified until -they are compacted - if an operator needs a change to ``bloom_filter_fp_chance`` to take effect, they can trigger an -SSTable rewrite using ``nodetool scrub`` or ``nodetool upgradesstables -a``, both of which will rebuild the sstables on -disk, regenerating the bloom filters in the progress. diff --git a/doc/source/operating/bulk_loading.rst b/doc/source/operating/bulk_loading.rst deleted file mode 100644 index c8224d5cb838..000000000000 --- a/doc/source/operating/bulk_loading.rst +++ /dev/null @@ -1,24 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -.. _bulk-loading: - -Bulk Loading ------------- - -.. todo:: TODO diff --git a/doc/source/operating/cdc.rst b/doc/source/operating/cdc.rst deleted file mode 100644 index 192f62a093b3..000000000000 --- a/doc/source/operating/cdc.rst +++ /dev/null @@ -1,89 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. 
or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Change Data Capture ------------------- - -Overview ^^^^^^^^ - -Change data capture (CDC) provides a mechanism to flag specific tables for archival as well as rejecting writes to those -tables once a configurable size-on-disk for the combined flushed and unflushed CDC-log is reached. An operator can -enable CDC on a table by setting the table property ``cdc=true`` (either when :ref:`creating the table -` or :ref:`altering it `), after which any CommitLogSegments containing -data for a CDC-enabled table are moved to the directory specified in ``cassandra.yaml`` on segment discard. A threshold -of total disk space allowed is specified in the yaml, at which time newly allocated CommitLogSegments will not allow CDC -data until a consumer parses and removes data from the destination archival directory. - -Configuration ^^^^^^^^^^^^^ - -Enabling or disabling CDC on a table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -CDC is enabled or disabled through the `cdc` table property, for instance:: - - CREATE TABLE foo (a int, b text, PRIMARY KEY(a)) WITH cdc=true; - - ALTER TABLE foo WITH cdc=true; - - ALTER TABLE foo WITH cdc=false; - -cassandra.yaml parameters ~~~~~~~~~~~~~~~~~~~~~~~~~ - -The following `cassandra.yaml` options are available for CDC: - -``cdc_enabled`` (default: false) - Enable or disable CDC operations node-wide. -``cdc_raw_directory`` (default: ``$CASSANDRA_HOME/data/cdc_raw``) - Destination for CommitLogSegments to be moved after all corresponding memtables are flushed. -``cdc_free_space_in_mb`` (default: min of 4096 and 1/8th volume space) - Calculated as sum of all active CommitLogSegments that permit CDC + all flushed CDC segments in - ``cdc_raw_directory``. -``cdc_free_space_check_interval_ms`` (default: 250) - When at capacity, we limit the frequency with which we re-calculate the space taken up by ``cdc_raw_directory`` to - prevent burning CPU cycles unnecessarily. Default is to check 4 times per second. - -.. _reading-commitlogsegments: - -Reading CommitLogSegments ^^^^^^^^^^^^^^^^^^^^^^^^^ -This implementation included a refactor of CommitLogReplayer into `CommitLogReader.java -`__. -Usage is `fairly straightforward -`__ -with a `variety of signatures -`__ -available for use. In order to handle mutations read from disk, implement `CommitLogReadHandler -`__. - -Warnings ^^^^^^^^ - -**Do not enable CDC without some kind of consumption process in place.** - -The initial implementation of Change Data Capture does not include a parser (see :ref:`reading-commitlogsegments` above), -so, if CDC is enabled on a node and then on a table, the space allowed by ``cdc_free_space_in_mb`` will fill up and then writes to -CDC-enabled tables will be rejected unless some consumption process is in place.
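The practical implication of this warning is that something has to keep draining ``cdc_raw_directory``, even if it is only a placeholder until a real consumer built on ``CommitLogReader``/``CommitLogReadHandler`` (see above) is in place. The sketch below is such a placeholder, not Cassandra's API: it assumes the default ``/var/lib/cassandra/cdc_raw`` location and a hypothetical ``handOffForProcessing`` hook, and simply deletes segments after handing them off so that writes to CDC-enabled tables keep being accepted.

.. code-block:: java

    import java.io.IOException;
    import java.nio.file.DirectoryStream;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;

    public class CdcRawJanitor
    {
        // Assumed location; point this at whatever cdc_raw_directory is set to in cassandra.yaml.
        private static final Path CDC_RAW = Paths.get("/var/lib/cassandra/cdc_raw");

        public static void main(String[] args) throws IOException, InterruptedException
        {
            while (true)
            {
                try (DirectoryStream<Path> segments = Files.newDirectoryStream(CDC_RAW, "CommitLog-*.log"))
                {
                    for (Path segment : segments)
                    {
                        // A real consumer would parse the segment with CommitLogReader and a
                        // CommitLogReadHandler before discarding it; the point here is only that
                        // cdc_raw must be drained, otherwise CDC writes start being rejected.
                        handOffForProcessing(segment);
                        Files.deleteIfExists(segment);
                    }
                }
                Thread.sleep(10_000);
            }
        }

        private static void handOffForProcessing(Path segment)
        {
            System.out.println("consumed CDC segment: " + segment);
        }
    }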
- -Further Reading ^^^^^^^^^^^^^^^ - -- `Design doc `__ -- `JIRA ticket `__ diff --git a/doc/source/operating/compaction.rst b/doc/source/operating/compaction.rst deleted file mode 100644 index 290c211d467f..000000000000 --- a/doc/source/operating/compaction.rst +++ /dev/null @@ -1,443 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -.. _compaction: - -Compaction ---------- - -Types of compaction ^^^^^^^^^^^^^^^^^^^ - -The concept of compaction is used for different kinds of operations in Cassandra; the common thing about these -operations is that they take one or more sstables and output new sstables. The types of compaction are: - -Minor compaction - triggered automatically in Cassandra. -Major compaction - a user executes a compaction over all sstables on the node. -User defined compaction - a user triggers a compaction on a given set of sstables. -Scrub - try to fix any broken sstables. This can actually remove valid data if that data is corrupted; if that happens you - will need to run a full repair on the node. -Upgradesstables - upgrade sstables to the latest version. Run this after upgrading to a new major version. -Cleanup - remove any ranges this node does not own anymore, typically triggered on neighbouring nodes after a node has been - bootstrapped, since that node will take ownership of some ranges from those nodes. -Secondary index rebuild - rebuild the secondary indexes on the node. -Anticompaction - after repair the ranges that were actually repaired are split out of the sstables that existed when repair started. -Sub range compaction - It is possible to only compact a given sub range - this could be useful if you know a token that has been - misbehaving - either gathering many updates or many deletes. (``nodetool compact -st x -et y``) will pick - all sstables containing the range between x and y and issue a compaction for those sstables. For STCS this will - most likely include all sstables but with LCS it can issue the compaction for a subset of the sstables. With LCS - the resulting sstable will end up in L0. - -When is a minor compaction triggered? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -# When an sstable is added to the node through flushing/streaming etc. -# When autocompaction is enabled after being disabled (``nodetool enableautocompaction``) -# When compaction adds new sstables. -# A check for new minor compactions every 5 minutes. - -Merging sstables ^^^^^^^^^^^^^^^^ - -Compaction is about merging sstables; since partitions in sstables are sorted based on the hash of the partition key, it -is possible to efficiently merge separate sstables. Content of each partition is also sorted, so each partition can be -merged efficiently.
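To illustrate why sorted input matters, here is a small, purely conceptual sketch of a k-way merge over already-sorted streams. It is not Cassandra's compaction code (which additionally reconciles duplicate cells by timestamp); it only shows why merging sorted sstables is cheap: each input is consumed once, in order, via a small heap.

.. code-block:: java

    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;
    import java.util.PriorityQueue;

    public final class SortedMerge
    {
        // Merge individually sorted iterators into one sorted list without re-sorting,
        // analogous to how compaction merges the sorted partitions of several sstables.
        public static List<Long> merge(List<Iterator<Long>> sortedInputs)
        {
            // Each heap entry holds { current value, index of the input it came from }.
            PriorityQueue<long[]> heap = new PriorityQueue<>((a, b) -> Long.compare(a[0], b[0]));
            for (int i = 0; i < sortedInputs.size(); i++)
                if (sortedInputs.get(i).hasNext())
                    heap.add(new long[]{ sortedInputs.get(i).next(), i });

            List<Long> merged = new ArrayList<>();
            while (!heap.isEmpty())
            {
                long[] head = heap.poll();
                merged.add(head[0]);
                Iterator<Long> source = sortedInputs.get((int) head[1]);
                if (source.hasNext())
                    heap.add(new long[]{ source.next(), (int) head[1] });
            }
            return merged;
        }
    }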
- -Tombstones and Garbage Collection (GC) Grace -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Why Tombstones -~~~~~~~~~~~~~~ - -When a delete request is received by Cassandra it does not actually remove the data from the underlying store. Instead -it writes a special piece of data known as a tombstone. The Tombstone represents the delete and causes all values which -occurred before the tombstone to not appear in queries to the database. This approach is used instead of removing values -because of the distributed nature of Cassandra. - -Deletes without tombstones -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Imagine a three node cluster which has the value [A] replicated to every node.:: - - [A], [A], [A] - -If one of the nodes fails and and our delete operation only removes existing values we can end up with a cluster that -looks like:: - - [], [], [A] - -Then a repair operation would replace the value of [A] back onto the two -nodes which are missing the value.:: - - [A], [A], [A] - -This would cause our data to be resurrected even though it had been -deleted. - -Deletes with Tombstones -~~~~~~~~~~~~~~~~~~~~~~~ - -Starting again with a three node cluster which has the value [A] replicated to every node.:: - - [A], [A], [A] - -If instead of removing data we add a tombstone record, our single node failure situation will look like this.:: - - [A, Tombstone[A]], [A, Tombstone[A]], [A] - -Now when we issue a repair the Tombstone will be copied to the replica, rather than the deleted data being -resurrected.:: - - [A, Tombstone[A]], [A, Tombstone[A]], [A, Tombstone[A]] - -Our repair operation will correctly put the state of the system to what we expect with the record [A] marked as deleted -on all nodes. This does mean we will end up accruing Tombstones which will permanently accumulate disk space. To avoid -keeping tombstones forever we have a parameter known as ``gc_grace_seconds`` for every table in Cassandra. - -The gc_grace_seconds parameter and Tombstone Removal -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The table level ``gc_grace_seconds`` parameter controls how long Cassandra will retain tombstones through compaction -events before finally removing them. This duration should directly reflect the amount of time a user expects to allow -before recovering a failed node. After ``gc_grace_seconds`` has expired the tombstone may be removed (meaning there will -no longer be any record that a certain piece of data was deleted), but as a tombstone can live in one sstable and the -data it covers in another, a compaction must also include both sstable for a tombstone to be removed. More precisely, to -be able to drop an actual tombstone the following needs to be true; - -- The tombstone must be older than ``gc_grace_seconds`` -- If partition X contains the tombstone, the sstable containing the partition plus all sstables containing data older - than the tombstone containing X must be included in the same compaction. We don't need to care if the partition is in - an sstable if we can guarantee that all data in that sstable is newer than the tombstone. If the tombstone is older - than the data it cannot shadow that data. -- If the option ``only_purge_repaired_tombstones`` is enabled, tombstones are only removed if the data has also been - repaired. - -If a node remains down or disconnected for longer than ``gc_grace_seconds`` it's deleted data will be repaired back to -the other nodes and re-appear in the cluster. This is basically the same as in the "Deletes without Tombstones" section. 
-Note that tombstones will not be removed until a compaction event even if ``gc_grace_seconds`` has elapsed. - -The default value for ``gc_grace_seconds`` is 864000 which is equivalent to 10 days. This can be set when creating or -altering a table using ``WITH gc_grace_seconds``. - -TTL -^^^ - -Data in Cassandra can have an additional property called time to live - this is used to automatically drop data that has -expired once the time is reached. Once the TTL has expired the data is converted to a tombstone which stays around for -at least ``gc_grace_seconds``. Note that if you mix data with TTL and data without TTL (or just different length of the -TTL) Cassandra will have a hard time dropping the tombstones created since the partition might span many sstables and -not all are compacted at once. - -Fully expired sstables -^^^^^^^^^^^^^^^^^^^^^^ - -If an sstable contains only tombstones and it is guaranteed that that sstable is not shadowing data in any other sstable -compaction can drop that sstable. If you see sstables with only tombstones (note that TTL:ed data is considered -tombstones once the time to live has expired) but it is not being dropped by compaction, it is likely that other -sstables contain older data. There is a tool called ``sstableexpiredblockers`` that will list which sstables are -droppable and which are blocking them from being dropped. This is especially useful for time series compaction with -``TimeWindowCompactionStrategy`` (and the deprecated ``DateTieredCompactionStrategy``). - -Repaired/unrepaired data -^^^^^^^^^^^^^^^^^^^^^^^^ - -With incremental repairs Cassandra must keep track of what data is repaired and what data is unrepaired. With -anticompaction repaired data is split out into repaired and unrepaired sstables. To avoid mixing up the data again -separate compaction strategy instances are run on the two sets of data, each instance only knowing about either the -repaired or the unrepaired sstables. This means that if you only run incremental repair once and then never again, you -might have very old data in the repaired sstables that block compaction from dropping tombstones in the unrepaired -(probably newer) sstables. - -Data directories -^^^^^^^^^^^^^^^^ - -Since tombstones and data can live in different sstables it is important to realize that losing an sstable might lead to -data becoming live again - the most common way of losing sstables is to have a hard drive break down. To avoid making -data live tombstones and actual data are always in the same data directory. This way, if a disk is lost, all versions of -a partition are lost and no data can get undeleted. To achieve this a compaction strategy instance per data directory is -run in addition to the compaction strategy instances containing repaired/unrepaired data, this means that if you have 4 -data directories there will be 8 compaction strategy instances running. This has a few more benefits than just avoiding -data getting undeleted: - -- It is possible to run more compactions in parallel - leveled compaction will have several totally separate levelings - and each one can run compactions independently from the others. -- Users can backup and restore a single data directory. -- Note though that currently all data directories are considered equal, so if you have a tiny disk and a big disk - backing two data directories, the big one will be limited the by the small one. One work around to this is to create - more data directories backed by the big disk. 
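Tying the ``gc_grace_seconds`` and TTL behaviour described above to concrete statements, the sketch below uses the DataStax Java driver 4.x (an assumption about your client; ``ks``/``events``, the contact point and the datacenter name are placeholders) to create a table with a default TTL and a shortened grace period, and to write a row that will expire on its own.

.. code-block:: java

    import java.net.InetSocketAddress;
    import com.datastax.oss.driver.api.core.CqlSession;

    public class TtlExample
    {
        public static void main(String[] args)
        {
            try (CqlSession session = CqlSession.builder()
                                                .addContactPoint(new InetSocketAddress("127.0.0.1", 9042))
                                                .withLocalDatacenter("datacenter1")
                                                .build())
            {
                session.execute("CREATE KEYSPACE IF NOT EXISTS ks WITH replication = " +
                                "{'class': 'SimpleStrategy', 'replication_factor': 1}");

                // A lower gc_grace_seconds lets tombstones (including expired TTL cells) become
                // purgeable sooner, but it also shrinks the window for repairing a down node
                // before deleted data can be resurrected.
                session.execute("CREATE TABLE IF NOT EXISTS ks.events (id int PRIMARY KEY, payload text) " +
                                "WITH default_time_to_live = 86400 AND gc_grace_seconds = 432000");

                // Per-write TTL: this row becomes a tombstone after one hour.
                session.execute("INSERT INTO ks.events (id, payload) VALUES (1, 'expires soon') USING TTL 3600");
            }
        }
    }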
- -Single sstable tombstone compaction ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When an sstable is written, a histogram with the tombstone expiry times is created, and this is used to try to find -sstables with very many tombstones and run single sstable compaction on that sstable in the hope of being able to drop -tombstones in that sstable. Before starting this, it is also checked how likely it is that any tombstones will actually -be able to be dropped, and how much this sstable overlaps with other sstables. To avoid most of these checks the -compaction option ``unchecked_tombstone_compaction`` can be enabled. - -.. _compaction-options: - -Common options ^^^^^^^^^^^^^^ - -There are a number of common options for all the compaction strategies: - -``enabled`` (default: true) - Whether minor compactions should run. Note that you can have 'enabled': true as a compaction option and then do - 'nodetool enableautocompaction' to start running compactions. -``tombstone_threshold`` (default: 0.2) - How much of the sstable should be tombstones for us to consider doing a single sstable compaction of that sstable. -``tombstone_compaction_interval`` (default: 86400s (1 day)) - Since it might not be possible to drop any tombstones when doing a single sstable compaction, we need to make sure - that one sstable is not constantly getting recompacted - this option states how often we should try for a given - sstable. -``log_all`` (default: false) - New detailed compaction logging, see :ref:`below `. -``unchecked_tombstone_compaction`` (default: false) - The single sstable compaction has quite strict checks for whether it should be started; this option disables those - checks, and for some use cases this might be needed. Note that this does not change anything for the actual - compaction: tombstones are only dropped if it is safe to do so - it might just rewrite an sstable without being able - to drop any tombstones. -``only_purge_repaired_tombstone`` (default: false) - Option to enable the extra safety of making sure that tombstones are only dropped if the data has been repaired. -``min_threshold`` (default: 4) - Lower limit of number of sstables before a compaction is triggered. Not used for ``LeveledCompactionStrategy``. -``max_threshold`` (default: 32) - Upper limit of number of sstables before a compaction is triggered. Not used for ``LeveledCompactionStrategy``. - -Further, see the section on each strategy for specific additional options. - -Compaction nodetool commands ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The :ref:`nodetool ` utility provides a number of commands related to compaction: - -``enableautocompaction`` - Enable compaction. -``disableautocompaction`` - Disable compaction. -``setcompactionthroughput`` - How fast compaction should run at most - defaults to 16MB/s, but note that it is likely not possible to reach this - throughput. -``compactionstats`` - Statistics about current and pending compactions. -``compactionhistory`` - List details about the last compactions. -``setcompactionthreshold`` - Set the min/max sstable count for when to trigger compaction, defaults to 4/32. - -Switching the compaction strategy and options using JMX ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -It is possible to switch compaction strategies and their options on just a single node using JMX; this is a great way to -experiment with settings without affecting the whole cluster.
The mbean is::
-
-    org.apache.cassandra.db:type=ColumnFamilies,keyspace=,columnfamily=
-
-and the attribute to change is ``CompactionParameters`` or ``CompactionParametersJson`` if you use jconsole or jmc. The
-syntax for the json version is the same as you would use in an :ref:`ALTER TABLE ` statement -
-for example::
-
-    { 'class': 'LeveledCompactionStrategy', 'sstable_size_in_mb': 123, 'fanout_size': 10}
-
-The setting is kept until someone executes an :ref:`ALTER TABLE ` that touches the compaction
-settings or restarts the node.
-
-.. _detailed-compaction-logging:
-
-More detailed compaction logging
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Enable with the compaction option ``log_all`` and a more detailed compaction log file will be produced in your log
-directory.
-
-.. _STCS:
-
-Size Tiered Compaction Strategy
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The basic idea of ``SizeTieredCompactionStrategy`` (STCS) is to merge sstables of approximately the same size. All
-sstables are put in different buckets depending on their size. An sstable is added to a bucket if the size of the sstable
-is within ``bucket_low`` and ``bucket_high`` times the current average size of the sstables already in the bucket. This
-will create several buckets and the most interesting of those buckets will be compacted. The most interesting one is
-decided by figuring out which bucket's sstables take the most reads.
-
-Major compaction
-~~~~~~~~~~~~~~~~
-
-When running a major compaction with STCS you will end up with two sstables per data directory (one for repaired data
-and one for unrepaired data). There is also an option (-s) to do a major compaction that splits the output into several
-sstables. The sizes of the sstables are approximately 50%, 25%, 12.5%... of the total size.
-
-.. _stcs-options:
-
-STCS options
-~~~~~~~~~~~~
-
-``min_sstable_size`` (default: 50MB)
-    Sstables smaller than this are put in the same bucket.
-``bucket_low`` (default: 0.5)
-    How much smaller than the average size of a bucket an sstable should be before not being included in the bucket. That
-    is, if ``bucket_low * avg_bucket_size < sstable_size`` (and the ``bucket_high`` condition holds, see below), then
-    the sstable is added to the bucket.
-``bucket_high`` (default: 1.5)
-    How much bigger than the average size of a bucket an sstable should be before not being included in the bucket. That
-    is, if ``sstable_size < bucket_high * avg_bucket_size`` (and the ``bucket_low`` condition holds, see above), then
-    the sstable is added to the bucket.
-
-Defragmentation
-~~~~~~~~~~~~~~~
-
-Defragmentation is done when many sstables are touched during a read. The result of the read is put into the memtable
-so that the next read will not have to touch as many sstables. This can cause writes on a read-only cluster.
-
-.. _LCS:
-
-Leveled Compaction Strategy
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The idea of ``LeveledCompactionStrategy`` (LCS) is that all sstables are put into different levels where we guarantee
-that no overlapping sstables are in the same level. By overlapping we mean that the first/last tokens of an sstable
-never overlap with those of other sstables in the level. This means that for a SELECT we will only have to look for the partition key
-in a single sstable per level. Each level is 10x the size of the previous one and each sstable is 160MB by default. L0
-is where sstables are streamed/flushed - no overlap guarantees are given here.
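As a sketch of how LCS might be enabled on an existing table (the ``ks.events`` table name is hypothetical, and the values shown are simply the defaults documented in the LCS options below)::

    ALTER TABLE ks.events WITH compaction = {
        'class': 'LeveledCompactionStrategy',
        'sstable_size_in_mb': 160,
        'fanout_size': 10
    };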
-
-When picking compaction candidates we have to make sure that the compaction does not create overlap in the target level.
-This is done by always including all overlapping sstables in the next level. For example, if we select an sstable in L3,
-we need to guarantee that we pick all overlapping sstables in L4 and make sure that no currently ongoing compactions
-will create overlap if we start that compaction. We can start many parallel compactions in a level if we guarantee that
-we won't create overlap. For L0 -> L1 compactions we almost always need to include all L1 sstables since most L0 sstables
-cover the full range. We also can't compact all L0 sstables with all L1 sstables in a single compaction since that can
-use too much memory.
-
-When deciding which level to compact, LCS checks the higher levels first (with LCS, a "higher" level is one with a higher
-number: L0 is the lowest one, L8 is the highest one) and if a level is behind, a compaction will be started
-in that level.
-
-Major compaction
-~~~~~~~~~~~~~~~~
-
-It is possible to do a major compaction with LCS - it will currently start by filling out L1 and then once L1 is full,
-it continues with L2 etc. This is suboptimal and will change to create all the sstables in a high level instead
-(see CASSANDRA-11817).
-
-Bootstrapping
-~~~~~~~~~~~~~
-
-During bootstrap sstables are streamed from other nodes. The level of the remote sstable is kept to avoid many
-compactions after the bootstrap is done. During bootstrap the new node also takes writes while it is streaming the data
-from a remote node - these writes are flushed to L0 like all other writes, and to avoid those sstables blocking the
-remote sstables from going to the correct level, we only do STCS in L0 until the bootstrap is done.
-
-STCS in L0
-~~~~~~~~~~
-
-If LCS gets very many L0 sstables, reads are going to hit all (or most) of the L0 sstables since they are likely to be
-overlapping. To more quickly remedy this, LCS does STCS compactions in L0 if there are more than 32 sstables there. This
-should improve read performance more quickly compared to letting LCS do its L0 -> L1 compactions. If you keep getting
-too many sstables in L0 it is likely that LCS is not the best fit for your workload and STCS could work out better.
-
-Starved sstables
-~~~~~~~~~~~~~~~~
-
-If a node ends up with a leveling where there are a few very high level sstables that are not getting compacted, they
-might make it impossible for lower levels to drop tombstones etc. For example, if there are sstables in L6 but there is
-only enough data to actually fill L4 on the node, the leftover sstables in L6 will get starved and not compacted. This
-can happen if a user changes ``sstable_size_in_mb`` from 5MB to 160MB, for example. To avoid this, LCS tries to include
-those starved high level sstables in other compactions if there have been 25 compaction rounds where the highest level
-has not been involved.
-
-.. _lcs-options:
-
-LCS options
-~~~~~~~~~~~
-
-``sstable_size_in_mb`` (default: 160MB)
-    The target compressed (if using compression) sstable size - the sstables can end up being larger if there are very
-    large partitions on the node.
-
-``fanout_size`` (default: 10)
-    The target size of levels increases by this ``fanout_size`` multiplier. You can reduce the space amplification by tuning
-    this option.
-
-LCS also supports the ``cassandra.disable_stcs_in_l0`` startup option (``-Dcassandra.disable_stcs_in_l0=true``) to avoid
-doing STCS in L0.
-
-.. 
_TWCS:
-
-Time Window Compaction Strategy
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-``TimeWindowCompactionStrategy`` (TWCS) is designed specifically for workloads where it's beneficial to have data on
-disk grouped by the timestamp of the data, a common goal when the workload is time-series in nature or when all data is
-written with a TTL. In an expiring/TTL workload, the contents of an entire SSTable likely expire at approximately the
-same time, allowing them to be dropped completely, and space reclaimed much more reliably than when using
-``SizeTieredCompactionStrategy`` or ``LeveledCompactionStrategy``. The basic concept is that
-``TimeWindowCompactionStrategy`` will create one sstable per window, where a window is simply calculated
-as the combination of two primary options:
-
-``compaction_window_unit`` (default: DAYS)
-    A Java TimeUnit (MINUTES, HOURS, or DAYS).
-``compaction_window_size`` (default: 1)
-    The number of units that make up a window.
-
-Taken together, the operator can specify windows of virtually any size, and ``TimeWindowCompactionStrategy`` will work to
-create a single sstable for writes within that window. For efficiency during writing, the newest window will be
-compacted using ``SizeTieredCompactionStrategy``.
-
-Ideally, operators should select a ``compaction_window_unit`` and ``compaction_window_size`` pair that produces
-approximately 20-30 windows - if writing with a 90 day TTL, for example, a 3 day window would be a reasonable choice
-(``'compaction_window_unit':'DAYS','compaction_window_size':3``).
-
-TimeWindowCompactionStrategy Operational Concerns
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The primary motivation for TWCS is to separate data on disk by timestamp and to allow fully expired SSTables to drop
-more efficiently. One potential way this optimal behavior can be subverted is if data is written to SSTables out of
-order, with new data and old data in the same SSTable. Out of order data can appear in two ways:
-
-- If the user mixes old data and new data in the traditional write path, the data will be commingled in the memtables
-  and flushed into the same SSTable, where it will remain commingled.
-- If the user's read requests for old data cause read repairs that pull old data into the current memtable, that data
-  will be commingled and flushed into the same SSTable.
-
-While TWCS tries to minimize the impact of commingled data, users should attempt to avoid this behavior. Specifically,
-users should avoid queries that explicitly set the timestamp via CQL ``USING TIMESTAMP``. Additionally, users should run
-frequent repairs (which stream data in such a way that it does not become commingled), and disable background read
-repair by setting the table's ``read_repair_chance`` and ``dclocal_read_repair_chance`` to 0.
-
-Changing TimeWindowCompactionStrategy Options
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Operators wishing to enable ``TimeWindowCompactionStrategy`` on existing data should consider running a major compaction
-first, placing all existing data into a single (old) window. Subsequent newer writes will then create typical SSTables
-as expected.
-
-Operators wishing to change ``compaction_window_unit`` or ``compaction_window_size`` can do so, but may trigger
-additional compactions as adjacent windows are joined together. If the window size is decreased (for example, from 24
-hours to 12 hours), then the existing SSTables will not be modified - TWCS cannot split existing SSTables into multiple
-windows.
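Putting the TWCS options above together, a time-series table written with a 90 day TTL and 3 day windows might be declared roughly as follows. This is only a sketch - the ``ks.sensor_readings`` table and its columns are placeholders::

    CREATE TABLE ks.sensor_readings (
        sensor_id int,
        reading_time timestamp,
        value double,
        PRIMARY KEY (sensor_id, reading_time)
    ) WITH default_time_to_live = 7776000    -- 90 days
      AND compaction = {
          'class': 'TimeWindowCompactionStrategy',
          'compaction_window_unit': 'DAYS',
          'compaction_window_size': 3
      };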
diff --git a/doc/source/operating/compression.rst b/doc/source/operating/compression.rst deleted file mode 100644 index 01da34b6dfa7..000000000000 --- a/doc/source/operating/compression.rst +++ /dev/null @@ -1,94 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Compression ------------ - -Cassandra offers operators the ability to configure compression on a per-table basis. Compression reduces the size of -data on disk by compressing the SSTable in user-configurable compression ``chunk_length_in_kb``. Because Cassandra -SSTables are immutable, the CPU cost of compressing is only necessary when the SSTable is written - subsequent updates -to data will land in different SSTables, so Cassandra will not need to decompress, overwrite, and recompress data when -UPDATE commands are issued. On reads, Cassandra will locate the relevant compressed chunks on disk, decompress the full -chunk, and then proceed with the remainder of the read path (merging data from disks and memtables, read repair, and so -on). - -Configuring Compression -^^^^^^^^^^^^^^^^^^^^^^^ - -Compression is configured on a per-table basis as an optional argument to ``CREATE TABLE`` or ``ALTER TABLE``. By -default, three options are relevant: - -- ``class`` specifies the compression class - Cassandra provides three classes (``LZ4Compressor``, - ``SnappyCompressor``, and ``DeflateCompressor`` ). The default is ``LZ4Compressor``. -- ``chunk_length_in_kb`` specifies the number of kilobytes of data per compression chunk. The default is 64KB. -- ``crc_check_chance`` determines how likely Cassandra is to verify the checksum on each compression chunk during - reads. The default is 1.0. - -Users can set compression using the following syntax: - -:: - - CREATE TABLE keyspace.table (id int PRIMARY KEY) WITH compression = {'class': 'LZ4Compressor'}; - -Or - -:: - - ALTER TABLE keyspace.table WITH compression = {'class': 'SnappyCompressor', 'chunk_length_in_kb': 128, 'crc_check_chance': 0.5}; - -Once enabled, compression can be disabled with ``ALTER TABLE`` setting ``enabled`` to ``false``: - -:: - - ALTER TABLE keyspace.table WITH compression = {'enabled':'false'}; - -Operators should be aware, however, that changing compression is not immediate. The data is compressed when the SSTable -is written, and as SSTables are immutable, the compression will not be modified until the table is compacted. 
Upon -issuing a change to the compression options via ``ALTER TABLE``, the existing SSTables will not be modified until they -are compacted - if an operator needs compression changes to take effect immediately, the operator can trigger an SSTable -rewrite using ``nodetool scrub`` or ``nodetool upgradesstables -a``, both of which will rebuild the SSTables on disk, -re-compressing the data in the process. - -Benefits and Uses -^^^^^^^^^^^^^^^^^ - -Compression's primary benefit is that it reduces the amount of data written to disk. Not only does the reduced size save -in storage requirements, it often increases read and write throughput, as the CPU overhead of compressing data is faster -than the time it would take to read or write the larger volume of uncompressed data from disk. - -Compression is most useful in tables comprised of many rows, where the rows are similar in nature. Tables containing -similar text columns (such as repeated JSON blobs) often compress very well. - -Operational Impact -^^^^^^^^^^^^^^^^^^ - -- Compression metadata is stored off-heap and scales with data on disk. This often requires 1-3GB of off-heap RAM per - terabyte of data on disk, though the exact usage varies with ``chunk_length_in_kb`` and compression ratios. - -- Streaming operations involve compressing and decompressing data on compressed tables - in some code paths (such as - non-vnode bootstrap), the CPU overhead of compression can be a limiting factor. - -- The compression path checksums data to ensure correctness - while the traditional Cassandra read path does not have a - way to ensure correctness of data on disk, compressed tables allow the user to set ``crc_check_chance`` (a float from - 0.0 to 1.0) to allow Cassandra to probabilistically validate chunks on read to verify bits on disk are not corrupt. - -Advanced Use -^^^^^^^^^^^^ - -Advanced users can provide their own compression class by implementing the interface at -``org.apache.cassandra.io.compress.ICompressor``. diff --git a/doc/source/operating/error_codes.txt b/doc/source/operating/error_codes.txt deleted file mode 100644 index 279fe400a8f4..000000000000 --- a/doc/source/operating/error_codes.txt +++ /dev/null @@ -1,31 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Error Codes ------------ - -In Cassandra 3.10 and higher, when the v5 native protocol (or a higher version) is used, -``ReadFailure`` and ``WriteFailure`` errors will contain a map of replica addresses -to error codes. Those error codes are explained here: - -``0x0000`` - The error does not have a specific code assigned yet, or the cause is unknown. 
- -``0x0001`` - The read operation scanned too many tombstones (as defined by ``tombstone_failure_threshold`` in ``cassandra.yaml``), - causing a TombstoneOverwhelmingException. diff --git a/doc/source/operating/hardware.rst b/doc/source/operating/hardware.rst deleted file mode 100644 index ad3aa8d21d2e..000000000000 --- a/doc/source/operating/hardware.rst +++ /dev/null @@ -1,87 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Hardware Choices ----------------- - -Like most databases, Cassandra throughput improves with more CPU cores, more RAM, and faster disks. While Cassandra can -be made to run on small servers for testing or development environments (including Raspberry Pis), a minimal production -server requires at least 2 cores, and at least 8GB of RAM. Typical production servers have 8 or more cores and at least -32GB of RAM. - -CPU -^^^ -Cassandra is highly concurrent, handling many simultaneous requests (both read and write) using multiple threads running -on as many CPU cores as possible. The Cassandra write path tends to be heavily optimized (writing to the commitlog and -then inserting the data into the memtable), so writes, in particular, tend to be CPU bound. Consequently, adding -additional CPU cores often increases throughput of both reads and writes. - -Memory -^^^^^^ -Cassandra runs within a Java VM, which will pre-allocate a fixed size heap (java's Xmx system parameter). In addition to -the heap, Cassandra will use significant amounts of RAM offheap for compression metadata, bloom filters, row, key, and -counter caches, and an in process page cache. Finally, Cassandra will take advantage of the operating system's page -cache, storing recently accessed portions files in RAM for rapid re-use. - -For optimal performance, operators should benchmark and tune their clusters based on their individual workload. However, -basic guidelines suggest: - -- ECC RAM should always be used, as Cassandra has few internal safeguards to protect against bit level corruption -- The Cassandra heap should be no less than 2GB, and no more than 50% of your system RAM -- Heaps smaller than 12GB should consider ParNew/ConcurrentMarkSweep garbage collection -- Heaps larger than 12GB should consider G1GC - -Disks -^^^^^ -Cassandra persists data to disk for two very different purposes. The first is to the commitlog when a new write is made -so that it can be replayed after a crash or system shutdown. The second is to the data directory when thresholds are -exceeded and memtables are flushed to disk as SSTables. - -Commitlogs receive every write made to a Cassandra node and have the potential to block client operations, but they are -only ever read on node start-up. 
SSTable (data file) writes on the other hand occur asynchronously, but are read to -satisfy client look-ups. SSTables are also periodically merged and rewritten in a process called compaction. The data -held in the commitlog directory is data that has not been permanently saved to the SSTable data directories - it will be -periodically purged once it is flushed to the SSTable data files. - -Cassandra performs very well on both spinning hard drives and solid state disks. In both cases, Cassandra's sorted -immutable SSTables allow for linear reads, few seeks, and few overwrites, maximizing throughput for HDDs and lifespan of -SSDs by avoiding write amplification. However, when using spinning disks, it's important that the commitlog -(``commitlog_directory``) be on one physical disk (not simply a partition, but a physical disk), and the data files -(``data_file_directories``) be set to a separate physical disk. By separating the commitlog from the data directory, -writes can benefit from sequential appends to the commitlog without having to seek around the platter as reads request -data from various SSTables on disk. - -In most cases, Cassandra is designed to provide redundancy via multiple independent, inexpensive servers. For this -reason, using NFS or a SAN for data directories is an antipattern and should typically be avoided. Similarly, servers -with multiple disks are often better served by using RAID0 or JBOD than RAID1 or RAID5 - replication provided by -Cassandra obsoletes the need for replication at the disk layer, so it's typically recommended that operators take -advantage of the additional throughput of RAID0 rather than protecting against failures with RAID1 or RAID5. - -Common Cloud Choices -^^^^^^^^^^^^^^^^^^^^ - -Many large users of Cassandra run in various clouds, including AWS, Azure, and GCE - Cassandra will happily run in any -of these environments. Users should choose similar hardware to what would be needed in physical space. In EC2, popular -options include: - -- m1.xlarge instances, which provide 1.6TB of local ephemeral spinning storage and sufficient RAM to run moderate - workloads -- i2 instances, which provide both a high RAM:CPU ratio and local ephemeral SSDs -- m4.2xlarge / c4.4xlarge instances, which provide modern CPUs, enhanced networking and work well with EBS GP2 (SSD) - storage - -Generally, disk and network performance increases with instance size and generation, so newer generations of instances -and larger instance types within each family often perform better than their smaller or older alternatives. diff --git a/doc/source/operating/hints.rst b/doc/source/operating/hints.rst deleted file mode 100644 index f79f18ab784c..000000000000 --- a/doc/source/operating/hints.rst +++ /dev/null @@ -1,22 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. 
See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Hints ------ - -.. todo:: todo diff --git a/doc/source/operating/index.rst b/doc/source/operating/index.rst deleted file mode 100644 index e2cead2551ff..000000000000 --- a/doc/source/operating/index.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Operating Cassandra -=================== - -.. toctree:: - :maxdepth: 2 - - snitch - topo_changes - repair - read_repair - hints - compaction - bloom_filters - compression - cdc - backups - bulk_loading - metrics - security - hardware - diff --git a/doc/source/operating/metrics.rst b/doc/source/operating/metrics.rst deleted file mode 100644 index 4bd0c0843af2..000000000000 --- a/doc/source/operating/metrics.rst +++ /dev/null @@ -1,710 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Monitoring ----------- - -Metrics in Cassandra are managed using the `Dropwizard Metrics `__ library. These metrics -can be queried via JMX or pushed to external monitoring systems using a number of `built in -`__ and `third party -`__ reporter plugins. - -Metrics are collected for a single node. It's up to the operator to use an external monitoring system to aggregate them. - -Metric Types -^^^^^^^^^^^^ -All metrics reported by cassandra fit into one of the following types. - -``Gauge`` - An instantaneous measurement of a value. - -``Counter`` - A gauge for an ``AtomicLong`` instance. Typically this is consumed by monitoring the change since the last call to - see if there is a large increase compared to the norm. - -``Histogram`` - Measures the statistical distribution of values in a stream of data. - - In addition to minimum, maximum, mean, etc., it also measures median, 75th, 90th, 95th, 98th, 99th, and 99.9th - percentiles. - -``Timer`` - Measures both the rate that a particular piece of code is called and the histogram of its duration. 
- -``Latency`` - Special type that tracks latency (in microseconds) with a ``Timer`` plus a ``Counter`` that tracks the total latency - accrued since starting. The former is useful if you track the change in total latency since the last check. Each - metric name of this type will have 'Latency' and 'TotalLatency' appended to it. - -``Meter`` - A meter metric which measures mean throughput and one-, five-, and fifteen-minute exponentially-weighted moving - average throughputs. - -Table Metrics -^^^^^^^^^^^^^ - -Each table in Cassandra has metrics responsible for tracking its state and performance. - -The metric names are all appended with the specific ``Keyspace`` and ``Table`` name. - -Reported name format: - -**Metric Name** - ``org.apache.cassandra.metrics.Table...`` - -**JMX MBean** - ``org.apache.cassandra.metrics:type=Table keyspace= scope=
name=`` - -.. NOTE:: - There is a special table called '``all``' without a keyspace. This represents the aggregation of metrics across - **all** tables and keyspaces on the node. - - -=============================================== ============== =========== -Name Type Description -=============================================== ============== =========== -MemtableOnHeapSize Gauge Total amount of data stored in the memtable that resides **on**-heap, including column related overhead and partitions overwritten. -MemtableOffHeapSize Gauge Total amount of data stored in the memtable that resides **off**-heap, including column related overhead and partitions overwritten. -MemtableLiveDataSize Gauge Total amount of live data stored in the memtable, excluding any data structure overhead. -AllMemtablesOnHeapSize Gauge Total amount of data stored in the memtables (2i and pending flush memtables included) that resides **on**-heap. -AllMemtablesOffHeapSize Gauge Total amount of data stored in the memtables (2i and pending flush memtables included) that resides **off**-heap. -AllMemtablesLiveDataSize Gauge Total amount of live data stored in the memtables (2i and pending flush memtables included) that resides off-heap, excluding any data structure overhead. -MemtableColumnsCount Gauge Total number of columns present in the memtable. -MemtableSwitchCount Counter Number of times flush has resulted in the memtable being switched out. -CompressionRatio Gauge Current compression ratio for all SSTables. -EstimatedPartitionSizeHistogram Gauge Histogram of estimated partition size (in bytes). -EstimatedPartitionCount Gauge Approximate number of keys in table. -EstimatedColumnCountHistogram Gauge Histogram of estimated number of columns. -SSTablesPerReadHistogram Histogram Histogram of the number of sstable data files accessed per single partition read. SSTables skipped due to Bloom Filters, min-max key or partition index lookup are not taken into acoount. -ReadLatency Latency Local read latency for this table. -RangeLatency Latency Local range scan latency for this table. -WriteLatency Latency Local write latency for this table. -CoordinatorReadLatency Timer Coordinator read latency for this table. -CoordinatorScanLatency Timer Coordinator range scan latency for this table. -PendingFlushes Counter Estimated number of flush tasks pending for this table. -BytesFlushed Counter Total number of bytes flushed since server [re]start. -CompactionBytesWritten Counter Total number of bytes written by compaction since server [re]start. -PendingCompactions Gauge Estimate of number of pending compactions for this table. -LiveSSTableCount Gauge Number of SSTables on disk for this table. -LiveDiskSpaceUsed Counter Disk space used by SSTables belonging to this table (in bytes). -TotalDiskSpaceUsed Counter Total disk space used by SSTables belonging to this table, including obsolete ones waiting to be GC'd. -MinPartitionSize Gauge Size of the smallest compacted partition (in bytes). -MaxPartitionSize Gauge Size of the largest compacted partition (in bytes). -MeanPartitionSize Gauge Size of the average compacted partition (in bytes). -BloomFilterFalsePositives Gauge Number of false positives on table's bloom filter. -BloomFilterFalseRatio Gauge False positive ratio of table's bloom filter. -BloomFilterDiskSpaceUsed Gauge Disk space used by bloom filter (in bytes). -BloomFilterOffHeapMemoryUsed Gauge Off-heap memory used by bloom filter. -IndexSummaryOffHeapMemoryUsed Gauge Off-heap memory used by index summary. 
-CompressionMetadataOffHeapMemoryUsed Gauge Off-heap memory used by compression meta data. -KeyCacheHitRate Gauge Key cache hit rate for this table. -TombstoneScannedHistogram Histogram Histogram of tombstones scanned in queries on this table. -LiveScannedHistogram Histogram Histogram of live cells scanned in queries on this table. -ColUpdateTimeDeltaHistogram Histogram Histogram of column update time delta on this table. -ViewLockAcquireTime Timer Time taken acquiring a partition lock for materialized view updates on this table. -ViewReadTime Timer Time taken during the local read of a materialized view update. -TrueSnapshotsSize Gauge Disk space used by snapshots of this table including all SSTable components. -RowCacheHitOutOfRange Counter Number of table row cache hits that do not satisfy the query filter, thus went to disk. -RowCacheHit Counter Number of table row cache hits. -RowCacheMiss Counter Number of table row cache misses. -CasPrepare Latency Latency of paxos prepare round. -CasPropose Latency Latency of paxos propose round. -CasCommit Latency Latency of paxos commit round. -PercentRepaired Gauge Percent of table data that is repaired on disk. -SpeculativeRetries Counter Number of times speculative retries were sent for this table. -WaitingOnFreeMemtableSpace Histogram Histogram of time spent waiting for free memtable space, either on- or off-heap. -DroppedMutations Counter Number of dropped mutations on this table. -ReadRepairRequests Meter Throughput for mutations generated by read-repair. -ShortReadProtectionRequests Meter Throughput for requests to get extra rows during short read protection. -ReplicaFilteringProtectionRequests Meter Throughput for row completion requests during replica filtering protection. -ReplicaFilteringProtectionRowsCachedPerQuery Histogram Histogram of the number of rows cached per query when replica filtering protection is engaged. -============================================ ============== =========== - -Keyspace Metrics -^^^^^^^^^^^^^^^^ -Each keyspace in Cassandra has metrics responsible for tracking its state and performance. - -These metrics are the same as the ``Table Metrics`` above, only they are aggregated at the Keyspace level. - -Reported name format: - -**Metric Name** - ``org.apache.cassandra.metrics.keyspace..`` - -**JMX MBean** - ``org.apache.cassandra.metrics:type=Keyspace scope= name=`` - -ThreadPool Metrics -^^^^^^^^^^^^^^^^^^ - -Cassandra splits work of a particular type into its own thread pool. This provides back-pressure and asynchrony for -requests on a node. It's important to monitor the state of these thread pools since they can tell you how saturated a -node is. - -The metric names are all appended with the specific ``ThreadPool`` name. The thread pools are also categorized under a -specific type. - -Reported name format: - -**Metric Name** - ``org.apache.cassandra.metrics.ThreadPools...`` - -**JMX MBean** - ``org.apache.cassandra.metrics:type=ThreadPools scope= type= name=`` - -===================== ============== =========== -Name Type Description -===================== ============== =========== -ActiveTasks Gauge Number of tasks being actively worked on by this pool. -PendingTasks Gauge Number of queued tasks queued up on this pool. -CompletedTasks Counter Number of tasks completed. -TotalBlockedTasks Counter Number of tasks that were blocked due to queue saturation. -CurrentlyBlockedTask Counter Number of tasks that are currently blocked due to queue saturation but on retry will become unblocked. 
-MaxPoolSize Gauge The maximum number of threads in this pool. -===================== ============== =========== - -The following thread pools can be monitored. - -============================ ============== =========== -Name Type Description -============================ ============== =========== -Native-Transport-Requests transport Handles client CQL requests -CounterMutationStage request Responsible for counter writes -ViewMutationStage request Responsible for materialized view writes -MutationStage request Responsible for all other writes -ReadRepairStage request ReadRepair happens on this thread pool -ReadStage request Local reads run on this thread pool -RequestResponseStage request Coordinator requests to the cluster run on this thread pool -AntiEntropyStage internal Builds merkle tree for repairs -CacheCleanupExecutor internal Cache maintenance performed on this thread pool -CompactionExecutor internal Compactions are run on these threads -GossipStage internal Handles gossip requests -HintsDispatcher internal Performs hinted handoff -InternalResponseStage internal Responsible for intra-cluster callbacks -MemtableFlushWriter internal Writes memtables to disk -MemtablePostFlush internal Cleans up commit log after memtable is written to disk -MemtableReclaimMemory internal Memtable recycling -MigrationStage internal Runs schema migrations -MiscStage internal Misceleneous tasks run here -PendingRangeCalculator internal Calculates token range -PerDiskMemtableFlushWriter_0 internal Responsible for writing a spec (there is one of these per disk 0-N) -Sampler internal Responsible for re-sampling the index summaries of SStables -SecondaryIndexManagement internal Performs updates to secondary indexes -ValidationExecutor internal Performs validation compaction or scrubbing -============================ ============== =========== - -.. |nbsp| unicode:: 0xA0 .. nonbreaking space - -Client Request Metrics -^^^^^^^^^^^^^^^^^^^^^^ - -Client requests have their own set of metrics that encapsulate the work happening at coordinator level. - -Different types of client requests are broken down by ``RequestType``. - -Reported name format: - -**Metric Name** - ``org.apache.cassandra.metrics.ClientRequest..`` - -**JMX MBean** - ``org.apache.cassandra.metrics:type=ClientRequest scope= name=`` - - -:RequestType: CASRead -:Description: Metrics related to transactional read requests. -:Metrics: - ===================== ============== ============================================================= - Name Type Description - ===================== ============== ============================================================= - Timeouts Counter Number of timeouts encountered. - Failures Counter Number of transaction failures encountered. - |nbsp| Latency Transaction read latency. - Unavailables Counter Number of unavailable exceptions encountered. - UnfinishedCommit Counter Number of transactions that were committed on read. - ConditionNotMet Counter Number of transaction preconditions did not match current values. - ContentionHistogram Histogram How many contended reads were encountered - ===================== ============== ============================================================= - -:RequestType: CASWrite -:Description: Metrics related to transactional write requests. 
-:Metrics: - ===================== ============== ============================================================= - Name Type Description - ===================== ============== ============================================================= - Timeouts Counter Number of timeouts encountered. - Failures Counter Number of transaction failures encountered. - |nbsp| Latency Transaction write latency. - UnfinishedCommit Counter Number of transactions that were committed on write. - ConditionNotMet Counter Number of transaction preconditions did not match current values. - ContentionHistogram Histogram How many contended writes were encountered - ===================== ============== ============================================================= - - -:RequestType: Read -:Description: Metrics related to standard read requests. -:Metrics: - ===================== ============== ============================================================= - Name Type Description - ===================== ============== ============================================================= - Timeouts Counter Number of timeouts encountered. - Failures Counter Number of read failures encountered. - |nbsp| Latency Read latency. - Unavailables Counter Number of unavailable exceptions encountered. - ===================== ============== ============================================================= - -:RequestType: RangeSlice -:Description: Metrics related to token range read requests. -:Metrics: - ===================== ============== ============================================================= - Name Type Description - ===================== ============== ============================================================= - Timeouts Counter Number of timeouts encountered. - Failures Counter Number of range query failures encountered. - |nbsp| Latency Range query latency. - Unavailables Counter Number of unavailable exceptions encountered. - ===================== ============== ============================================================= - -:RequestType: Write -:Description: Metrics related to regular write requests. -:Metrics: - ===================== ============== ============================================================= - Name Type Description - ===================== ============== ============================================================= - Timeouts Counter Number of timeouts encountered. - Failures Counter Number of write failures encountered. - |nbsp| Latency Write latency. - Unavailables Counter Number of unavailable exceptions encountered. - ===================== ============== ============================================================= - - -:RequestType: ViewWrite -:Description: Metrics related to materialized view write wrtes. -:Metrics: - ===================== ============== ============================================================= - Timeouts Counter Number of timeouts encountered. - Failures Counter Number of transaction failures encountered. - Unavailables Counter Number of unavailable exceptions encountered. - ViewReplicasAttempted Counter Total number of attempted view replica writes. - ViewReplicasSuccess Counter Total number of succeded view replica writes. - ViewPendingMutations Gauge ViewReplicasAttempted - ViewReplicasSuccess. - ViewWriteLatency Timer Time between when mutation is applied to base table and when CL.ONE is achieved on view. 
- ===================== ============== ============================================================= - -Cache Metrics -^^^^^^^^^^^^^ - -Cassandra caches have metrics to track the effectivness of the caches. Though the ``Table Metrics`` might be more useful. - -Reported name format: - -**Metric Name** - ``org.apache.cassandra.metrics.Cache..`` - -**JMX MBean** - ``org.apache.cassandra.metrics:type=Cache scope= name=`` - -========================== ============== =========== -Name Type Description -========================== ============== =========== -Capacity Gauge Cache capacity in bytes. -Entries Gauge Total number of cache entries. -FifteenMinuteCacheHitRate Gauge 15m cache hit rate. -FiveMinuteCacheHitRate Gauge 5m cache hit rate. -OneMinuteCacheHitRate Gauge 1m cache hit rate. -HitRate Gauge All time cache hit rate. -Hits Meter Total number of cache hits. -Misses Meter Total number of cache misses. -MissLatency Timer Latency of misses. -Requests Gauge Total number of cache requests. -Size Gauge Total size of occupied cache, in bytes. -========================== ============== =========== - -The following caches are covered: - -============================ =========== -Name Description -============================ =========== -CounterCache Keeps hot counters in memory for performance. -ChunkCache In process uncompressed page cache. -KeyCache Cache for partition to sstable offsets. -RowCache Cache for rows kept in memory. -============================ =========== - -.. NOTE:: - Misses and MissLatency are only defined for the ChunkCache - -CQL Metrics -^^^^^^^^^^^ - -Metrics specific to CQL prepared statement caching. - -Reported name format: - -**Metric Name** - ``org.apache.cassandra.metrics.CQL.`` - -**JMX MBean** - ``org.apache.cassandra.metrics:type=CQL name=`` - -========================== ============== =========== -Name Type Description -========================== ============== =========== -PreparedStatementsCount Gauge Number of cached prepared statements. -PreparedStatementsEvicted Counter Number of prepared statements evicted from the prepared statement cache -PreparedStatementsExecuted Counter Number of prepared statements executed. -RegularStatementsExecuted Counter Number of **non** prepared statements executed. -PreparedStatementsRatio Gauge Percentage of statements that are prepared vs unprepared. -========================== ============== =========== - - -DroppedMessage Metrics -^^^^^^^^^^^^^^^^^^^^^^ - -Metrics specific to tracking dropped messages for different types of requests. -Dropped writes are stored and retried by ``Hinted Handoff`` - -Reported name format: - -**Metric Name** - ``org.apache.cassandra.metrics.DroppedMessages..`` - -**JMX MBean** - ``org.apache.cassandra.metrics:type=DroppedMetrics scope= name=`` - -========================== ============== =========== -Name Type Description -========================== ============== =========== -CrossNodeDroppedLatency Timer The dropped latency across nodes. -InternalDroppedLatency Timer The dropped latency within node. -Dropped Meter Number of dropped messages. 
-========================== ============== =========== - -The different types of messages tracked are: - -============================ =========== -Name Description -============================ =========== -BATCH_STORE Batchlog write -BATCH_REMOVE Batchlog cleanup (after succesfully applied) -COUNTER_MUTATION Counter writes -HINT Hint replay -MUTATION Regular writes -READ Regular reads -READ_REPAIR Read repair -PAGED_SLICE Paged read -RANGE_SLICE Token range read -REQUEST_RESPONSE RPC Callbacks -_TRACE Tracing writes -============================ =========== - -Streaming Metrics -^^^^^^^^^^^^^^^^^ - -Metrics reported during ``Streaming`` operations, such as repair, bootstrap, rebuild. - -These metrics are specific to a peer endpoint, with the source node being the node you are pulling the metrics from. - -Reported name format: - -**Metric Name** - ``org.apache.cassandra.metrics.Streaming..`` - -**JMX MBean** - ``org.apache.cassandra.metrics:type=Streaming scope= name=`` - -========================== ============== =========== -Name Type Description -========================== ============== =========== -IncomingBytes Counter Number of bytes streamed to this node from the peer. -OutgoingBytes Counter Number of bytes streamed to the peer endpoint from this node. -========================== ============== =========== - - -Compaction Metrics -^^^^^^^^^^^^^^^^^^ - -Metrics specific to ``Compaction`` work. - -Reported name format: - -**Metric Name** - ``org.apache.cassandra.metrics.Compaction.`` - -**JMX MBean** - ``org.apache.cassandra.metrics:type=Compaction name=`` - -========================== ======================================== =============================================== -Name Type Description -========================== ======================================== =============================================== -BytesCompacted Counter Total number of bytes compacted since server [re]start. -PendingTasks Gauge Estimated number of compactions remaining to perform. -CompletedTasks Gauge Number of completed compactions since server [re]start. -TotalCompactionsCompleted Meter Throughput of completed compactions since server [re]start. -PendingTasksByTableName Gauge>> Estimated number of compactions remaining to perform, grouped by keyspace and then table name. This info is also kept in ``Table Metrics``. -========================== ======================================== =============================================== - -CommitLog Metrics -^^^^^^^^^^^^^^^^^ - -Metrics specific to the ``CommitLog`` - -Reported name format: - -**Metric Name** - ``org.apache.cassandra.metrics.CommitLog.`` - -**JMX MBean** - ``org.apache.cassandra.metrics:type=CommitLog name=`` - -========================== ============== =========== -Name Type Description -========================== ============== =========== -CompletedTasks Gauge Total number of commit log messages written since [re]start. -PendingTasks Gauge Number of commit log messages written but yet to be fsync'd. -TotalCommitLogSize Gauge Current size, in bytes, used by all the commit log segments. -WaitingOnSegmentAllocation Timer Time spent waiting for a CommitLogSegment to be allocated - under normal conditions this should be zero. -WaitingOnCommit Timer The time spent waiting on CL fsync; for Periodic this is only occurs when the sync is lagging its sync interval. -========================== ============== =========== - -Storage Metrics -^^^^^^^^^^^^^^^ - -Metrics specific to the storage engine. 
- -Reported name format: - -**Metric Name** - ``org.apache.cassandra.metrics.Storage.`` - -**JMX MBean** - ``org.apache.cassandra.metrics:type=Storage name=`` - -========================== ============== =========== -Name Type Description -========================== ============== =========== -Exceptions Counter Number of internal exceptions caught. Under normal exceptions this should be zero. -Load Counter Size, in bytes, of the on disk data size this node manages. -TotalHints Counter Number of hint messages written to this node since [re]start. Includes one entry for each host to be hinted per hint. -TotalHintsInProgress Counter Number of hints attemping to be sent currently. -========================== ============== =========== - -HintedHandoff Metrics -^^^^^^^^^^^^^^^^^^^^^ - -Metrics specific to Hinted Handoff. There are also some metrics related to hints tracked in ``Storage Metrics`` - -These metrics include the peer endpoint **in the metric name** - -Reported name format: - -**Metric Name** - ``org.apache.cassandra.metrics.HintedHandOffManager.`` - -**JMX MBean** - ``org.apache.cassandra.metrics:type=HintedHandOffManager name=`` - -=========================== ============== =========== -Name Type Description -=========================== ============== =========== -Hints_created- Counter Number of hints on disk for this peer. -Hints_not_stored- Counter Number of hints not stored for this peer, due to being down past the configured hint window. -=========================== ============== =========== - -SSTable Index Metrics -^^^^^^^^^^^^^^^^^^^^^ - -Metrics specific to the SSTable index metadata. - -Reported name format: - -**Metric Name** - ``org.apache.cassandra.metrics.Index..RowIndexEntry`` - -**JMX MBean** - ``org.apache.cassandra.metrics:type=Index scope=RowIndexEntry name=`` - -=========================== ============== =========== -Name Type Description -=========================== ============== =========== -IndexedEntrySize Histogram Histogram of the on-heap size, in bytes, of the index across all SSTables. -IndexInfoCount Histogram Histogram of the number of on-heap index entries managed across all SSTables. -IndexInfoGets Histogram Histogram of the number index seeks performed per SSTable. -=========================== ============== =========== - -BufferPool Metrics -^^^^^^^^^^^^^^^^^^ - -Metrics specific to the internal recycled buffer pool Cassandra manages. This pool is meant to keep allocations and GC -lower by recycling on and off heap buffers. - -Reported name format: - -**Metric Name** - ``org.apache.cassandra.metrics.BufferPool.`` - -**JMX MBean** - ``org.apache.cassandra.metrics:type=BufferPool name=`` - -=========================== ============== =========== -Name Type Description -=========================== ============== =========== -Size Gauge Size, in bytes, of the managed buffer pool -Misses Meter The rate of misses in the pool. The higher this is the more allocations incurred. -=========================== ============== =========== - - -Client Metrics -^^^^^^^^^^^^^^ - -Metrics specifc to client managment. 
- -Reported name format: - -**Metric Name** - ``org.apache.cassandra.metrics.Client.`` - -**JMX MBean** - ``org.apache.cassandra.metrics:type=Client name=`` - -=========================== ============== =========== -Name Type Description -=========================== ============== =========== -connectedNativeClients Counter Number of clients connected to this nodes native protocol server -connectedThriftClients Counter Number of clients connected to this nodes thrift protocol server -=========================== ============== =========== - -JVM Metrics -^^^^^^^^^^^ - -JVM metrics such as memory and garbage collection statistics can either be accessed by connecting to the JVM using JMX or can be exported using `Metric Reporters`_. - -BufferPool -++++++++++ - -**Metric Name** - ``jvm.buffers..`` - -**JMX MBean** - ``java.nio:type=BufferPool name=`` - -========================== ============== =========== -Name Type Description -========================== ============== =========== -Capacity Gauge Estimated total capacity of the buffers in this pool -Count Gauge Estimated number of buffers in the pool -Used Gauge Estimated memory that the Java virtual machine is using for this buffer pool -========================== ============== =========== - -FileDescriptorRatio -+++++++++++++++++++ - -**Metric Name** - ``jvm.fd.`` - -**JMX MBean** - ``java.lang:type=OperatingSystem name=`` - -========================== ============== =========== -Name Type Description -========================== ============== =========== -Usage Ratio Ratio of used to total file descriptors -========================== ============== =========== - -GarbageCollector -++++++++++++++++ - -**Metric Name** - ``jvm.gc..`` - -**JMX MBean** - ``java.lang:type=GarbageCollector name=`` - -========================== ============== =========== -Name Type Description -========================== ============== =========== -Count Gauge Total number of collections that have occurred -Time Gauge Approximate accumulated collection elapsed time in milliseconds -========================== ============== =========== - -Memory -++++++ - -**Metric Name** - ``jvm.memory..`` - -**JMX MBean** - ``java.lang:type=Memory`` - -========================== ============== =========== -Committed Gauge Amount of memory in bytes that is committed for the JVM to use -Init Gauge Amount of memory in bytes that the JVM initially requests from the OS -Max Gauge Maximum amount of memory in bytes that can be used for memory management -Usage Ratio Ratio of used to maximum memory -Used Gauge Amount of used memory in bytes -========================== ============== =========== - -MemoryPool -++++++++++ - -**Metric Name** - ``jvm.memory.pools..`` - -**JMX MBean** - ``java.lang:type=MemoryPool name=`` - -========================== ============== =========== -Committed Gauge Amount of memory in bytes that is committed for the JVM to use -Init Gauge Amount of memory in bytes that the JVM initially requests from the OS -Max Gauge Maximum amount of memory in bytes that can be used for memory management -Usage Ratio Ratio of used to maximum memory -Used Gauge Amount of used memory in bytes -========================== ============== =========== - -JMX -^^^ - -Any JMX based client can access metrics from cassandra. - -If you wish to access JMX metrics over http it's possible to download `Mx4jTool `__ and -place ``mx4j-tools.jar`` into the classpath. 
On startup you will see in the log:: - - HttpAdaptor version 3.0.2 started on port 8081 - -To choose a different port (8081 is the default) or a different listen address (0.0.0.0 is not the default) edit -``conf/cassandra-env.sh`` and uncomment:: - - #MX4J_ADDRESS="-Dmx4jaddress=0.0.0.0" - - #MX4J_PORT="-Dmx4jport=8081" - - -Metric Reporters -^^^^^^^^^^^^^^^^ - -As mentioned at the top of this section on monitoring the Cassandra metrics can be exported to a number of monitoring -system a number of `built in `__ and `third party -`__ reporter plugins. - -The configuration of these plugins is managed by the `metrics reporter config project -`__. There is a sample configuration file located at -``conf/metrics-reporter-config-sample.yaml``. - -Once configured, you simply start cassandra with the flag -``-Dcassandra.metricsReporterConfigFile=metrics-reporter-config.yaml``. The specified .yaml file plus any 3rd party -reporter jars must all be in Cassandra's classpath. diff --git a/doc/source/operating/read_repair.rst b/doc/source/operating/read_repair.rst deleted file mode 100644 index 0e52bf52389d..000000000000 --- a/doc/source/operating/read_repair.rst +++ /dev/null @@ -1,22 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Read repair ------------ - -.. todo:: todo diff --git a/doc/source/operating/repair.rst b/doc/source/operating/repair.rst deleted file mode 100644 index 97d8ce8badd3..000000000000 --- a/doc/source/operating/repair.rst +++ /dev/null @@ -1,22 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Repair ------- - -.. todo:: todo diff --git a/doc/source/operating/security.rst b/doc/source/operating/security.rst deleted file mode 100644 index dfcd9e6c534c..000000000000 --- a/doc/source/operating/security.rst +++ /dev/null @@ -1,410 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. 
regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Security --------- - -There are three main components to the security features provided by Cassandra: - -- TLS/SSL encryption for client and inter-node communication -- Client authentication -- Authorization - -TLS/SSL Encryption -^^^^^^^^^^^^^^^^^^ -Cassandra provides secure communication between a client machine and a database cluster and between nodes within a -cluster. Enabling encryption ensures that data in flight is not compromised and is transferred securely. The options for -client-to-node and node-to-node encryption are managed separately and may be configured independently. - -In both cases, the JVM defaults for supported protocols and cipher suites are used when encryption is enabled. These can -be overidden using the settings in ``cassandra.yaml``, but this is not recommended unless there are policies in place -which dictate certain settings or a need to disable vulnerable ciphers or protocols in cases where the JVM cannot be -updated. - -FIPS compliant settings can be configured at the JVM level and should not involve changing encryption settings in -cassandra.yaml. See `the java document on FIPS `__ -for more details. - -For information on generating the keystore and truststore files used in SSL communications, see the -`java documentation on creating keystores `__ - -Inter-node Encryption -~~~~~~~~~~~~~~~~~~~~~ - -The settings for managing inter-node encryption are found in ``cassandra.yaml`` in the ``server_encryption_options`` -section. To enable inter-node encryption, change the ``internode_encryption`` setting from its default value of ``none`` -to one value from: ``rack``, ``dc`` or ``all``. - -Client to Node Encryption -~~~~~~~~~~~~~~~~~~~~~~~~~ - -The settings for managing client to node encryption are found in ``cassandra.yaml`` in the ``client_encryption_options`` -section. There are two primary toggles here for enabling encryption, ``enabled`` and ``optional``. - -- If neither is set to ``true``, client connections are entirely unencrypted. -- If ``enabled`` is set to ``true`` and ``optional`` is set to ``false``, all client connections must be secured. -- If both options are set to ``true``, both encrypted and unencrypted connections are supported using the same port. - Client connections using encryption with this configuration will be automatically detected and handled by the server. - -As an alternative to the ``optional`` setting, separate ports can also be configured for secure and unsecure connections -where operational requirements demand it. To do so, set ``optional`` to false and use the ``native_transport_port_ssl`` -setting in ``cassandra.yaml`` to specify the port to be used for secure client communication. - -.. _operation-roles: - -Roles -^^^^^ - -Cassandra uses database roles, which may represent either a single user or a group of users, in both authentication and -permissions management. 
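For example, a role can act as a group that individual login roles are granted membership of. A minimal sketch (the role names and password here are purely illustrative, not taken from the original document):

::

    -- a group-style role that nobody logs in as directly
    CREATE ROLE analysts WITH LOGIN = false;

    -- an individual user
    CREATE ROLE alice WITH LOGIN = true AND PASSWORD = 'change_me';

    -- alice now inherits anything granted to analysts
    GRANT analysts TO alice;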
Role management is an extension point in Cassandra and may be configured using the -``role_manager`` setting in ``cassandra.yaml``. The default setting uses ``CassandraRoleManager``, an implementation -which stores role information in the tables of the ``system_auth`` keyspace. - -See also the :ref:`CQL documentation on roles `. - -Authentication -^^^^^^^^^^^^^^ - -Authentication is pluggable in Cassandra and is configured using the ``authenticator`` setting in ``cassandra.yaml``. -Cassandra ships with two options included in the default distribution. - -By default, Cassandra is configured with ``AllowAllAuthenticator`` which performs no authentication checks and therefore -requires no credentials. It is used to disable authentication completely. Note that authentication is a necessary -condition of Cassandra's permissions subsystem, so if authentication is disabled, effectively so are permissions. - -The default distribution also includes ``PasswordAuthenticator``, which stores encrypted credentials in a system table. -This can be used to enable simple username/password authentication. - -.. _password-authentication: - -Enabling Password Authentication -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Before enabling client authentication on the cluster, client applications should be pre-configured with their intended -credentials. When a connection is initiated, the server will only ask for credentials once authentication is -enabled, so setting up the client side config in advance is safe. In contrast, as soon as a server has authentication -enabled, any connection attempt without proper credentials will be rejected which may cause availability problems for -client applications. Once clients are setup and ready for authentication to be enabled, follow this procedure to enable -it on the cluster. - -Pick a single node in the cluster on which to perform the initial configuration. Ideally, no clients should connect -to this node during the setup process, so you may want to remove it from client config, block it at the network level -or possibly add a new temporary node to the cluster for this purpose. On that node, perform the following steps: - -1. Open a ``cqlsh`` session and change the replication factor of the ``system_auth`` keyspace. By default, this keyspace - uses ``SimpleReplicationStrategy`` and a ``replication_factor`` of 1. It is recommended to change this for any - non-trivial deployment to ensure that should nodes become unavailable, login is still possible. Best practice is to - configure a replication factor of 3 to 5 per-DC. - -:: - - ALTER KEYSPACE system_auth WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': 3, 'DC2': 3}; - -2. Edit ``cassandra.yaml`` to change the ``authenticator`` option like so: - -:: - - authenticator: PasswordAuthenticator - -3. Restart the node. - -4. Open a new ``cqlsh`` session using the credentials of the default superuser: - -:: - - cqlsh -u cassandra -p cassandra - -5. During login, the credentials for the default superuser are read with a consistency level of ``QUORUM``, whereas - those for all other users (including superusers) are read at ``LOCAL_ONE``. In the interests of performance and - availability, as well as security, operators should create another superuser and disable the default one. This step - is optional, but highly recommended. While logged in as the default superuser, create another superuser role which - can be used to bootstrap further configuration. 
- -:: - - # create a new superuser - CREATE ROLE dba WITH SUPERUSER = true AND LOGIN = true AND PASSWORD = 'super'; - -6. Start a new cqlsh session, this time logging in as the new_superuser and disable the default superuser. - -:: - - ALTER ROLE cassandra WITH SUPERUSER = false AND LOGIN = false; - -7. Finally, set up the roles and credentials for your application users with :ref:`CREATE ROLE ` - statements. - -At the end of these steps, the one node is configured to use password authentication. To roll that out across the -cluster, repeat steps 2 and 3 on each node in the cluster. Once all nodes have been restarted, authentication will be -fully enabled throughout the cluster. - -Note that using ``PasswordAuthenticator`` also requires the use of :ref:`CassandraRoleManager `. - -See also: :ref:`setting-credentials-for-internal-authentication`, :ref:`CREATE ROLE `, -:ref:`ALTER ROLE `, :ref:`ALTER KEYSPACE ` and :ref:`GRANT PERMISSION -`, - -Authorization -^^^^^^^^^^^^^ - -Authorization is pluggable in Cassandra and is configured using the ``authorizer`` setting in ``cassandra.yaml``. -Cassandra ships with two options included in the default distribution. - -By default, Cassandra is configured with ``AllowAllAuthorizer`` which performs no checking and so effectively grants all -permissions to all roles. This must be used if ``AllowAllAuthenticator`` is the configured authenticator. - -The default distribution also includes ``CassandraAuthorizer``, which does implement full permissions management -functionality and stores its data in Cassandra system tables. - -Enabling Internal Authorization -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Permissions are modelled as a whitelist, with the default assumption that a given role has no access to any database -resources. The implication of this is that once authorization is enabled on a node, all requests will be rejected until -the required permissions have been granted. For this reason, it is strongly recommended to perform the initial setup on -a node which is not processing client requests. - -The following assumes that authentication has already been enabled via the process outlined in -:ref:`password-authentication`. Perform these steps to enable internal authorization across the cluster: - -1. On the selected node, edit ``cassandra.yaml`` to change the ``authorizer`` option like so: - -:: - - authorizer: CassandraAuthorizer - -2. Restart the node. - -3. Open a new ``cqlsh`` session using the credentials of a role with superuser credentials: - -:: - - cqlsh -u dba -p super - -4. Configure the appropriate access privileges for your clients using `GRANT PERMISSION `_ - statements. On the other nodes, until configuration is updated and the node restarted, this will have no effect so - disruption to clients is avoided. - -:: - - GRANT SELECT ON ks.t1 TO db_user; - -5. Once all the necessary permissions have been granted, repeat steps 1 and 2 for each node in turn. As each node - restarts and clients reconnect, the enforcement of the granted permissions will begin. - -See also: :ref:`GRANT PERMISSION `, `GRANT ALL ` and :ref:`REVOKE PERMISSION -` - -Caching -^^^^^^^ - -Enabling authentication and authorization places additional load on the cluster by frequently reading from the -``system_auth`` tables. Furthermore, these reads are in the critical paths of many client operations, and so has the -potential to severely impact quality of service. 
To mitigate this, auth data such as credentials, permissions and role -details are cached for a configurable period. The caching can be configured (and even disabled) from ``cassandra.yaml`` -or using a JMX client. The JMX interface also supports invalidation of the various caches, but any changes made via JMX -are not persistent and will be re-read from ``cassandra.yaml`` when the node is restarted. - -Each cache has 3 options which can be set: - -Validity Period - Controls the expiration of cache entries. After this period, entries are invalidated and removed from the cache. -Refresh Rate - Controls the rate at which background reads are performed to pick up any changes to the underlying data. While these - async refreshes are performed, caches will continue to serve (possibly) stale data. Typically, this will be set to a - shorter time than the validity period. -Max Entries - Controls the upper bound on cache size. - -The naming for these options in ``cassandra.yaml`` follows the convention: - -* ``_validity_in_ms`` -* ``_update_interval_in_ms`` -* ``_cache_max_entries`` - -Where ```` is one of ``credentials``, ``permissions``, or ``roles``. - -As mentioned, these are also exposed via JMX in the mbeans under the ``org.apache.cassandra.auth`` domain. - -JMX access -^^^^^^^^^^ - -Access control for JMX clients is configured separately to that for CQL. For both authentication and authorization, two -providers are available; the first based on standard JMX security and the second which integrates more closely with -Cassandra's own auth subsystem. - -The default settings for Cassandra make JMX accessible only from localhost. To enable remote JMX connections, edit -``cassandra-env.sh`` (or ``cassandra-env.ps1`` on Windows) to change the ``LOCAL_JMX`` setting to ``yes``. Under the -standard configuration, when remote JMX connections are enabled, :ref:`standard JMX authentication ` -is also switched on. - -Note that by default, local-only connections are not subject to authentication, but this can be enabled. - -If enabling remote connections, it is recommended to also use :ref:`SSL ` connections. - -Finally, after enabling auth and/or SSL, ensure that tools which use JMX, such as :ref:`nodetool `, are -correctly configured and working as expected. - -.. _standard-jmx-auth: - -Standard JMX Auth -~~~~~~~~~~~~~~~~~ - -Users permitted to connect to the JMX server are specified in a simple text file. The location of this file is set in -``cassandra-env.sh`` by the line: - -:: - - JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password" - -Edit the password file to add username/password pairs: - -:: - - jmx_user jmx_password - -Secure the credentials file so that only the user running the Cassandra process can read it : - -:: - - $ chown cassandra:cassandra /etc/cassandra/jmxremote.password - $ chmod 400 /etc/cassandra/jmxremote.password - -Optionally, enable access control to limit the scope of what defined users can do via JMX. Note that this is a fairly -blunt instrument in this context as most operational tools in Cassandra require full read/write access. To configure a -simple access file, uncomment this line in ``cassandra-env.sh``: - -:: - - #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access" - -Then edit the access file to grant your JMX user readwrite permission: - -:: - - jmx_user readwrite - -Cassandra must be restarted to pick up the new settings. 
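Once the password file is in place, JMX tools must supply those credentials. For example, ``nodetool`` accepts them on the command line; the username and password below are the illustrative values from the file above:

::

    $ nodetool -u jmx_user -pw jmx_password status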
-
-See also: `Using File-Based Password Authentication In JMX
-`__
-
-
-Cassandra Integrated Auth
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-An alternative to the out-of-the-box JMX auth is to use Cassandra's own authentication and/or authorization providers
-for JMX clients. This is potentially more flexible and secure, but it comes with one major caveat: it is not
-available until `after` a node has joined the ring, because the auth subsystem is not fully configured until that
-point. However, it is often critical for monitoring purposes to have JMX access, particularly during bootstrap. So it
-is recommended, where possible, to use local-only JMX auth during bootstrap and then, if remote connectivity is
-required, to switch to integrated auth once the node has joined the ring and initial setup is complete.
-
-With this option, the same database roles used for CQL authentication can be used to control access to JMX, so updates
-can be managed centrally using just ``cqlsh``. Furthermore, fine-grained control over exactly which operations are
-permitted on particular MBeans can be achieved via :ref:`GRANT PERMISSION `.
-
-To enable integrated authentication, edit ``cassandra-env.sh`` to uncomment these lines:
-
-::
-
-    #JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.login.config=CassandraLogin"
-    #JVM_OPTS="$JVM_OPTS -Djava.security.auth.login.config=$CASSANDRA_HOME/conf/cassandra-jaas.config"
-
-And disable the standard JMX auth by commenting out this line:
-
-::
-
-    JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password"
-
-To enable integrated authorization, uncomment this line:
-
-::
-
-    #JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.authorizer=org.apache.cassandra.auth.jmx.AuthorizationProxy"
-
-Check that standard access control is off by ensuring this line is commented out:
-
-::
-
-    #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access"
-
-With integrated authentication and authorization enabled, operators can define specific roles and grant them access to
-the particular JMX resources that they need. For example, a role with the necessary permissions to use tools such as
-jconsole or jmc in read-only mode would be defined as:
-
-::
-
-    CREATE ROLE jmx WITH LOGIN = false;
-    GRANT SELECT ON ALL MBEANS TO jmx;
-    GRANT DESCRIBE ON ALL MBEANS TO jmx;
-    GRANT EXECUTE ON MBEAN 'java.lang:type=Threading' TO jmx;
-    GRANT EXECUTE ON MBEAN 'com.sun.management:type=HotSpotDiagnostic' TO jmx;
-
-    # Grant the jmx role to one with login permissions so that it can access the JMX tooling
-    CREATE ROLE ks_user WITH PASSWORD = 'password' AND LOGIN = true AND SUPERUSER = false;
-    GRANT jmx TO ks_user;
-
-Fine-grained access control to individual MBeans is also supported:
-
-::
-
-    GRANT EXECUTE ON MBEAN 'org.apache.cassandra.db:type=Tables,keyspace=test_keyspace,table=t1' TO ks_user;
-    GRANT EXECUTE ON MBEAN 'org.apache.cassandra.db:type=Tables,keyspace=test_keyspace,table=*' TO ks_owner;
-
-This permits the ``ks_user`` role to invoke methods on the MBean representing a single table in ``test_keyspace``, while
-granting the same permission for all table-level MBeans in that keyspace to the ``ks_owner`` role.
-
-Adding/removing roles and granting/revoking of permissions is handled dynamically once the initial setup is complete, so
-no further restarts are required if permissions are altered.
-
-See also: :ref:`Permissions `.
-
-.. 
_jmx-with-ssl: - -JMX With SSL -~~~~~~~~~~~~ - -JMX SSL configuration is controlled by a number of system properties, some of which are optional. To turn on SSL, edit -the relevant lines in ``cassandra-env.sh`` (or ``cassandra-env.ps1`` on Windows) to uncomment and set the values of these -properties as required: - -``com.sun.management.jmxremote.ssl`` - set to true to enable SSL -``com.sun.management.jmxremote.ssl.need.client.auth`` - set to true to enable validation of client certificates -``com.sun.management.jmxremote.registry.ssl`` - enables SSL sockets for the RMI registry from which clients obtain the JMX connector stub -``com.sun.management.jmxremote.ssl.enabled.protocols`` - by default, the protocols supported by the JVM will be used, override with a comma-separated list. Note that this is - not usually necessary and using the defaults is the preferred option. -``com.sun.management.jmxremote.ssl.enabled.cipher.suites`` - by default, the cipher suites supported by the JVM will be used, override with a comma-separated list. Note that - this is not usually necessary and using the defaults is the preferred option. -``javax.net.ssl.keyStore`` - set the path on the local filesystem of the keystore containing server private keys and public certificates -``javax.net.ssl.keyStorePassword`` - set the password of the keystore file -``javax.net.ssl.trustStore`` - if validation of client certificates is required, use this property to specify the path of the truststore containing - the public certificates of trusted clients -``javax.net.ssl.trustStorePassword`` - set the password of the truststore file - -See also: `Oracle Java7 Docs `__, -`Monitor Java with JMX `__ diff --git a/doc/source/operating/snitch.rst b/doc/source/operating/snitch.rst deleted file mode 100644 index faea0b3e1f13..000000000000 --- a/doc/source/operating/snitch.rst +++ /dev/null @@ -1,78 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -Snitch ------- - -In cassandra, the snitch has two functions: - -- it teaches Cassandra enough about your network topology to route requests efficiently. -- it allows Cassandra to spread replicas around your cluster to avoid correlated failures. It does this by grouping - machines into "datacenters" and "racks." Cassandra will do its best not to have more than one replica on the same - "rack" (which may not actually be a physical location). - -Dynamic snitching -^^^^^^^^^^^^^^^^^ - -The dynamic snitch monitor read latencies to avoid reading from hosts that have slowed down. The dynamic snitch is -configured with the following properties on ``cassandra.yaml``: - -- ``dynamic_snitch``: whether the dynamic snitch should be enabled or disabled. 
-- ``dynamic_snitch_update_interval_in_ms``: controls how often to perform the more expensive part of host score - calculation. -- ``dynamic_snitch_reset_interval_in_ms``: if set greater than zero and read_repair_chance is < 1.0, this will allow - 'pinning' of replicas to hosts in order to increase cache capacity. -- ``dynamic_snitch_badness_threshold:``: The badness threshold will control how much worse the pinned host has to be - before the dynamic snitch will prefer other replicas over it. This is expressed as a double which represents a - percentage. Thus, a value of 0.2 means Cassandra would continue to prefer the static snitch values until the pinned - host was 20% worse than the fastest. - -Snitch classes -^^^^^^^^^^^^^^ - -The ``endpoint_snitch`` parameter in ``cassandra.yaml`` should be set to the class the class that implements -``IEndPointSnitch`` which will be wrapped by the dynamic snitch and decide if two endpoints are in the same data center -or on the same rack. Out of the box, Cassandra provides the snitch implementations: - -GossipingPropertyFileSnitch - This should be your go-to snitch for production use. The rack and datacenter for the local node are defined in - cassandra-rackdc.properties and propagated to other nodes via gossip. If ``cassandra-topology.properties`` exists, - it is used as a fallback, allowing migration from the PropertyFileSnitch. - -SimpleSnitch - Treats Strategy order as proximity. This can improve cache locality when disabling read repair. Only appropriate for - single-datacenter deployments. - -PropertyFileSnitch - Proximity is determined by rack and data center, which are explicitly configured in - ``cassandra-topology.properties``. - -Ec2Snitch - Appropriate for EC2 deployments in a single Region. Loads Region and Availability Zone information from the EC2 API. - The Region is treated as the datacenter, and the Availability Zone as the rack. Only private IPs are used, so this - will not work across multiple regions. - -Ec2MultiRegionSnitch - Uses public IPs as broadcast_address to allow cross-region connectivity (thus, you should set seed addresses to the - public IP as well). You will need to open the ``storage_port`` or ``ssl_storage_port`` on the public IP firewall - (For intra-Region traffic, Cassandra will switch to the private IP after establishing a connection). - -RackInferringSnitch - Proximity is determined by rack and data center, which are assumed to correspond to the 3rd and 2nd octet of each - node's IP address, respectively. Unless this happens to match your deployment conventions, this is best used as an - example of writing a custom Snitch class and is provided in that spirit. diff --git a/doc/source/operating/topo_changes.rst b/doc/source/operating/topo_changes.rst deleted file mode 100644 index c42708e029f9..000000000000 --- a/doc/source/operating/topo_changes.rst +++ /dev/null @@ -1,124 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. 
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. highlight:: none - -.. _topology-changes: - -Adding, replacing, moving and removing nodes --------------------------------------------- - -Bootstrap -^^^^^^^^^ - -Adding new nodes is called "bootstrapping". The ``num_tokens`` parameter will define the amount of virtual nodes -(tokens) the joining node will be assigned during bootstrap. The tokens define the sections of the ring (token ranges) -the node will become responsible for. - -Token allocation -~~~~~~~~~~~~~~~~ - -With the default token allocation algorithm the new node will pick ``num_tokens`` random tokens to become responsible -for. Since tokens are distributed randomly, load distribution improves with a higher amount of virtual nodes, but it -also increases token management overhead. The default of 256 virtual nodes should provide a reasonable load balance with -acceptable overhead. - -On 3.0+ a new token allocation algorithm was introduced to allocate tokens based on the load of existing virtual nodes -for a given keyspace, and thus yield an improved load distribution with a lower number of tokens. To use this approach, -the new node must be started with the JVM option ``-Dcassandra.allocate_tokens_for_keyspace=``, where -```` is the keyspace from which the algorithm can find the load information to optimize token assignment for. - -Manual token assignment -""""""""""""""""""""""" - -You may specify a comma-separated list of tokens manually with the ``initial_token`` ``cassandra.yaml`` parameter, and -if that is specified Cassandra will skip the token allocation process. This may be useful when doing token assignment -with an external tool or when restoring a node with its previous tokens. - -Range streaming -~~~~~~~~~~~~~~~~ - -After the tokens are allocated, the joining node will pick current replicas of the token ranges it will become -responsible for to stream data from. By default it will stream from the primary replica of each token range in order to -guarantee data in the new node will be consistent with the current state. - -In the case of any unavailable replica, the consistent bootstrap process will fail. To override this behavior and -potentially miss data from an unavailable replica, set the JVM flag ``-Dcassandra.consistent.rangemovement=false``. - -Resuming failed/hanged bootstrap -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -On 2.2+, if the bootstrap process fails, it's possible to resume bootstrap from the previous saved state by calling -``nodetool bootstrap resume``. If for some reason the bootstrap hangs or stalls, it may also be resumed by simply -restarting the node. In order to cleanup bootstrap state and start fresh, you may set the JVM startup flag -``-Dcassandra.reset_bootstrap_progress=true``. - -On lower versions, when the bootstrap proces fails it is recommended to wipe the node (remove all the data), and restart -the bootstrap process again. - -Manual bootstrapping -~~~~~~~~~~~~~~~~~~~~ - -It's possible to skip the bootstrapping process entirely and join the ring straight away by setting the hidden parameter -``auto_bootstrap: false``. This may be useful when restoring a node from a backup or creating a new data-center. - -Removing nodes -^^^^^^^^^^^^^^ - -You can take a node out of the cluster with ``nodetool decommission`` to a live node, or ``nodetool removenode`` (to any -other machine) to remove a dead one. 
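As a rough sketch of both cases (the host ID below is a placeholder; the real value can be read from ``nodetool status``):

::

    # on the live node that is leaving the cluster
    $ nodetool decommission

    # from any other node, to remove a node that is already down
    $ nodetool status                                            # note the Host ID of the dead node
    $ nodetool removenode 55d056af-99cb-4be6-b3f6-1c2a4b5c6d7e   # placeholder host ID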
This will assign the ranges the old node was responsible for to other nodes, and -replicate the appropriate data there. If decommission is used, the data will stream from the decommissioned node. If -removenode is used, the data will stream from the remaining replicas. - -No data is removed automatically from the node being decommissioned, so if you want to put the node back into service at -a different token on the ring, it should be removed manually. - -Moving nodes -^^^^^^^^^^^^ - -When ``num_tokens: 1`` it's possible to move the node position in the ring with ``nodetool move``. Moving is both a -convenience over and more efficient than decommission + bootstrap. After moving a node, ``nodetool cleanup`` should be -run to remove any unnecessary data. - -Replacing a dead node -^^^^^^^^^^^^^^^^^^^^^ - -In order to replace a dead node, start cassandra with the JVM startup flag -``-Dcassandra.replace_address_first_boot=``. Once this property is enabled the node starts in a hibernate -state, during which all the other nodes will see this node to be down. - -The replacing node will now start to bootstrap the data from the rest of the nodes in the cluster. The main difference -between normal bootstrapping of a new node is that this new node will not accept any writes during this phase. - -Once the bootstrapping is complete the node will be marked "UP", we rely on the hinted handoff's for making this node -consistent (since we don't accept writes since the start of the bootstrap). - -.. Note:: If the replacement process takes longer than ``max_hint_window_in_ms`` you **MUST** run repair to make the - replaced node consistent again, since it missed ongoing writes during bootstrapping. - -Monitoring progress -^^^^^^^^^^^^^^^^^^^ - -Bootstrap, replace, move and remove progress can be monitored using ``nodetool netstats`` which will show the progress -of the streaming operations. - -Cleanup data after range movements -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -As a safety measure, Cassandra does not automatically remove data from nodes that "lose" part of their token range due -to a range movement operation (bootstrap, move, replace). Run ``nodetool cleanup`` on the nodes that lost ranges to the -joining node when you are satisfied the new node is up and working. If you do not do this the old data will still be -counted against the load on that node. diff --git a/doc/source/tools/cqlsh.rst b/doc/source/tools/cqlsh.rst deleted file mode 100644 index 45e2db8fcd04..000000000000 --- a/doc/source/tools/cqlsh.rst +++ /dev/null @@ -1,455 +0,0 @@ -.. highlight:: none - -.. _cqlsh: - -cqlsh: the CQL shell --------------------- - -cqlsh is a command line shell for interacting with Cassandra through CQL (the Cassandra Query Language). It is shipped -with every Cassandra package, and can be found in the bin/ directory alongside the cassandra executable. cqlsh utilizes -the Python native protocol driver, and connects to the single node specified on the command line. - - -Compatibility -^^^^^^^^^^^^^ - -cqlsh is compatible with Python 2.7. - -In general, a given version of cqlsh is only guaranteed to work with the version of Cassandra that it was released with. -In some cases, cqlsh make work with older or newer versions of Cassandra, but this is not officially supported. - - -Optional Dependencies -^^^^^^^^^^^^^^^^^^^^^ - -cqlsh ships with all essential dependencies. However, there are some optional dependencies that can be installed to -improve the capabilities of cqlsh. 
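Both optional packages described below are ordinary Python packages, so where pip is available they can usually be installed in one step; this is a sketch rather than an official procedure, and it assumes you target the Python interpreter that cqlsh actually runs under:

::

    $ pip install pytz cython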
- -pytz -~~~~ - -By default, cqlsh displays all timestamps with a UTC timezone. To support display of timestamps with another timezone, -the `pytz `__ library must be installed. See the ``timezone`` option in cqlshrc_ for -specifying a timezone to use. - -cython -~~~~~~ - -The performance of cqlsh's ``COPY`` operations can be improved by installing `cython `__. This will -compile the python modules that are central to the performance of ``COPY``. - -cqlshrc -^^^^^^^ - -The ``cqlshrc`` file holds configuration options for cqlsh. By default this is in the user's home directory at -``~/.cassandra/cqlsh``, but a custom location can be specified with the ``--cqlshrc`` option. - -Example config values and documentation can be found in the ``conf/cqlshrc.sample`` file of a tarball installation. You -can also view the latest version of `cqlshrc online `__. - - -Command Line Options -^^^^^^^^^^^^^^^^^^^^ - -Usage: - -``cqlsh [options] [host [port]]`` - -Options: - -``-C`` ``--color`` - Force color output - -``--no-color`` - Disable color output - -``--browser`` - Specify the browser to use for displaying cqlsh help. This can be one of the `supported browser names - `__ (e.g. ``firefox``) or a browser path followed by ``%s`` (e.g. - ``/usr/bin/google-chrome-stable %s``). - -``--ssl`` - Use SSL when connecting to Cassandra - -``-u`` ``--user`` - Username to authenticate against Cassandra with - -``-p`` ``--password`` - Password to authenticate against Cassandra with, should - be used in conjunction with ``--user`` - -``-k`` ``--keyspace`` - Keyspace to authenticate to, should be used in conjunction - with ``--user`` - -``-f`` ``--file`` - Execute commands from the given file, then exit - -``--debug`` - Print additional debugging information - -``--encoding`` - Specify a non-default encoding for output (defaults to UTF-8) - -``--cqlshrc`` - Specify a non-default location for the ``cqlshrc`` file - -``-e`` ``--execute`` - Execute the given statement, then exit - -``--connect-timeout`` - Specify the connection timeout in seconds (defaults to 2s) - -``--request-timeout`` - Specify the request timeout in seconds (defaults to 10s) - -``-t`` ``--tty`` - Force tty mode (command prompt) - - -Special Commands -^^^^^^^^^^^^^^^^ - -In addition to supporting regular CQL statements, cqlsh also supports a number of special commands that are not part of -CQL. These are detailed below. - -``CONSISTENCY`` -~~~~~~~~~~~~~~~ - -`Usage`: ``CONSISTENCY `` - -Sets the consistency level for operations to follow. Valid arguments include: - -- ``ANY`` -- ``ONE`` -- ``TWO`` -- ``THREE`` -- ``QUORUM`` -- ``ALL`` -- ``LOCAL_QUORUM`` -- ``LOCAL_ONE`` -- ``SERIAL`` -- ``LOCAL_SERIAL`` - -``SERIAL CONSISTENCY`` -~~~~~~~~~~~~~~~~~~~~~~ - -`Usage`: ``SERIAL CONSISTENCY `` - -Sets the serial consistency level for operations to follow. Valid arguments include: - -- ``SERIAL`` -- ``LOCAL_SERIAL`` - -The serial consistency level is only used by conditional updates (``INSERT``, ``UPDATE`` and ``DELETE`` with an ``IF`` -condition). For those, the serial consistency level defines the consistency level of the serial phase (or “paxos” phase) -while the normal consistency level defines the consistency for the “learn” phase, i.e. what type of reads will be -guaranteed to see the update right away. For example, if a conditional write has a consistency level of ``QUORUM`` (and -is successful), then a ``QUORUM`` read is guaranteed to see that write. 
But if the regular consistency level of that -write is ``ANY``, then only a read with a consistency level of ``SERIAL`` is guaranteed to see it (even a read with -consistency ``ALL`` is not guaranteed to be enough). - -``SHOW VERSION`` -~~~~~~~~~~~~~~~~ -Prints the cqlsh, Cassandra, CQL, and native protocol versions in use. Example:: - - cqlsh> SHOW VERSION - [cqlsh 5.0.1 | Cassandra 3.8 | CQL spec 3.4.2 | Native protocol v4] - -``SHOW HOST`` -~~~~~~~~~~~~~ - -Prints the IP address and port of the Cassandra node that cqlsh is connected to in addition to the cluster name. -Example:: - - cqlsh> SHOW HOST - Connected to Prod_Cluster at 192.0.0.1:9042. - -``SHOW SESSION`` -~~~~~~~~~~~~~~~~ - -Pretty prints a specific tracing session. - -`Usage`: ``SHOW SESSION `` - -Example usage:: - - cqlsh> SHOW SESSION 95ac6470-327e-11e6-beca-dfb660d92ad8 - - Tracing session: 95ac6470-327e-11e6-beca-dfb660d92ad8 - - activity | timestamp | source | source_elapsed | client - -----------------------------------------------------------+----------------------------+-----------+----------------+----------- - Execute CQL3 query | 2016-06-14 17:23:13.979000 | 127.0.0.1 | 0 | 127.0.0.1 - Parsing SELECT * FROM system.local; [SharedPool-Worker-1] | 2016-06-14 17:23:13.982000 | 127.0.0.1 | 3843 | 127.0.0.1 - ... - - -``SOURCE`` -~~~~~~~~~~ - -Reads the contents of a file and executes each line as a CQL statement or special cqlsh command. - -`Usage`: ``SOURCE `` - -Example usage:: - - cqlsh> SOURCE '/home/thobbs/commands.cql' - -``CAPTURE`` -~~~~~~~~~~~ - -Begins capturing command output and appending it to a specified file. Output will not be shown at the console while it -is captured. - -`Usage`:: - - CAPTURE ''; - CAPTURE OFF; - CAPTURE; - -That is, the path to the file to be appended to must be given inside a string literal. The path is interpreted relative -to the current working directory. The tilde shorthand notation (``'~/mydir'``) is supported for referring to ``$HOME``. - -Only query result output is captured. Errors and output from cqlsh-only commands will still be shown in the cqlsh -session. - -To stop capturing output and show it in the cqlsh session again, use ``CAPTURE OFF``. - -To inspect the current capture configuration, use ``CAPTURE`` with no arguments. - -``HELP`` -~~~~~~~~ - -Gives information about cqlsh commands. To see available topics, enter ``HELP`` without any arguments. To see help on a -topic, use ``HELP ``. Also see the ``--browser`` argument for controlling what browser is used to display help. - -``TRACING`` -~~~~~~~~~~~ - -Enables or disables tracing for queries. When tracing is enabled, once a query completes, a trace of the events during -the query will be printed. - -`Usage`:: - - TRACING ON - TRACING OFF - -``PAGING`` -~~~~~~~~~~ - -Enables paging, disables paging, or sets the page size for read queries. When paging is enabled, only one page of data -will be fetched at a time and a prompt will appear to fetch the next page. Generally, it's a good idea to leave paging -enabled in an interactive session to avoid fetching and printing large amounts of data at once. - -`Usage`:: - - PAGING ON - PAGING OFF - PAGING - -``EXPAND`` -~~~~~~~~~~ - -Enables or disables vertical printing of rows. Enabling ``EXPAND`` is useful when many columns are fetched, or the -contents of a single column are large. - -`Usage`:: - - EXPAND ON - EXPAND OFF - -``LOGIN`` -~~~~~~~~~ - -Authenticate as a specified Cassandra user for the current session. 
- -`Usage`:: - - LOGIN [] - -``EXIT`` -~~~~~~~~~ - -Ends the current session and terminates the cqlsh process. - -`Usage`:: - - EXIT - QUIT - -``CLEAR`` -~~~~~~~~~ - -Clears the console. - -`Usage`:: - - CLEAR - CLS - -``DESCRIBE`` -~~~~~~~~~~~~ - -Prints a description (typically a series of DDL statements) of a schema element or the cluster. This is useful for -dumping all or portions of the schema. - -`Usage`:: - - DESCRIBE CLUSTER - DESCRIBE SCHEMA - DESCRIBE KEYSPACES - DESCRIBE KEYSPACE - DESCRIBE TABLES - DESCRIBE TABLE
- DESCRIBE INDEX - DESCRIBE MATERIALIZED VIEW - DESCRIBE TYPES - DESCRIBE TYPE - DESCRIBE FUNCTIONS - DESCRIBE FUNCTION - DESCRIBE AGGREGATES - DESCRIBE AGGREGATE - -In any of the commands, ``DESC`` may be used in place of ``DESCRIBE``. - -The ``DESCRIBE CLUSTER`` command prints the cluster name and partitioner:: - - cqlsh> DESCRIBE CLUSTER - - Cluster: Test Cluster - Partitioner: Murmur3Partitioner - -The ``DESCRIBE SCHEMA`` command prints the DDL statements needed to recreate the entire schema. This is especially -useful for dumping the schema in order to clone a cluster or restore from a backup. - -``COPY TO`` -~~~~~~~~~~~ - -Copies data from a table to a CSV file. - -`Usage`:: - - COPY
[(, ...)] TO WITH [AND ...] - -If no columns are specified, all columns from the table will be copied to the CSV file. A subset of columns to copy may -be specified by adding a comma-separated list of column names surrounded by parenthesis after the table name. - - -The ```` should be a string literal (with single quotes) representing a path to the destination file. This -can also the special value ``STDOUT`` (without single quotes) to print the CSV to stdout. - -See :ref:`shared-copy-options` for options that apply to both ``COPY TO`` and ``COPY FROM``. - -Options for ``COPY TO`` -``````````````````````` - -``MAXREQUESTS`` - The maximum number token ranges to fetch simultaneously. Defaults to 6. - -``PAGESIZE`` - The number of rows to fetch in a single page. Defaults to 1000. - -``PAGETIMEOUT`` - By default the page timeout is 10 seconds per 1000 entries - in the page size or 10 seconds if pagesize is smaller. - -``BEGINTOKEN``, ``ENDTOKEN`` - Token range to export. Defaults to exporting the full ring. - -``MAXOUTPUTSIZE`` - The maximum size of the output file measured in number of lines; - beyond this maximum the output file will be split into segments. - -1 means unlimited, and is the default. - -``ENCODING`` - The encoding used for characters. Defaults to ``utf8``. - -``COPY FROM`` -~~~~~~~~~~~~~ -Copies data from a CSV file to table. - -`Usage`:: - - COPY
[(, ...)] FROM WITH [AND ...]
-
-If no columns are specified, all columns from the CSV file will be copied to the table. A subset
-of columns to copy may be specified by adding a comma-separated list of column names surrounded
-by parentheses after the table name.
-
-The ```` should be a string literal (with single quotes) representing a path to the
-source file. This can also be the special value ``STDIN`` (without single quotes) to read the
-CSV data from stdin.
-
-See :ref:`shared-copy-options` for options that apply to both ``COPY TO`` and ``COPY FROM``.
-
-Options for ``COPY FROM``
-`````````````````````````
-
-``INGESTRATE``
-  The maximum number of rows to process per second. Defaults to 100000.
-
-``MAXROWS``
-  The maximum number of rows to import. -1 means unlimited, and is the default.
-
-``SKIPROWS``
-  A number of initial rows to skip. Defaults to 0.
-
-``SKIPCOLS``
-  A comma-separated list of column names to ignore. By default, no columns are skipped.
-
-``MAXPARSEERRORS``
-  The maximum global number of parsing errors to ignore. -1 means unlimited, and is the default.
-
-``MAXINSERTERRORS``
-  The maximum global number of insert errors to ignore. -1 means unlimited. The default is 1000.
-
-``ERRFILE`` =
-  A file to store all rows that could not be imported, by default this is ``import__
.err`` where ```` is - your keyspace and ``
`` is your table name. - -``MAXBATCHSIZE`` - The max number of rows inserted in a single batch. Defaults to 20. - -``MINBATCHSIZE`` - The min number of rows inserted in a single batch. Defaults to 2. - -``CHUNKSIZE`` - The number of rows that are passed to child worker processes from the main process at a time. Defaults to 1000. - -.. _shared-copy-options: - -Shared COPY Options -``````````````````` - -Options that are common to both ``COPY TO`` and ``COPY FROM``. - -``NULLVAL`` - The string placeholder for null values. Defaults to ``null``. - -``HEADER`` - For ``COPY TO``, controls whether the first line in the CSV output file will contain the column names. For COPY FROM, - specifies whether the first line in the CSV input file contains column names. Defaults to ``false``. - -``DECIMALSEP`` - The character that is used as the decimal point separator. Defaults to ``.``. - -``THOUSANDSSEP`` - The character that is used to separate thousands. Defaults to the empty string. - -``BOOLSTYlE`` - The string literal format for boolean values. Defaults to ``True,False``. - -``NUMPROCESSES`` - The number of child worker processes to create for ``COPY`` tasks. Defaults to a max of 4 for ``COPY FROM`` and 16 - for ``COPY TO``. However, at most (num_cores - 1) processes will be created. - -``MAXATTEMPTS`` - The maximum number of failed attempts to fetch a range of data (when using ``COPY TO``) or insert a chunk of data - (when using ``COPY FROM``) before giving up. Defaults to 5. - -``REPORTFREQUENCY`` - How often status updates are refreshed, in seconds. Defaults to 0.25. - -``RATEFILE`` - An optional file to output rate statistics to. By default, statistics are not output to a file. diff --git a/doc/source/tools/index.rst b/doc/source/tools/index.rst deleted file mode 100644 index 5a5e4d5aec16..000000000000 --- a/doc/source/tools/index.rst +++ /dev/null @@ -1,26 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Cassandra Tools -=============== - -This section describes the command line tools provided with Apache Cassandra. - -.. toctree:: - :maxdepth: 1 - - cqlsh - nodetool diff --git a/doc/source/tools/nodetool.rst b/doc/source/tools/nodetool.rst deleted file mode 100644 index e37303110eac..000000000000 --- a/doc/source/tools/nodetool.rst +++ /dev/null @@ -1,22 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. 
Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -.. _nodetool: - -Nodetool --------- - -.. todo:: Try to autogenerate this from Nodetool’s help. diff --git a/doc/source/troubleshooting/index.rst b/doc/source/troubleshooting/index.rst deleted file mode 100644 index 2e5cf106de3c..000000000000 --- a/doc/source/troubleshooting/index.rst +++ /dev/null @@ -1,20 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Troubleshooting -=============== - -.. TODO: todo