diff --git a/.build/build-rat.xml b/.build/build-rat.xml
index 600946d18e3..a1a17cdadc4 100644
--- a/.build/build-rat.xml
+++ b/.build/build-rat.xml
@@ -53,6 +53,7 @@
+
@@ -69,6 +70,8 @@
+
+
diff --git a/.gitignore b/.gitignore
index a634687d8f7..7d5e24d9e2b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -69,8 +69,9 @@ Thumbs.db
.ant_targets
# Generated files from the documentation
-doc/source/configuration/cassandra_config_file.rst
-doc/source/tools/nodetool
+doc/modules/cassandra/pages/configuration/cass_yaml_file.adoc
+doc/modules/cassandra/pages/tools/nodetool/
+doc/modules/cassandra/examples/TEXT/NODETOOL/
# Python virtual environment
venv/
diff --git a/build.xml b/build.xml
index 54fa1045df7..17deaed2646 100644
--- a/build.xml
+++ b/build.xml
@@ -423,13 +423,14 @@
-
+
-
+
-
-
-
+
+
+
+
diff --git a/doc/Dockerfile b/doc/Dockerfile
deleted file mode 100644
index ed60904ba71..00000000000
--- a/doc/Dockerfile
+++ /dev/null
@@ -1,22 +0,0 @@
-# Dockerfile for building the Cassandra documentation.
-# If wanting to regenerate the documentation from scratch,
-# run `ant realclean` from the root directory of this project.
-
-FROM python:2.7
-
-WORKDIR /usr/src/code
-
-RUN pip install --no-cache-dir sphinx sphinx_rtd_theme
-
-RUN apt-get update && apt-get install -y software-properties-common
-
-RUN wget -qO - https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public | apt-key add - \
- && add-apt-repository --yes https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/ \
- && apt-get update \
- && apt-get install -y adoptopenjdk-11-hotspot ant
-
-
-RUN apt-get clean
-
-CMD CASSANDRA_USE_JDK11=true ant realclean gen-doc \
- && echo "The locally built documentation can be found here:\n\n build/html/index.html\n\n"
diff --git a/doc/Makefile b/doc/Makefile
index 17ef395090e..43acc1ee7c5 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -1,296 +1,26 @@
-# Makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-SPHINXOPTS =
-SPHINXBUILD = sphinx-build
-PAPER =
-BUILDDIR = build
-
-# Internal variables.
-PAPEROPT_a4 = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
-# the i18n builder cannot share the environment and doctrees with the others
-I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
-
-YAML_DOC_INPUT=../conf/cassandra.yaml
-YAML_DOC_OUTPUT=source/configuration/cassandra_config_file.rst
-
-MAKE_CASSANDRA_YAML = python convert_yaml_to_rst.py $(YAML_DOC_INPUT) $(YAML_DOC_OUTPUT)
-
-GENERATE_NODETOOL_DOCS = python gen-nodetool-docs.py
-
-WEB_SITE_PRESENCE_FILE='source/.build_for_website'
-
-.PHONY: help
-help:
- @echo "Please use \`make ' where is one of"
- @echo " html to make standalone HTML files"
- @echo " website to make HTML files for the Cassandra website"
- @echo " dirhtml to make HTML files named index.html in directories"
- @echo " singlehtml to make a single large HTML file"
- @echo " pickle to make pickle files"
- @echo " json to make JSON files"
- @echo " htmlhelp to make HTML files and a HTML help project"
- @echo " qthelp to make HTML files and a qthelp project"
- @echo " applehelp to make an Apple Help Book"
- @echo " devhelp to make HTML files and a Devhelp project"
- @echo " epub to make an epub"
- @echo " epub3 to make an epub3"
- @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
- @echo " latexpdf to make LaTeX files and run them through pdflatex"
- @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
- @echo " text to make text files"
- @echo " man to make manual pages"
- @echo " texinfo to make Texinfo files"
- @echo " info to make Texinfo files and run them through makeinfo"
- @echo " gettext to make PO message catalogs"
- @echo " changes to make an overview of all changed/added/deprecated items"
- @echo " xml to make Docutils-native XML files"
- @echo " pseudoxml to make pseudoxml-XML files for display purposes"
- @echo " linkcheck to check all external links for integrity"
- @echo " doctest to run all doctests embedded in the documentation (if enabled)"
- @echo " coverage to run coverage check of the documentation (if enabled)"
- @echo " dummy to check syntax errors of document sources"
-
-.PHONY: clean
-clean:
- rm -rf $(BUILDDIR)/*
- rm -f $(YAML_DOC_OUTPUT)
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+GENERATE_NODETOOL_DOCS = ./scripts/gen-nodetool-docs.py
+MAKE_CASSANDRA_YAML = ./scripts/convert_yaml_to_adoc.py ../conf/cassandra.yaml ./modules/cassandra/pages/configuration/cass_yaml_file.adoc
.PHONY: html
html:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
-
-.PHONY: website
-website: clean
- @touch $(WEB_SITE_PRESENCE_FILE)
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
- @rm $(WEB_SITE_PRESENCE_FILE)
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
-
-.PHONY: dirhtml
-dirhtml:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
-
-.PHONY: singlehtml
-singlehtml:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
- @echo
- @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
-
-.PHONY: pickle
-pickle:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
- @echo
- @echo "Build finished; now you can process the pickle files."
-
-.PHONY: json
-json:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
- @echo
- @echo "Build finished; now you can process the JSON files."
-
-.PHONY: htmlhelp
-htmlhelp:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
- @echo
- @echo "Build finished; now you can run HTML Help Workshop with the" \
- ".hhp project file in $(BUILDDIR)/htmlhelp."
-
-.PHONY: qthelp
-qthelp:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
- @echo
- @echo "Build finished; now you can run "qcollectiongenerator" with the" \
- ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
- @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/ApacheCassandraDocumentation.qhcp"
- @echo "To view the help file:"
- @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/ApacheCassandraDocumentation.qhc"
-
-.PHONY: applehelp
-applehelp:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
- @echo
- @echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
- @echo "N.B. You won't be able to view it unless you put it in" \
- "~/Library/Documentation/Help or install it in your application" \
- "bundle."
-
-.PHONY: devhelp
-devhelp:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
- @echo
- @echo "Build finished."
- @echo "To view the help file:"
- @echo "# mkdir -p $$HOME/.local/share/devhelp/ApacheCassandraDocumentation"
- @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/ApacheCassandraDocumentation"
- @echo "# devhelp"
-
-.PHONY: epub
-epub:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
- @echo
- @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
-
-.PHONY: epub3
-epub3:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
- @echo
- @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."
-
-.PHONY: latex
-latex:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo
- @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
- @echo "Run \`make' in that directory to run these through (pdf)latex" \
- "(use \`make latexpdf' here to do that automatically)."
-
-.PHONY: latexpdf
-latexpdf:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo "Running LaTeX files through pdflatex..."
- $(MAKE) -C $(BUILDDIR)/latex all-pdf
- @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-.PHONY: latexpdfja
-latexpdfja:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo "Running LaTeX files through platex and dvipdfmx..."
- $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
- @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-.PHONY: text
-text:
- $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
- @echo
- @echo "Build finished. The text files are in $(BUILDDIR)/text."
-
-.PHONY: man
-man:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
- @echo
- @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
-
-.PHONY: texinfo
-texinfo:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
- @echo
- @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
- @echo "Run \`make' in that directory to run these through makeinfo" \
- "(use \`make info' here to do that automatically)."
-
-.PHONY: info
-info:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
- @echo "Running Texinfo files through makeinfo..."
- make -C $(BUILDDIR)/texinfo info
- @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
-
-.PHONY: gettext
-gettext:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
- @echo
- @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
-
-.PHONY: changes
-changes:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
- @echo
- @echo "The overview file is in $(BUILDDIR)/changes."
-
-.PHONY: linkcheck
-linkcheck:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
- @echo
- @echo "Link check complete; look for any errors in the above output " \
- "or in $(BUILDDIR)/linkcheck/output.txt."
-
-.PHONY: doctest
-doctest:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
- @echo "Testing of doctests in the sources finished, look at the " \
- "results in $(BUILDDIR)/doctest/output.txt."
-
-.PHONY: coverage
-coverage:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
- @echo "Testing of coverage in the sources finished, look at the " \
- "results in $(BUILDDIR)/coverage/python.txt."
-
-.PHONY: xml
-xml:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
- @echo
- @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
-
-.PHONY: pseudoxml
-pseudoxml:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
- @echo
- @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
-
-.PHONY: dummy
-dummy:
- $(MAKE_CASSANDRA_YAML)
- $(GENERATE_NODETOOL_DOCS)
- $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
- @echo
- @echo "Build finished. Dummy builder generates no files."
+ @# hack until a local basic antora build is put in
+
+.PHONY: gen-asciidoc
+gen-asciidoc:
+ @mkdir -p modules/cassandra/pages/tools/nodetool
+ @mkdir -p modules/cassandra/examples/TEXT/NODETOOL
+ python3 $(GENERATE_NODETOOL_DOCS)
+ python3 $(MAKE_CASSANDRA_YAML)
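
The slimmed-down Makefile delegates all generation to the two scripts. A minimal usage sketch, run from `doc/` and assuming python3 plus a built tree so the nodetool docs script can execute nodetool:

```bash
# Regenerate the derived AsciiDoc sources (run from doc/).
make gen-asciidoc

# The outputs land in the paths now listed in .gitignore:
ls modules/cassandra/pages/configuration/cass_yaml_file.adoc
ls modules/cassandra/pages/tools/nodetool/
ls modules/cassandra/examples/TEXT/NODETOOL/
```
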
diff --git a/doc/README.md b/doc/README.md
index 25ca702cf2c..608d236cb75 100644
--- a/doc/README.md
+++ b/doc/README.md
@@ -23,52 +23,39 @@ Apache Cassandra documentation directory
This directory contains the documentation maintained in-tree for Apache
Cassandra. This directory contains the following documents:
-- The source of the official Cassandra documentation, in the `source/`
+- The source of the official Cassandra documentation, in the `modules/`
subdirectory. See below for more details on how to edit/build that
documentation.
- The specification(s) for the supported versions of native transport protocol.
-- Additional documentation on the SASI implementation (`SASI.md`). TODO: we
- should probably move the first half of that documentation to the general
- documentation, and the implementation explanation parts into the wiki.
Official documentation
----------------------
The source for the official documentation for Apache Cassandra can be found in
-the `source` subdirectory. The documentation uses [sphinx](http://www.sphinx-doc.org/)
-and is thus written in [reStructuredText](http://docutils.sourceforge.net/rst.html).
+the `modules/cassandra/pages` subdirectory. The documentation uses [antora](http://www.antora.org/)
+and is thus written in [asciidoc](http://asciidoc.org).
-To build the HTML documentation, you will need to first install sphinx and the
-[sphinx ReadTheDocs theme](https://pypi.org/project/sphinx_rtd_theme/).
-When using Python 3.6 on Windows, use `py -m pip install sphinx sphinx_rtd_theme`, on unix
-use:
+To generate the asciidoc files for cassandra.yaml and the nodetool commands, run (from project root):
+```bash
+ant gen-asciidoc
```
-pip install sphinx sphinx_rtd_theme
+or (from this directory):
+
+```bash
+make gen-asciidoc
```
-The documentation can then be built from this directory by calling `make html`
-(or `make.bat html` on windows). Alternatively, the top-level `ant gen-doc`
-target can be used. When using Python 3.6 on Windows, use `sphinx_build -b html source build`.
-To build the documentation with Docker Compose, run:
+(The following has not yet been implemented, for now see the build instructions in the [cassandra-website](https://github.com/apache/cassandra-website) repo.)
+To build the documentation, run (from project root):
```bash
-cd ./doc
-
-# build the Docker image
-docker-compose build build-docs
-
-# build the documentation
-docker-compose run build-docs
+ant gen-doc
```
-
-To regenerate the documentation from scratch, run:
+or (from this directory):
```bash
-# return to the root directory of the Cassandra project
-cd ..
-
-# remove all generated documentation files based on the source code
-ant realclean
+make html
```
+
diff --git a/doc/antora.yml b/doc/antora.yml
new file mode 100644
index 00000000000..f9fa1480d18
--- /dev/null
+++ b/doc/antora.yml
@@ -0,0 +1,9 @@
+name: Cassandra
+version: 'trunk'
+display_version: 'trunk'
+asciidoc:
+ attributes:
+ cass_url: 'http://cassandra.apache.org/'
+nav:
+- modules/ROOT/nav.adoc
+- modules/cassandra/nav.adoc
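
This `antora.yml` turns `doc/` into an Antora component named `Cassandra` with two navigation files. Rendering it requires a site playbook, which per the README lives in the cassandra-website repo; a hypothetical minimal playbook is sketched below for orientation only (the repository URL, branch, and UI bundle are assumptions, and the Antora CLI and site generator are assumed to be installed):

```bash
# Hypothetical minimal playbook pulling this component; the URL and
# UI bundle are illustrative assumptions, not the real site config.
cat > antora-playbook.yml <<'EOF'
site:
  title: Apache Cassandra Documentation
content:
  sources:
  - url: https://github.com/apache/cassandra.git
    branches: trunk
    start_path: doc
ui:
  bundle:
    url: ./ui-bundle.zip
EOF
npx antora antora-playbook.yml
```
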
diff --git a/doc/cql3/CQL.textile b/doc/cql3/CQL.textile
index c60800ed06e..fae2119ae76 100644
--- a/doc/cql3/CQL.textile
+++ b/doc/cql3/CQL.textile
@@ -416,7 +416,6 @@ bc(syntax)..
| ADD ( <identifier> <type> ( , <identifier> <type> )* )
| DROP <identifier>
| DROP ( <identifier> ( , <identifier> )* )
- | DROP COMPACT STORAGE
| WITH <option> ( AND <option> )*
p.
__Sample:__
@@ -435,7 +434,6 @@ The @ALTER@ statement is used to manipulate table definitions. It allows for add
The @<tablename>@ is the table name optionally preceded by the keyspace name. The @<instruction>@ defines the alteration to perform:
* @ADD@: Adds a new column to the table. The @<identifier>@ for the new column must not conflict with an existing column. Moreover, columns cannot be added to tables defined with the @COMPACT STORAGE@ option.
* @DROP@: Removes a column from the table. Dropped columns will immediately become unavailable in the queries and will not be included in compacted sstables in the future. If a column is readded, queries won't return values written before the column was last dropped. It is assumed that timestamps represent actual time, so if this is not your case, you should NOT readd previously dropped columns. Columns can't be dropped from tables defined with the @COMPACT STORAGE@ option.
-* @DROP COMPACT STORAGE@: Removes Thrift compatibility mode from the table.
* @WITH@: Allows to update the options of the table. The "supported @<option>@":#createTableOptions (and syntax) are the same as for the @CREATE TABLE@ statement except that @COMPACT STORAGE@ is not supported. Note that setting any @compaction@ sub-options has the effect of erasing all previous @compaction@ options, so you need to re-specify all the sub-options if you want to keep them. The same note applies to the set of @compression@ sub-options.
h4. CQL type compatibility:
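
In concrete terms, the remaining `ALTER TABLE` instructions look like the sketch below (keyspace and table names are hypothetical; `DROP COMPACT STORAGE` is removed from the grammar above):

```bash
# Hypothetical ks.t, illustrating the surviving ADD / DROP / WITH forms:
cqlsh -e "ALTER TABLE ks.t ADD added_col text;"
cqlsh -e "ALTER TABLE ks.t DROP added_col;"
cqlsh -e "ALTER TABLE ks.t WITH comment = 'altered';"
```
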
diff --git a/doc/docker-compose.yml b/doc/docker-compose.yml
deleted file mode 100644
index b1477b6a8bf..00000000000
--- a/doc/docker-compose.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# docker-compose.yml for building the Cassandra documentation.
-
-version: '2.0'
-
-services:
- build-docs:
- build: .
- volumes:
- - ..:/usr/src/code
- environment:
- - SKIP_NODETOOL # set this to skip nodetool build, saves a lot of time when debugging html
diff --git a/doc/make.bat b/doc/make.bat
deleted file mode 100644
index cbd1d1dbbce..00000000000
--- a/doc/make.bat
+++ /dev/null
@@ -1,299 +0,0 @@
-@ECHO OFF
-
-REM
-REM Licensed to the Apache Software Foundation (ASF) under one
-REM or more contributor license agreements. See the NOTICE file
-REM distributed with this work for additional information
-REM regarding copyright ownership. The ASF licenses this file
-REM to you under the Apache License, Version 2.0 (the
-REM "License"); you may not use this file except in compliance
-REM with the License. You may obtain a copy of the License at
-REM
-REM http://www.apache.org/licenses/LICENSE-2.0
-REM
-REM Unless required by applicable law or agreed to in writing, software
-REM distributed under the License is distributed on an "AS IS" BASIS,
-REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-REM See the License for the specific language governing permissions and
-REM limitations under the License.
-REM
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
- set SPHINXBUILD=sphinx-build
-)
-set BUILDDIR=build
-set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
-set I18NSPHINXOPTS=%SPHINXOPTS% .
-if NOT "%PAPER%" == "" (
- set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
- set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
-)
-
-if "%1" == "" goto help
-
-if "%1" == "help" (
- :help
- echo.Please use `make ^<target^>` where ^<target^> is one of
- echo. html to make standalone HTML files
- echo. dirhtml to make HTML files named index.html in directories
- echo. singlehtml to make a single large HTML file
- echo. pickle to make pickle files
- echo. json to make JSON files
- echo. htmlhelp to make HTML files and a HTML help project
- echo. qthelp to make HTML files and a qthelp project
- echo. devhelp to make HTML files and a Devhelp project
- echo. epub to make an epub
- echo. epub3 to make an epub3
- echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
- echo. text to make text files
- echo. man to make manual pages
- echo. texinfo to make Texinfo files
- echo. gettext to make PO message catalogs
- echo. changes to make an overview over all changed/added/deprecated items
- echo. xml to make Docutils-native XML files
- echo. pseudoxml to make pseudoxml-XML files for display purposes
- echo. linkcheck to check all external links for integrity
- echo. doctest to run all doctests embedded in the documentation if enabled
- echo. coverage to run coverage check of the documentation if enabled
- echo. dummy to check syntax errors of document sources
- goto end
-)
-
-if "%1" == "clean" (
- for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
- del /q /s %BUILDDIR%\*
- goto end
-)
-
-
-REM Check if sphinx-build is available and fallback to Python version if any
-%SPHINXBUILD% 1>NUL 2>NUL
-if errorlevel 9009 goto sphinx_python
-goto sphinx_ok
-
-:sphinx_python
-
-set SPHINXBUILD=python -m sphinx.__init__
-%SPHINXBUILD% 2> nul
-if errorlevel 9009 (
- echo.
- echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
- echo.installed, then set the SPHINXBUILD environment variable to point
- echo.to the full path of the 'sphinx-build' executable. Alternatively you
- echo.may add the Sphinx directory to PATH.
- echo.
- echo.If you don't have Sphinx installed, grab it from
- echo.http://sphinx-doc.org/
- exit /b 1
-)
-
-:sphinx_ok
-
-
-if "%1" == "html" (
- %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The HTML pages are in %BUILDDIR%/html.
- goto end
-)
-
-if "%1" == "dirhtml" (
- %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
- goto end
-)
-
-if "%1" == "singlehtml" (
- %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
- goto end
-)
-
-if "%1" == "pickle" (
- %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can process the pickle files.
- goto end
-)
-
-if "%1" == "json" (
- %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can process the JSON files.
- goto end
-)
-
-if "%1" == "htmlhelp" (
- %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can run HTML Help Workshop with the ^
-.hhp project file in %BUILDDIR%/htmlhelp.
- goto end
-)
-
-if "%1" == "qthelp" (
- %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can run "qcollectiongenerator" with the ^
-.qhcp project file in %BUILDDIR%/qthelp, like this:
- echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Foo.qhcp
- echo.To view the help file:
- echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Foo.ghc
- goto end
-)
-
-if "%1" == "devhelp" (
- %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished.
- goto end
-)
-
-if "%1" == "epub" (
- %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The epub file is in %BUILDDIR%/epub.
- goto end
-)
-
-if "%1" == "epub3" (
- %SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The epub3 file is in %BUILDDIR%/epub3.
- goto end
-)
-
-if "%1" == "latex" (
- %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
- goto end
-)
-
-if "%1" == "latexpdf" (
- %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
- cd %BUILDDIR%/latex
- make all-pdf
- cd %~dp0
- echo.
- echo.Build finished; the PDF files are in %BUILDDIR%/latex.
- goto end
-)
-
-if "%1" == "latexpdfja" (
- %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
- cd %BUILDDIR%/latex
- make all-pdf-ja
- cd %~dp0
- echo.
- echo.Build finished; the PDF files are in %BUILDDIR%/latex.
- goto end
-)
-
-if "%1" == "text" (
- %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The text files are in %BUILDDIR%/text.
- goto end
-)
-
-if "%1" == "man" (
- %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The manual pages are in %BUILDDIR%/man.
- goto end
-)
-
-if "%1" == "texinfo" (
- %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
- goto end
-)
-
-if "%1" == "gettext" (
- %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
- goto end
-)
-
-if "%1" == "changes" (
- %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
- if errorlevel 1 exit /b 1
- echo.
- echo.The overview file is in %BUILDDIR%/changes.
- goto end
-)
-
-if "%1" == "linkcheck" (
- %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
- if errorlevel 1 exit /b 1
- echo.
- echo.Link check complete; look for any errors in the above output ^
-or in %BUILDDIR%/linkcheck/output.txt.
- goto end
-)
-
-if "%1" == "doctest" (
- %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
- if errorlevel 1 exit /b 1
- echo.
- echo.Testing of doctests in the sources finished, look at the ^
-results in %BUILDDIR%/doctest/output.txt.
- goto end
-)
-
-if "%1" == "coverage" (
- %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
- if errorlevel 1 exit /b 1
- echo.
- echo.Testing of coverage in the sources finished, look at the ^
-results in %BUILDDIR%/coverage/python.txt.
- goto end
-)
-
-if "%1" == "xml" (
- %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The XML files are in %BUILDDIR%/xml.
- goto end
-)
-
-if "%1" == "pseudoxml" (
- %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
- goto end
-)
-
-if "%1" == "dummy" (
- %SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. Dummy builder generates no files.
- goto end
-)
-
-:end
diff --git a/doc/modules/ROOT/nav.adoc b/doc/modules/ROOT/nav.adoc
new file mode 100644
index 00000000000..74c129c5a2b
--- /dev/null
+++ b/doc/modules/ROOT/nav.adoc
@@ -0,0 +1,4 @@
+* xref:index.adoc[Main]
+** xref:master@_:ROOT:glossary.adoc[Glossary]
+** xref:master@_:ROOT:bugs.adoc[How to report bugs]
+** xref:master@_:ROOT:contactus.adoc[Contact us]
diff --git a/doc/modules/ROOT/pages/index.adoc b/doc/modules/ROOT/pages/index.adoc
new file mode 100644
index 00000000000..6a0c745a76d
--- /dev/null
+++ b/doc/modules/ROOT/pages/index.adoc
@@ -0,0 +1,50 @@
+= Welcome to Apache Cassandra's documentation!
+
+:description: Starting page for Apache Cassandra documentation.
+:keywords: Apache, Cassandra, NoSQL, database
+:cass-url: http://cassandra.apache.org
+:cass-contrib-url: https://wiki.apache.org/cassandra/HowToContribute
+
+This is the official documentation for {cass-url}[Apache Cassandra].
+If you would like to contribute to this documentation, you are welcome
+to do so by submitting your contribution like any other patch following
+{cass-contrib-url}[these instructions].
+
+== Main documentation
+
+[cols="a,a"]
+|===
+
+| xref:cassandra:getting_started/index.adoc[Getting started] | Newbie starting point
+
+| xref:cassandra:new/index.adoc[What's new in 4.0] | What's new in Cassandra 4.0
+
+| xref:cassandra:architecture/index.adoc[Architecture] | Cassandra's big picture
+
+| xref:cassandra:data_modeling/index.adoc[Data modeling] | Hint: it's not relational
+
+| xref:cassandra:cql/index.adoc[Cassandra Query Language (CQL)] | CQL reference documentation
+
+| xref:cassandra:configuration/index.adoc[Configuration] | Cassandra's handles and knobs
+
+| xref:cassandra:operating/index.adoc[Operation] | The operator's corner
+
+| xref:cassandra:tools/index.adoc[Tools] | cqlsh, nodetool, and others
+
+| xref:cassandra:troubleshooting/index.adoc[Troubleshooting] | What to look for when you have a problem
+
+| xref:cassandra:faq/index.adoc[FAQ] | Frequently asked questions
+
+| xref:cassandra:plugins/index.adoc[Plug-ins] | Third-party plug-ins
+
+| xref:master@_:ROOT:native_protocol.adoc[Native Protocols] | Native Cassandra protocol specifications
+
+|===
+
+== Meta information
+* xref:master@_:ROOT:bugs.adoc[Reporting bugs]
+* xref:master@_:ROOT:contactus.adoc[Contact us]
+* xref:master@_:ROOT:development/index.adoc[Contributing code]
+* xref:master@_:ROOT:docdev/index.adoc[Contributing to the docs]
+* xref:master@_:ROOT:community.adoc[Community]
+* xref:master@_:ROOT:download.adoc[Download]
diff --git a/doc/source/operating/Figure_1_backups.jpg b/doc/modules/cassandra/assets/images/Figure_1_backups.jpg
similarity index 100%
rename from doc/source/operating/Figure_1_backups.jpg
rename to doc/modules/cassandra/assets/images/Figure_1_backups.jpg
diff --git a/doc/source/data_modeling/images/Figure_1_data_model.jpg b/doc/modules/cassandra/assets/images/Figure_1_data_model.jpg
similarity index 100%
rename from doc/source/data_modeling/images/Figure_1_data_model.jpg
rename to doc/modules/cassandra/assets/images/Figure_1_data_model.jpg
diff --git a/doc/source/architecture/Figure_1_guarantees.jpg b/doc/modules/cassandra/assets/images/Figure_1_guarantees.jpg
similarity index 100%
rename from doc/source/architecture/Figure_1_guarantees.jpg
rename to doc/modules/cassandra/assets/images/Figure_1_guarantees.jpg
diff --git a/doc/source/operating/Figure_1_read_repair.jpg b/doc/modules/cassandra/assets/images/Figure_1_read_repair.jpg
similarity index 100%
rename from doc/source/operating/Figure_1_read_repair.jpg
rename to doc/modules/cassandra/assets/images/Figure_1_read_repair.jpg
diff --git a/doc/source/data_modeling/images/Figure_2_data_model.jpg b/doc/modules/cassandra/assets/images/Figure_2_data_model.jpg
similarity index 100%
rename from doc/source/data_modeling/images/Figure_2_data_model.jpg
rename to doc/modules/cassandra/assets/images/Figure_2_data_model.jpg
diff --git a/doc/source/operating/Figure_2_read_repair.jpg b/doc/modules/cassandra/assets/images/Figure_2_read_repair.jpg
similarity index 100%
rename from doc/source/operating/Figure_2_read_repair.jpg
rename to doc/modules/cassandra/assets/images/Figure_2_read_repair.jpg
diff --git a/doc/source/operating/Figure_3_read_repair.jpg b/doc/modules/cassandra/assets/images/Figure_3_read_repair.jpg
similarity index 100%
rename from doc/source/operating/Figure_3_read_repair.jpg
rename to doc/modules/cassandra/assets/images/Figure_3_read_repair.jpg
diff --git a/doc/source/operating/Figure_4_read_repair.jpg b/doc/modules/cassandra/assets/images/Figure_4_read_repair.jpg
similarity index 100%
rename from doc/source/operating/Figure_4_read_repair.jpg
rename to doc/modules/cassandra/assets/images/Figure_4_read_repair.jpg
diff --git a/doc/source/operating/Figure_5_read_repair.jpg b/doc/modules/cassandra/assets/images/Figure_5_read_repair.jpg
similarity index 100%
rename from doc/source/operating/Figure_5_read_repair.jpg
rename to doc/modules/cassandra/assets/images/Figure_5_read_repair.jpg
diff --git a/doc/source/operating/Figure_6_read_repair.jpg b/doc/modules/cassandra/assets/images/Figure_6_read_repair.jpg
similarity index 100%
rename from doc/source/operating/Figure_6_read_repair.jpg
rename to doc/modules/cassandra/assets/images/Figure_6_read_repair.jpg
diff --git a/doc/source/data_modeling/images/data_modeling_chebotko_logical.png b/doc/modules/cassandra/assets/images/data_modeling_chebotko_logical.png
similarity index 100%
rename from doc/source/data_modeling/images/data_modeling_chebotko_logical.png
rename to doc/modules/cassandra/assets/images/data_modeling_chebotko_logical.png
diff --git a/doc/source/data_modeling/images/data_modeling_chebotko_physical.png b/doc/modules/cassandra/assets/images/data_modeling_chebotko_physical.png
similarity index 100%
rename from doc/source/data_modeling/images/data_modeling_chebotko_physical.png
rename to doc/modules/cassandra/assets/images/data_modeling_chebotko_physical.png
diff --git a/doc/source/data_modeling/images/data_modeling_hotel_bucketing.png b/doc/modules/cassandra/assets/images/data_modeling_hotel_bucketing.png
similarity index 100%
rename from doc/source/data_modeling/images/data_modeling_hotel_bucketing.png
rename to doc/modules/cassandra/assets/images/data_modeling_hotel_bucketing.png
diff --git a/doc/source/data_modeling/images/data_modeling_hotel_erd.png b/doc/modules/cassandra/assets/images/data_modeling_hotel_erd.png
similarity index 100%
rename from doc/source/data_modeling/images/data_modeling_hotel_erd.png
rename to doc/modules/cassandra/assets/images/data_modeling_hotel_erd.png
diff --git a/doc/source/data_modeling/images/data_modeling_hotel_logical.png b/doc/modules/cassandra/assets/images/data_modeling_hotel_logical.png
similarity index 100%
rename from doc/source/data_modeling/images/data_modeling_hotel_logical.png
rename to doc/modules/cassandra/assets/images/data_modeling_hotel_logical.png
diff --git a/doc/source/data_modeling/images/data_modeling_hotel_physical.png b/doc/modules/cassandra/assets/images/data_modeling_hotel_physical.png
similarity index 100%
rename from doc/source/data_modeling/images/data_modeling_hotel_physical.png
rename to doc/modules/cassandra/assets/images/data_modeling_hotel_physical.png
diff --git a/doc/source/data_modeling/images/data_modeling_hotel_queries.png b/doc/modules/cassandra/assets/images/data_modeling_hotel_queries.png
similarity index 100%
rename from doc/source/data_modeling/images/data_modeling_hotel_queries.png
rename to doc/modules/cassandra/assets/images/data_modeling_hotel_queries.png
diff --git a/doc/source/data_modeling/images/data_modeling_hotel_relational.png b/doc/modules/cassandra/assets/images/data_modeling_hotel_relational.png
similarity index 100%
rename from doc/source/data_modeling/images/data_modeling_hotel_relational.png
rename to doc/modules/cassandra/assets/images/data_modeling_hotel_relational.png
diff --git a/doc/source/data_modeling/images/data_modeling_reservation_logical.png b/doc/modules/cassandra/assets/images/data_modeling_reservation_logical.png
similarity index 100%
rename from doc/source/data_modeling/images/data_modeling_reservation_logical.png
rename to doc/modules/cassandra/assets/images/data_modeling_reservation_logical.png
diff --git a/doc/source/data_modeling/images/data_modeling_reservation_physical.png b/doc/modules/cassandra/assets/images/data_modeling_reservation_physical.png
similarity index 100%
rename from doc/source/data_modeling/images/data_modeling_reservation_physical.png
rename to doc/modules/cassandra/assets/images/data_modeling_reservation_physical.png
diff --git a/doc/source/development/images/docs_commit.png b/doc/modules/cassandra/assets/images/docs_commit.png
similarity index 100%
rename from doc/source/development/images/docs_commit.png
rename to doc/modules/cassandra/assets/images/docs_commit.png
diff --git a/doc/source/development/images/docs_create_branch.png b/doc/modules/cassandra/assets/images/docs_create_branch.png
similarity index 100%
rename from doc/source/development/images/docs_create_branch.png
rename to doc/modules/cassandra/assets/images/docs_create_branch.png
diff --git a/doc/source/development/images/docs_create_file.png b/doc/modules/cassandra/assets/images/docs_create_file.png
similarity index 100%
rename from doc/source/development/images/docs_create_file.png
rename to doc/modules/cassandra/assets/images/docs_create_file.png
diff --git a/doc/source/development/images/docs_editor.png b/doc/modules/cassandra/assets/images/docs_editor.png
similarity index 100%
rename from doc/source/development/images/docs_editor.png
rename to doc/modules/cassandra/assets/images/docs_editor.png
diff --git a/doc/source/development/images/docs_fork.png b/doc/modules/cassandra/assets/images/docs_fork.png
similarity index 100%
rename from doc/source/development/images/docs_fork.png
rename to doc/modules/cassandra/assets/images/docs_fork.png
diff --git a/doc/source/development/images/docs_pr.png b/doc/modules/cassandra/assets/images/docs_pr.png
similarity index 100%
rename from doc/source/development/images/docs_pr.png
rename to doc/modules/cassandra/assets/images/docs_pr.png
diff --git a/doc/source/development/images/docs_preview.png b/doc/modules/cassandra/assets/images/docs_preview.png
similarity index 100%
rename from doc/source/development/images/docs_preview.png
rename to doc/modules/cassandra/assets/images/docs_preview.png
diff --git a/doc/source/development/images/eclipse_debug0.png b/doc/modules/cassandra/assets/images/eclipse_debug0.png
similarity index 100%
rename from doc/source/development/images/eclipse_debug0.png
rename to doc/modules/cassandra/assets/images/eclipse_debug0.png
diff --git a/doc/source/development/images/eclipse_debug1.png b/doc/modules/cassandra/assets/images/eclipse_debug1.png
similarity index 100%
rename from doc/source/development/images/eclipse_debug1.png
rename to doc/modules/cassandra/assets/images/eclipse_debug1.png
diff --git a/doc/source/development/images/eclipse_debug2.png b/doc/modules/cassandra/assets/images/eclipse_debug2.png
similarity index 100%
rename from doc/source/development/images/eclipse_debug2.png
rename to doc/modules/cassandra/assets/images/eclipse_debug2.png
diff --git a/doc/source/development/images/eclipse_debug3.png b/doc/modules/cassandra/assets/images/eclipse_debug3.png
similarity index 100%
rename from doc/source/development/images/eclipse_debug3.png
rename to doc/modules/cassandra/assets/images/eclipse_debug3.png
diff --git a/doc/source/development/images/eclipse_debug4.png b/doc/modules/cassandra/assets/images/eclipse_debug4.png
similarity index 100%
rename from doc/source/development/images/eclipse_debug4.png
rename to doc/modules/cassandra/assets/images/eclipse_debug4.png
diff --git a/doc/source/development/images/eclipse_debug5.png b/doc/modules/cassandra/assets/images/eclipse_debug5.png
similarity index 100%
rename from doc/source/development/images/eclipse_debug5.png
rename to doc/modules/cassandra/assets/images/eclipse_debug5.png
diff --git a/doc/source/development/images/eclipse_debug6.png b/doc/modules/cassandra/assets/images/eclipse_debug6.png
similarity index 100%
rename from doc/source/development/images/eclipse_debug6.png
rename to doc/modules/cassandra/assets/images/eclipse_debug6.png
diff --git a/doc/source/tools/example-stress-graph.png b/doc/modules/cassandra/assets/images/example-stress-graph.png
similarity index 100%
rename from doc/source/tools/example-stress-graph.png
rename to doc/modules/cassandra/assets/images/example-stress-graph.png
diff --git a/doc/source/tools/generatetokens.rst b/doc/modules/cassandra/assets/images/generatetokens.rst
similarity index 100%
rename from doc/source/tools/generatetokens.rst
rename to doc/modules/cassandra/assets/images/generatetokens.rst
diff --git a/doc/source/operating/images/hints.svg b/doc/modules/cassandra/assets/images/hints.svg
similarity index 100%
rename from doc/source/operating/images/hints.svg
rename to doc/modules/cassandra/assets/images/hints.svg
diff --git a/doc/source/architecture/images/ring.svg b/doc/modules/cassandra/assets/images/ring.svg
similarity index 100%
rename from doc/source/architecture/images/ring.svg
rename to doc/modules/cassandra/assets/images/ring.svg
diff --git a/doc/source/architecture/images/vnodes.svg b/doc/modules/cassandra/assets/images/vnodes.svg
similarity index 100%
rename from doc/source/architecture/images/vnodes.svg
rename to doc/modules/cassandra/assets/images/vnodes.svg
diff --git a/doc/source/development/license_compliance.rst b/doc/modules/cassandra/assets/license_compliance.rst
similarity index 100%
rename from doc/source/development/license_compliance.rst
rename to doc/modules/cassandra/assets/license_compliance.rst
diff --git a/doc/modules/cassandra/examples/BASH/add_repo_keys.sh b/doc/modules/cassandra/examples/BASH/add_repo_keys.sh
new file mode 100644
index 00000000000..cdb5881e563
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/add_repo_keys.sh
@@ -0,0 +1 @@
+$ curl https://www.apache.org/dist/cassandra/KEYS | sudo apt-key add -
diff --git a/doc/modules/cassandra/examples/BASH/apt-get_cass.sh b/doc/modules/cassandra/examples/BASH/apt-get_cass.sh
new file mode 100644
index 00000000000..9614b29ab2b
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/apt-get_cass.sh
@@ -0,0 +1 @@
+$ sudo apt-get install cassandra
diff --git a/doc/modules/cassandra/examples/BASH/apt-get_update.sh b/doc/modules/cassandra/examples/BASH/apt-get_update.sh
new file mode 100644
index 00000000000..b50b7ac768e
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/apt-get_update.sh
@@ -0,0 +1 @@
+$ sudo apt-get update
diff --git a/doc/modules/cassandra/examples/BASH/check_backups.sh b/doc/modules/cassandra/examples/BASH/check_backups.sh
new file mode 100644
index 00000000000..212c3d2c7d4
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/check_backups.sh
@@ -0,0 +1 @@
+$ cd ./cassandra/data/data/cqlkeyspace/t-d132e240c21711e9bbee19821dcea330/backups && ls -l
diff --git a/doc/modules/cassandra/examples/BASH/cqlsh_localhost.sh b/doc/modules/cassandra/examples/BASH/cqlsh_localhost.sh
new file mode 100644
index 00000000000..7bc1c39525d
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/cqlsh_localhost.sh
@@ -0,0 +1 @@
+$ bin/cqlsh localhost
diff --git a/doc/modules/cassandra/examples/BASH/curl_install.sh b/doc/modules/cassandra/examples/BASH/curl_install.sh
new file mode 100644
index 00000000000..1f1c2c9f45e
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/curl_install.sh
@@ -0,0 +1 @@
+$ curl -OL http://apache.mirror.digitalpacific.com.au/cassandra/4.0.0/apache-cassandra-4.0.0-bin.tar.gz
diff --git a/doc/modules/cassandra/examples/BASH/curl_verify_sha.sh b/doc/modules/cassandra/examples/BASH/curl_verify_sha.sh
new file mode 100644
index 00000000000..dcb5314e147
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/curl_verify_sha.sh
@@ -0,0 +1 @@
+$ curl -L https://downloads.apache.org/cassandra/4.0.0/apache-cassandra-4.0.0-bin.tar.gz.sha256
diff --git a/doc/modules/cassandra/examples/BASH/docker_cqlsh.sh b/doc/modules/cassandra/examples/BASH/docker_cqlsh.sh
new file mode 100644
index 00000000000..92a4a8f356d
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/docker_cqlsh.sh
@@ -0,0 +1 @@
+docker exec -it cass_cluster cqlsh
diff --git a/doc/modules/cassandra/examples/BASH/docker_pull.sh b/doc/modules/cassandra/examples/BASH/docker_pull.sh
new file mode 100644
index 00000000000..a732b1a1128
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/docker_pull.sh
@@ -0,0 +1 @@
+docker pull cassandra:latest
diff --git a/doc/modules/cassandra/examples/BASH/docker_run.sh b/doc/modules/cassandra/examples/BASH/docker_run.sh
new file mode 100644
index 00000000000..00e75fae69c
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/docker_run.sh
@@ -0,0 +1 @@
+docker run --name cass_cluster cassandra:latest
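
The three Docker snippets above compose into a single quickstart; a sketch chaining them (with `-d` added so the container detaches and the shell is free for the exec step):

```bash
# Pull the image, start a container (detached; -d is an addition to
# the snippet above), then open cqlsh inside it:
docker pull cassandra:latest
docker run -d --name cass_cluster cassandra:latest
# wait for the node to finish starting before connecting
docker exec -it cass_cluster cqlsh
```
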
diff --git a/doc/modules/cassandra/examples/BASH/find_backups.sh b/doc/modules/cassandra/examples/BASH/find_backups.sh
new file mode 100644
index 00000000000..56744bb3f18
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/find_backups.sh
@@ -0,0 +1 @@
+$ find -name backups
diff --git a/doc/modules/cassandra/examples/BASH/find_snapshots.sh b/doc/modules/cassandra/examples/BASH/find_snapshots.sh
new file mode 100644
index 00000000000..7abae2b42f0
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/find_snapshots.sh
@@ -0,0 +1 @@
+$ find -name snapshots
diff --git a/doc/modules/cassandra/examples/BASH/find_sstables.sh b/doc/modules/cassandra/examples/BASH/find_sstables.sh
new file mode 100644
index 00000000000..51569035c17
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/find_sstables.sh
@@ -0,0 +1 @@
+find /var/lib/cassandra/data/ -type f | grep -v -- -ib- | grep -v "/snapshots"
diff --git a/doc/modules/cassandra/examples/BASH/find_two_snapshots.sh b/doc/modules/cassandra/examples/BASH/find_two_snapshots.sh
new file mode 100644
index 00000000000..6e97b4b72de
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/find_two_snapshots.sh
@@ -0,0 +1 @@
+$ cd ./cassandra/data/data/catalogkeyspace/journal-296a2d30c22a11e9b1350d927649052c/snapshots && ls -l
diff --git a/doc/modules/cassandra/examples/BASH/flush_and_check.sh b/doc/modules/cassandra/examples/BASH/flush_and_check.sh
new file mode 100644
index 00000000000..5f966e3c5a0
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/flush_and_check.sh
@@ -0,0 +1,2 @@
+$ nodetool flush cqlkeyspace t
+$ cd ./cassandra/data/data/cqlkeyspace/t-d132e240c21711e9bbee19821dcea330/backups && ls -l
diff --git a/doc/modules/cassandra/examples/BASH/get_deb_package.sh b/doc/modules/cassandra/examples/BASH/get_deb_package.sh
new file mode 100644
index 00000000000..891bed64272
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/get_deb_package.sh
@@ -0,0 +1,2 @@
+$ echo "deb http://www.apache.org/dist/cassandra/debian 40x main" | sudo tee -a /etc/apt/sources.list.d/cassandra.sources.list
+deb http://www.apache.org/dist/cassandra/debian 40x main
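
The Debian snippets assemble into one install sequence; a sketch combining them as-is:

```bash
# Add the 4.0.x repo, trust the project keys, then install:
echo "deb http://www.apache.org/dist/cassandra/debian 40x main" | sudo tee -a /etc/apt/sources.list.d/cassandra.sources.list
curl https://www.apache.org/dist/cassandra/KEYS | sudo apt-key add -
sudo apt-get update
sudo apt-get install cassandra
```
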
diff --git a/doc/modules/cassandra/examples/BASH/java_verify.sh b/doc/modules/cassandra/examples/BASH/java_verify.sh
new file mode 100644
index 00000000000..da7832fdea8
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/java_verify.sh
@@ -0,0 +1 @@
+$ java -version
diff --git a/doc/modules/cassandra/examples/BASH/nodetool_clearsnapshot.sh b/doc/modules/cassandra/examples/BASH/nodetool_clearsnapshot.sh
new file mode 100644
index 00000000000..a327ad17f45
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/nodetool_clearsnapshot.sh
@@ -0,0 +1 @@
+$ nodetool clearsnapshot -t magazine cqlkeyspace
diff --git a/doc/modules/cassandra/examples/BASH/nodetool_clearsnapshot_all.sh b/doc/modules/cassandra/examples/BASH/nodetool_clearsnapshot_all.sh
new file mode 100644
index 00000000000..a22841d7446
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/nodetool_clearsnapshot_all.sh
@@ -0,0 +1 @@
+$ nodetool clearsnapshot --all cqlkeyspace
diff --git a/doc/modules/cassandra/examples/BASH/nodetool_flush.sh b/doc/modules/cassandra/examples/BASH/nodetool_flush.sh
new file mode 100644
index 00000000000..960b852961a
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/nodetool_flush.sh
@@ -0,0 +1,3 @@
+$ nodetool flush cqlkeyspace t
+$ nodetool flush cqlkeyspace t2
+$ nodetool flush catalogkeyspace journal magazine
diff --git a/doc/modules/cassandra/examples/BASH/nodetool_flush_table.sh b/doc/modules/cassandra/examples/BASH/nodetool_flush_table.sh
new file mode 100644
index 00000000000..2c236de27b7
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/nodetool_flush_table.sh
@@ -0,0 +1 @@
+$ nodetool flush cqlkeyspace t
diff --git a/doc/modules/cassandra/examples/BASH/nodetool_list_snapshots.sh b/doc/modules/cassandra/examples/BASH/nodetool_list_snapshots.sh
new file mode 100644
index 00000000000..76633f0767d
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/nodetool_list_snapshots.sh
@@ -0,0 +1 @@
+$ nodetool listsnapshots
diff --git a/doc/modules/cassandra/examples/BASH/nodetool_snapshot.sh b/doc/modules/cassandra/examples/BASH/nodetool_snapshot.sh
new file mode 100644
index 00000000000..c74e46795e5
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/nodetool_snapshot.sh
@@ -0,0 +1 @@
+$ nodetool help snapshot
diff --git a/doc/modules/cassandra/examples/BASH/nodetool_status.sh b/doc/modules/cassandra/examples/BASH/nodetool_status.sh
new file mode 100644
index 00000000000..a9b768d9a0c
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/nodetool_status.sh
@@ -0,0 +1 @@
+$ bin/nodetool status
diff --git a/doc/modules/cassandra/examples/BASH/nodetool_status_nobin.sh b/doc/modules/cassandra/examples/BASH/nodetool_status_nobin.sh
new file mode 100644
index 00000000000..d7adbd3d4da
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/nodetool_status_nobin.sh
@@ -0,0 +1 @@
+$ nodetool status
diff --git a/doc/modules/cassandra/examples/BASH/run_cqlsh.sh b/doc/modules/cassandra/examples/BASH/run_cqlsh.sh
new file mode 100644
index 00000000000..ae8cbbdf801
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/run_cqlsh.sh
@@ -0,0 +1 @@
+$ bin/cqlsh
diff --git a/doc/modules/cassandra/examples/BASH/run_cqlsh_nobin.sh b/doc/modules/cassandra/examples/BASH/run_cqlsh_nobin.sh
new file mode 100644
index 00000000000..5517fbf5322
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/run_cqlsh_nobin.sh
@@ -0,0 +1 @@
+$ cqlsh
diff --git a/doc/modules/cassandra/examples/BASH/snapshot_backup2.sh b/doc/modules/cassandra/examples/BASH/snapshot_backup2.sh
new file mode 100644
index 00000000000..6d29f0aaa4f
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/snapshot_backup2.sh
@@ -0,0 +1 @@
+$ nodetool snapshot --tag catalog-ks catalogkeyspace
diff --git a/doc/modules/cassandra/examples/BASH/snapshot_both_backups.sh b/doc/modules/cassandra/examples/BASH/snapshot_both_backups.sh
new file mode 100644
index 00000000000..0966070dbe9
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/snapshot_both_backups.sh
@@ -0,0 +1 @@
+$ nodetool snapshot --tag catalog-cql-ks catalogkeyspace, cqlkeyspace
diff --git a/doc/modules/cassandra/examples/BASH/snapshot_files.sh b/doc/modules/cassandra/examples/BASH/snapshot_files.sh
new file mode 100644
index 00000000000..916f0e5bbc7
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/snapshot_files.sh
@@ -0,0 +1 @@
+$ cd catalog-ks && ls -l
diff --git a/doc/modules/cassandra/examples/BASH/snapshot_mult_ks.sh b/doc/modules/cassandra/examples/BASH/snapshot_mult_ks.sh
new file mode 100644
index 00000000000..fed3d3c1b89
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/snapshot_mult_ks.sh
@@ -0,0 +1 @@
+$ nodetool snapshot --kt-list catalogkeyspace.journal,cqlkeyspace.t --tag multi-ks
diff --git a/doc/modules/cassandra/examples/BASH/snapshot_mult_tables.sh b/doc/modules/cassandra/examples/BASH/snapshot_mult_tables.sh
new file mode 100644
index 00000000000..ad3a0d2b734
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/snapshot_mult_tables.sh
@@ -0,0 +1 @@
+$ nodetool snapshot --kt-list cqlkeyspace.t,cqlkeyspace.t2 --tag multi-table
diff --git a/doc/modules/cassandra/examples/BASH/snapshot_mult_tables_again.sh b/doc/modules/cassandra/examples/BASH/snapshot_mult_tables_again.sh
new file mode 100644
index 00000000000..f676f5bca01
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/snapshot_mult_tables_again.sh
@@ -0,0 +1 @@
+$ nodetool snapshot --kt-list cqlkeyspace.t, cqlkeyspace.t2 --tag multi-table-2
diff --git a/doc/modules/cassandra/examples/BASH/snapshot_one_table.sh b/doc/modules/cassandra/examples/BASH/snapshot_one_table.sh
new file mode 100644
index 00000000000..05484a9deab
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/snapshot_one_table.sh
@@ -0,0 +1 @@
+$ nodetool snapshot --tag <tag> --table <table> -- <keyspace>
diff --git a/doc/modules/cassandra/examples/BASH/snapshot_one_table2.sh b/doc/modules/cassandra/examples/BASH/snapshot_one_table2.sh
new file mode 100644
index 00000000000..73877102764
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/snapshot_one_table2.sh
@@ -0,0 +1 @@
+$ nodetool snapshot --tag magazine --table magazine catalogkeyspace
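
The snapshot snippets above describe a full lifecycle; a sketch running create, list, and clear in sequence with the names used above:

```bash
# Snapshot one table, confirm it exists, then remove it:
nodetool snapshot --tag magazine --table magazine catalogkeyspace
nodetool listsnapshots
nodetool clearsnapshot -t magazine catalogkeyspace
```
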
diff --git a/doc/modules/cassandra/examples/BASH/start_tarball.sh b/doc/modules/cassandra/examples/BASH/start_tarball.sh
new file mode 100644
index 00000000000..7ec066f95ad
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/start_tarball.sh
@@ -0,0 +1 @@
+$ cd apache-cassandra-4.0.0/ && bin/cassandra
diff --git a/doc/modules/cassandra/examples/BASH/tail_syslog.sh b/doc/modules/cassandra/examples/BASH/tail_syslog.sh
new file mode 100644
index 00000000000..b47575035db
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/tail_syslog.sh
@@ -0,0 +1 @@
+$ tail -f logs/system.log
diff --git a/doc/modules/cassandra/examples/BASH/tail_syslog_package.sh b/doc/modules/cassandra/examples/BASH/tail_syslog_package.sh
new file mode 100644
index 00000000000..c9f00ede057
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/tail_syslog_package.sh
@@ -0,0 +1 @@
+$ tail -f /var/log/cassandra/system.log
diff --git a/doc/modules/cassandra/examples/BASH/tarball.sh b/doc/modules/cassandra/examples/BASH/tarball.sh
new file mode 100644
index 00000000000..4b25ecff09c
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/tarball.sh
@@ -0,0 +1 @@
+$ tar xzvf apache-cassandra-4.0.0-bin.tar.gz
diff --git a/doc/modules/cassandra/examples/BASH/verify_gpg.sh b/doc/modules/cassandra/examples/BASH/verify_gpg.sh
new file mode 100644
index 00000000000..3046e578f16
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/verify_gpg.sh
@@ -0,0 +1 @@
+$ gpg --print-md SHA256 apache-cassandra-4.0.0-bin.tar.gz
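
The tarball snippets compose into a download-verify-run flow; stitched together as-is:

```bash
# Download, check the published SHA-256 against a locally computed one,
# then unpack and start a node:
curl -OL http://apache.mirror.digitalpacific.com.au/cassandra/4.0.0/apache-cassandra-4.0.0-bin.tar.gz
curl -L https://downloads.apache.org/cassandra/4.0.0/apache-cassandra-4.0.0-bin.tar.gz.sha256
gpg --print-md SHA256 apache-cassandra-4.0.0-bin.tar.gz
tar xzvf apache-cassandra-4.0.0-bin.tar.gz
cd apache-cassandra-4.0.0/ && bin/cassandra
```
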
diff --git a/doc/modules/cassandra/examples/BASH/yum_cass.sh b/doc/modules/cassandra/examples/BASH/yum_cass.sh
new file mode 100644
index 00000000000..cd8217b112e
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/yum_cass.sh
@@ -0,0 +1 @@
+$ sudo yum install cassandra
diff --git a/doc/modules/cassandra/examples/BASH/yum_start.sh b/doc/modules/cassandra/examples/BASH/yum_start.sh
new file mode 100644
index 00000000000..4930d1ab11f
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/yum_start.sh
@@ -0,0 +1 @@
+$ sudo service cassandra start
diff --git a/doc/modules/cassandra/examples/BASH/yum_update.sh b/doc/modules/cassandra/examples/BASH/yum_update.sh
new file mode 100644
index 00000000000..2e815b2a065
--- /dev/null
+++ b/doc/modules/cassandra/examples/BASH/yum_update.sh
@@ -0,0 +1 @@
+$ sudo yum update
diff --git a/doc/modules/cassandra/examples/BNF/alter_ks.bnf b/doc/modules/cassandra/examples/BNF/alter_ks.bnf
new file mode 100644
index 00000000000..5f82d34e43a
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/alter_ks.bnf
@@ -0,0 +1,2 @@
+alter_keyspace_statement::= ALTER KEYSPACE keyspace_name
+ WITH options
diff --git a/doc/modules/cassandra/examples/BNF/alter_mv_statement.bnf b/doc/modules/cassandra/examples/BNF/alter_mv_statement.bnf
new file mode 100644
index 00000000000..ff97edb9617
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/alter_mv_statement.bnf
@@ -0,0 +1 @@
+alter_materialized_view_statement::= ALTER MATERIALIZED VIEW view_name WITH table_options
diff --git a/doc/modules/cassandra/examples/BNF/alter_role_statement.bnf b/doc/modules/cassandra/examples/BNF/alter_role_statement.bnf
new file mode 100644
index 00000000000..36958d7fa90
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/alter_role_statement.bnf
@@ -0,0 +1 @@
+alter_role_statement ::= ALTER ROLE role_name WITH role_options
diff --git a/doc/modules/cassandra/examples/BNF/alter_table.bnf b/doc/modules/cassandra/examples/BNF/alter_table.bnf
new file mode 100644
index 00000000000..bf1b4b7ab53
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/alter_table.bnf
@@ -0,0 +1,4 @@
+alter_table_statement::= ALTER TABLE table_name alter_table_instruction
+alter_table_instruction::= ADD column_name cql_type ( ',' column_name cql_type )*
+ | DROP column_name ( column_name )*
+ | WITH options
diff --git a/doc/modules/cassandra/examples/BNF/alter_udt_statement.bnf b/doc/modules/cassandra/examples/BNF/alter_udt_statement.bnf
new file mode 100644
index 00000000000..4f409e609e5
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/alter_udt_statement.bnf
@@ -0,0 +1,3 @@
+alter_type_statement::= ALTER TYPE udt_name alter_type_modification
+alter_type_modification::= ADD field_definition
+ | RENAME identifier TO identifier( identifier TO identifier )*
diff --git a/doc/modules/cassandra/examples/BNF/alter_user_statement.bnf b/doc/modules/cassandra/examples/BNF/alter_user_statement.bnf
new file mode 100644
index 00000000000..129607c1bca
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/alter_user_statement.bnf
@@ -0,0 +1 @@
+alter_user_statement ::= ALTER USER role_name [ WITH PASSWORD string] [ user_option]
diff --git a/doc/modules/cassandra/examples/BNF/batch_statement.bnf b/doc/modules/cassandra/examples/BNF/batch_statement.bnf
new file mode 100644
index 00000000000..2cc2559bfe4
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/batch_statement.bnf
@@ -0,0 +1,5 @@
+batch_statement ::= BEGIN [ UNLOGGED | COUNTER ] BATCH
+ [ USING update_parameter( AND update_parameter)* ]
+ modification_statement ( ';' modification_statement )*
+ APPLY BATCH
+modification_statement ::= insert_statement | update_statement | delete_statement
diff --git a/doc/modules/cassandra/examples/BNF/collection_literal.bnf b/doc/modules/cassandra/examples/BNF/collection_literal.bnf
new file mode 100644
index 00000000000..83a46a24052
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/collection_literal.bnf
@@ -0,0 +1,4 @@
+collection_literal::= map_literal | set_literal | list_literal
+map_literal::= '{' [ term ':' term (',' term ':' term)* ] '}'
+set_literal::= '{' [ term (',' term)* ] '}'
+list_literal::= '[' [ term (',' term)* ] ']'
diff --git a/doc/modules/cassandra/examples/BNF/collection_type.bnf b/doc/modules/cassandra/examples/BNF/collection_type.bnf
new file mode 100644
index 00000000000..37e6cd1de89
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/collection_type.bnf
@@ -0,0 +1,3 @@
+collection_type::= MAP '<' cql_type',' cql_type'>'
+ | SET '<' cql_type '>'
+ | LIST '<' cql_type'>'
diff --git a/doc/modules/cassandra/examples/BNF/column.bnf b/doc/modules/cassandra/examples/BNF/column.bnf
new file mode 100644
index 00000000000..136a45c7e5d
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/column.bnf
@@ -0,0 +1 @@
+column_name::= identifier
diff --git a/doc/modules/cassandra/examples/BNF/constant.bnf b/doc/modules/cassandra/examples/BNF/constant.bnf
new file mode 100644
index 00000000000..4a2953aa21f
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/constant.bnf
@@ -0,0 +1,8 @@
+constant::= string | integer | float | boolean | uuid | blob | NULL
+string::= ''' (any character where ' can appear if doubled)+ ''' | '$$' (any character other than '$$') '$$'
+integer::= re('-?[0-9]+')
+float::= re('-?[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?') | NAN | INFINITY
+boolean::= TRUE | FALSE
+uuid::= hex{8}-hex{4}-hex{4}-hex{4}-hex{12}
+hex::= re("[0-9a-fA-F]")
+blob::= '0' ('x' | 'X') hex+
diff --git a/doc/modules/cassandra/examples/BNF/cql_statement.bnf b/doc/modules/cassandra/examples/BNF/cql_statement.bnf
new file mode 100644
index 00000000000..8d4ae214830
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/cql_statement.bnf
@@ -0,0 +1,48 @@
+cql_statement::= statement [ ';' ]
+statement::= ddl_statement
+ | dml_statement
+ | secondary_index_statement
+ | materialized_view_statement
+ | role_or_permission_statement
+ | udf_statement
+ | udt_statement
+ | trigger_statement
+ddl_statement::= use_statement
+ | create_keyspace_statement
+ | alter_keyspace_statement
+ | drop_keyspace_statement
+ | create_table_statement
+ | alter_table_statement
+ | drop_table_statement
+ | truncate_statement
+dml_statement::= select_statement
+ | insert_statement
+ | update_statement
+ | delete_statement
+ | batch_statement
+secondary_index_statement::= create_index_statement
+ | drop_index_statement
+materialized_view_statement::= create_materialized_view_statement
+ | drop_materialized_view_statement
+role_or_permission_statement::= create_role_statement
+ | alter_role_statement
+ | drop_role_statement
+ | grant_role_statement
+ | revoke_role_statement
+ | list_roles_statement
+ | grant_permission_statement
+ | revoke_permission_statement
+ | list_permissions_statement
+ | create_user_statement
+ | alter_user_statement
+ | drop_user_statement
+ | list_users_statement
+udf_statement::= create_function_statement
+ | drop_function_statement
+ | create_aggregate_statement
+ | drop_aggregate_statement
+udt_statement::= create_type_statement
+ | alter_type_statement
+ | drop_type_statement
+trigger_statement::= create_trigger_statement
+ | drop_trigger_statement
diff --git a/doc/modules/cassandra/examples/BNF/cql_type.bnf b/doc/modules/cassandra/examples/BNF/cql_type.bnf
new file mode 100644
index 00000000000..4e2e5d1765d
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/cql_type.bnf
@@ -0,0 +1 @@
+cql_type::= native_type | collection_type | user_defined_type | tuple_type | custom_type
diff --git a/doc/modules/cassandra/examples/BNF/create_aggregate_statement.bnf b/doc/modules/cassandra/examples/BNF/create_aggregate_statement.bnf
new file mode 100644
index 00000000000..c0126a23ffd
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/create_aggregate_statement.bnf
@@ -0,0 +1,6 @@
+create_aggregate_statement ::= CREATE [ OR REPLACE ] AGGREGATE [ IF NOT EXISTS ]
+ function_name '(' arguments_signature ')'
+ SFUNC function_name
+ STYPE cql_type
+ [ FINALFUNC function_name ]
+ [ INITCOND term ]
diff --git a/doc/modules/cassandra/examples/BNF/create_function_statement.bnf b/doc/modules/cassandra/examples/BNF/create_function_statement.bnf
new file mode 100644
index 00000000000..0da769a11fb
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/create_function_statement.bnf
@@ -0,0 +1,7 @@
+create_function_statement::= CREATE [ OR REPLACE ] FUNCTION [ IF NOT EXISTS ]
+ function_name '(' arguments_declaration ')'
+ [ CALLED | RETURNS NULL ] ON NULL INPUT
+ RETURNS cql_type
+ LANGUAGE identifier
+ AS string
+arguments_declaration::= identifier cql_type ( ',' identifier cql_type )*
diff --git a/doc/modules/cassandra/examples/BNF/create_index_statement.bnf b/doc/modules/cassandra/examples/BNF/create_index_statement.bnf
new file mode 100644
index 00000000000..6e769472434
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/create_index_statement.bnf
@@ -0,0 +1,5 @@
+create_index_statement::= CREATE [ CUSTOM ] INDEX [ IF NOT EXISTS ] [ index_name ]
+ ON table_name '(' index_identifier ')'
+ [ USING string [ WITH OPTIONS = map_literal ] ]
+index_identifier::= column_name
+ | ( KEYS | VALUES | ENTRIES | FULL ) '(' column_name ')'
diff --git a/doc/modules/cassandra/examples/BNF/create_ks.bnf b/doc/modules/cassandra/examples/BNF/create_ks.bnf
new file mode 100644
index 00000000000..ba3e240e0fa
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/create_ks.bnf
@@ -0,0 +1,2 @@
+create_keyspace_statement::= CREATE KEYSPACE [ IF NOT EXISTS ] keyspace_name
+ WITH options
diff --git a/doc/modules/cassandra/examples/BNF/create_mv_statement.bnf b/doc/modules/cassandra/examples/BNF/create_mv_statement.bnf
new file mode 100644
index 00000000000..9bdb60dc5b1
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/create_mv_statement.bnf
@@ -0,0 +1,4 @@
+create_materialized_view_statement::= CREATE MATERIALIZED VIEW [ IF NOT EXISTS ] view_name
+ AS select_statement
+ PRIMARY KEY '(' primary_key')'
+ WITH table_options
diff --git a/doc/modules/cassandra/examples/BNF/create_role_statement.bnf b/doc/modules/cassandra/examples/BNF/create_role_statement.bnf
new file mode 100644
index 00000000000..bc93fbca3bc
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/create_role_statement.bnf
@@ -0,0 +1,9 @@
+create_role_statement ::= CREATE ROLE [ IF NOT EXISTS ] role_name
+ [ WITH role_options ]
+role_options ::= role_option ( AND role_option)*
+role_option ::= PASSWORD '=' string
+ | LOGIN '=' boolean
+ | SUPERUSER '=' boolean
+ | OPTIONS '=' map_literal
+ | ACCESS TO DATACENTERS set_literal
+ | ACCESS TO ALL DATACENTERS
diff --git a/doc/modules/cassandra/examples/BNF/create_table.bnf b/doc/modules/cassandra/examples/BNF/create_table.bnf
new file mode 100644
index 00000000000..840573c7b08
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/create_table.bnf
@@ -0,0 +1,12 @@
+create_table_statement::= CREATE TABLE [ IF NOT EXISTS ] table_name '('
+ column_definition ( ',' column_definition )*
+ [ ',' PRIMARY KEY '(' primary_key ')' ]
+ ')' [ WITH table_options ]
+column_definition::= column_name cql_type [ STATIC ] [ PRIMARY KEY]
+primary_key::= partition_key [ ',' clustering_columns ]
+partition_key::= column_name | '(' column_name ( ',' column_name )* ')'
+clustering_columns::= column_name ( ',' column_name )*
+table_options::= COMPACT STORAGE [ AND table_options ]
+ | CLUSTERING ORDER BY '(' clustering_order ')' [ AND table_options ]
+ | options
+clustering_order::= column_name (ASC | DESC) ( ',' column_name (ASC | DESC) )*
diff --git a/doc/modules/cassandra/examples/BNF/create_trigger_statement.bnf b/doc/modules/cassandra/examples/BNF/create_trigger_statement.bnf
new file mode 100644
index 00000000000..f7442da15d7
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/create_trigger_statement.bnf
@@ -0,0 +1,3 @@
+create_trigger_statement ::= CREATE TRIGGER [ IF NOT EXISTS ] trigger_name
+ ON table_name
+ USING string
diff --git a/doc/modules/cassandra/examples/BNF/create_type.bnf b/doc/modules/cassandra/examples/BNF/create_type.bnf
new file mode 100644
index 00000000000..aebe9ebfbac
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/create_type.bnf
@@ -0,0 +1,3 @@
+create_type_statement::= CREATE TYPE [ IF NOT EXISTS ] udt_name
+ '(' field_definition ( ',' field_definition)* ')'
+field_definition::= identifier cql_type
diff --git a/doc/modules/cassandra/examples/BNF/create_user_statement.bnf b/doc/modules/cassandra/examples/BNF/create_user_statement.bnf
new file mode 100644
index 00000000000..19f9903921e
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/create_user_statement.bnf
@@ -0,0 +1,4 @@
+create_user_statement ::= CREATE USER [ IF NOT EXISTS ] role_name
+ [ WITH PASSWORD string ]
+ [ user_option ]
+user_option::= SUPERUSER | NOSUPERUSER
diff --git a/doc/modules/cassandra/examples/BNF/custom_type.bnf b/doc/modules/cassandra/examples/BNF/custom_type.bnf
new file mode 100644
index 00000000000..ce4890f6176
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/custom_type.bnf
@@ -0,0 +1 @@
+custom_type::= string
diff --git a/doc/modules/cassandra/examples/BNF/delete_statement.bnf b/doc/modules/cassandra/examples/BNF/delete_statement.bnf
new file mode 100644
index 00000000000..5f456ba2ded
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/delete_statement.bnf
@@ -0,0 +1,5 @@
+delete_statement::= DELETE [ simple_selection ( ',' simple_selection )* ]
+ FROM table_name
+ [ USING update_parameter ( AND update_parameter )* ]
+ WHERE where_clause
+ [ IF ( EXISTS | condition ( AND condition)*) ]
diff --git a/doc/modules/cassandra/examples/BNF/drop_aggregate_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_aggregate_statement.bnf
new file mode 100644
index 00000000000..28e8a4fcb93
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/drop_aggregate_statement.bnf
@@ -0,0 +1 @@
+drop_aggregate_statement::= DROP AGGREGATE [ IF EXISTS ] function_name [ '(' arguments_signature ')' ]
diff --git a/doc/modules/cassandra/examples/BNF/drop_function_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_function_statement.bnf
new file mode 100644
index 00000000000..2639bd0d66f
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/drop_function_statement.bnf
@@ -0,0 +1,2 @@
+drop_function_statement::= DROP FUNCTION [ IF EXISTS ] function_name [ '(' arguments_signature ')' ]
+arguments_signature::= cql_type ( ',' cql_type )*
diff --git a/doc/modules/cassandra/examples/BNF/drop_index_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_index_statement.bnf
new file mode 100644
index 00000000000..49f36d1eb32
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/drop_index_statement.bnf
@@ -0,0 +1 @@
+drop_index_statement::= DROP INDEX [ IF EXISTS ] index_name
diff --git a/doc/modules/cassandra/examples/BNF/drop_ks.bnf b/doc/modules/cassandra/examples/BNF/drop_ks.bnf
new file mode 100644
index 00000000000..4e21b7bbce3
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/drop_ks.bnf
@@ -0,0 +1 @@
+drop_keyspace_statement::= DROP KEYSPACE [ IF EXISTS ] keyspace_name
diff --git a/doc/modules/cassandra/examples/BNF/drop_mv_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_mv_statement.bnf
new file mode 100644
index 00000000000..1a9d8dc980e
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/drop_mv_statement.bnf
@@ -0,0 +1 @@
+drop_materialized_view_statement::= DROP MATERIALIZED VIEW [ IF EXISTS ] view_name
diff --git a/doc/modules/cassandra/examples/BNF/drop_role_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_role_statement.bnf
new file mode 100644
index 00000000000..15e1791d72c
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/drop_role_statement.bnf
@@ -0,0 +1 @@
+drop_role_statement ::= DROP ROLE [ IF EXISTS ] role_name
diff --git a/doc/modules/cassandra/examples/BNF/drop_table.bnf b/doc/modules/cassandra/examples/BNF/drop_table.bnf
new file mode 100644
index 00000000000..cabd17a42cf
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/drop_table.bnf
@@ -0,0 +1 @@
+drop_table_statement::= DROP TABLE [ IF EXISTS ] table_name
diff --git a/doc/modules/cassandra/examples/BNF/drop_trigger_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_trigger_statement.bnf
new file mode 100644
index 00000000000..c1d3e594230
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/drop_trigger_statement.bnf
@@ -0,0 +1 @@
+drop_trigger_statement ::= DROP TRIGGER [ IF EXISTS ] trigger_name ON table_name
diff --git a/doc/modules/cassandra/examples/BNF/drop_udt_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_udt_statement.bnf
new file mode 100644
index 00000000000..276b57c60b8
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/drop_udt_statement.bnf
@@ -0,0 +1 @@
+drop_type_statement::= DROP TYPE [ IF EXISTS ] udt_name
diff --git a/doc/modules/cassandra/examples/BNF/drop_user_statement.bnf b/doc/modules/cassandra/examples/BNF/drop_user_statement.bnf
new file mode 100644
index 00000000000..9b226083d1a
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/drop_user_statement.bnf
@@ -0,0 +1 @@
+drop_user_statement ::= DROP USER [ IF EXISTS ] role_name
diff --git a/doc/modules/cassandra/examples/BNF/function.bnf b/doc/modules/cassandra/examples/BNF/function.bnf
new file mode 100644
index 00000000000..7e054306ed7
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/function.bnf
@@ -0,0 +1 @@
+function_name ::= [ keyspace_name'.' ] name
diff --git a/doc/modules/cassandra/examples/BNF/grant_permission_statement.bnf b/doc/modules/cassandra/examples/BNF/grant_permission_statement.bnf
new file mode 100644
index 00000000000..40f1df32703
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/grant_permission_statement.bnf
@@ -0,0 +1,12 @@
+grant_permission_statement ::= GRANT permissions ON resource TO role_name
+permissions ::= ALL [ PERMISSIONS ] | permission [ PERMISSION ]
+permission ::= CREATE | ALTER | DROP | SELECT | MODIFY | AUTHORIZE | DESCRIBE | EXECUTE
+resource ::= ALL KEYSPACES
+ | KEYSPACE keyspace_name
+ | [ TABLE ] table_name
+ | ALL ROLES
+ | ROLE role_name
+ | ALL FUNCTIONS [ IN KEYSPACE keyspace_name ]
+ | FUNCTION function_name '(' [ cql_type( ',' cql_type )* ] ')'
+ | ALL MBEANS
+ | ( MBEAN | MBEANS ) string
diff --git a/doc/modules/cassandra/examples/BNF/grant_role_statement.bnf b/doc/modules/cassandra/examples/BNF/grant_role_statement.bnf
new file mode 100644
index 00000000000..d965cc2658b
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/grant_role_statement.bnf
@@ -0,0 +1 @@
+grant_role_statement ::= GRANT role_name TO role_name
diff --git a/doc/modules/cassandra/examples/BNF/identifier.bnf b/doc/modules/cassandra/examples/BNF/identifier.bnf
new file mode 100644
index 00000000000..7bc34314f93
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/identifier.bnf
@@ -0,0 +1,3 @@
+identifier::= unquoted_identifier | quoted_identifier
+unquoted_identifier::= re('[a-zA-Z][a-zA-Z0-9_]*')
+quoted_identifier::= '"' (any character where " can appear if doubled)+ '"'
diff --git a/doc/modules/cassandra/examples/BNF/index_name.bnf b/doc/modules/cassandra/examples/BNF/index_name.bnf
new file mode 100644
index 00000000000..c322755839a
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/index_name.bnf
@@ -0,0 +1 @@
+index_name::= re('[a-zA-Z_0-9]+')
diff --git a/doc/modules/cassandra/examples/BNF/insert_statement.bnf b/doc/modules/cassandra/examples/BNF/insert_statement.bnf
new file mode 100644
index 00000000000..ed80c3ed05b
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/insert_statement.bnf
@@ -0,0 +1,6 @@
+insert_statement::= INSERT INTO table_name ( names_values | json_clause )
+ [ IF NOT EXISTS ]
+ [ USING update_parameter ( AND update_parameter )* ]
+names_values::= names VALUES tuple_literal
+json_clause::= JSON string [ DEFAULT ( NULL | UNSET ) ]
+names::= '(' column_name ( ',' column_name )* ')'
diff --git a/doc/modules/cassandra/examples/BNF/ks_table.bnf b/doc/modules/cassandra/examples/BNF/ks_table.bnf
new file mode 100644
index 00000000000..20ee6dababc
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/ks_table.bnf
@@ -0,0 +1,5 @@
+keyspace_name::= name
+table_name::= [keyspace_name '.' ] name
+name::= unquoted_name | quoted_name
+unquoted_name::= re('[a-zA-Z_0-9]{1,48}')
+quoted_name::= '"' unquoted_name '"'
diff --git a/doc/modules/cassandra/examples/BNF/list_permissions_statement.bnf b/doc/modules/cassandra/examples/BNF/list_permissions_statement.bnf
new file mode 100644
index 00000000000..a11e2cc01d7
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/list_permissions_statement.bnf
@@ -0,0 +1 @@
+list_permissions_statement ::= LIST permissions [ ON resource ] [ OF role_name [ NORECURSIVE ] ]
diff --git a/doc/modules/cassandra/examples/BNF/list_roles_statement.bnf b/doc/modules/cassandra/examples/BNF/list_roles_statement.bnf
new file mode 100644
index 00000000000..bbe3d9b500e
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/list_roles_statement.bnf
@@ -0,0 +1 @@
+list_roles_statement ::= LIST ROLES [ OF role_name] [ NORECURSIVE ]
diff --git a/doc/modules/cassandra/examples/BNF/list_users_statement.bnf b/doc/modules/cassandra/examples/BNF/list_users_statement.bnf
new file mode 100644
index 00000000000..5750de6c0ca
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/list_users_statement.bnf
@@ -0,0 +1 @@
+list_users_statement::= LIST USERS
diff --git a/doc/modules/cassandra/examples/BNF/native_type.bnf b/doc/modules/cassandra/examples/BNF/native_type.bnf
new file mode 100644
index 00000000000..c4e9c268db3
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/native_type.bnf
@@ -0,0 +1,4 @@
+native_type::= ASCII | BIGINT | BLOB | BOOLEAN | COUNTER | DATE
+ | DECIMAL | DOUBLE | DURATION | FLOAT | INET | INT
+ | SMALLINT | TEXT | TIME | TIMESTAMP | TIMEUUID | TINYINT
+ | UUID | VARCHAR | VARINT
diff --git a/doc/modules/cassandra/examples/BNF/options.bnf b/doc/modules/cassandra/examples/BNF/options.bnf
new file mode 100644
index 00000000000..9887165a263
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/options.bnf
@@ -0,0 +1,4 @@
+options::= option ( AND option )*
+option::= identifier '=' ( identifier
+ | constant
+ | map_literal )
diff --git a/doc/modules/cassandra/examples/BNF/revoke_permission_statement.bnf b/doc/modules/cassandra/examples/BNF/revoke_permission_statement.bnf
new file mode 100644
index 00000000000..fd061f9394d
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/revoke_permission_statement.bnf
@@ -0,0 +1 @@
+revoke_permission_statement ::= REVOKE permissions ON resource FROM role_name
diff --git a/doc/modules/cassandra/examples/BNF/revoke_role_statement.bnf b/doc/modules/cassandra/examples/BNF/revoke_role_statement.bnf
new file mode 100644
index 00000000000..c344eb006f2
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/revoke_role_statement.bnf
@@ -0,0 +1 @@
+revoke_role_statement ::= REVOKE role_name FROM role_name
diff --git a/doc/modules/cassandra/examples/BNF/role_name.bnf b/doc/modules/cassandra/examples/BNF/role_name.bnf
new file mode 100644
index 00000000000..103f84bd26f
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/role_name.bnf
@@ -0,0 +1 @@
+role_name ::= identifier | string
diff --git a/doc/modules/cassandra/examples/BNF/select_statement.bnf b/doc/modules/cassandra/examples/BNF/select_statement.bnf
new file mode 100644
index 00000000000..f53da41da57
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/select_statement.bnf
@@ -0,0 +1,21 @@
+select_statement::= SELECT [ JSON | DISTINCT ] ( select_clause | '*' )
+ FROM table_name
+ [ WHERE where_clause ]
+ [ GROUP BY group_by_clause ]
+ [ ORDER BY ordering_clause ]
+ [ PER PARTITION LIMIT ( integer | bind_marker ) ]
+ [ LIMIT ( integer | bind_marker ) ]
+ [ ALLOW FILTERING ]
+select_clause::= selector [ AS identifier ] ( ',' selector [ AS identifier ] )*
+selector::= column_name
+ | term
+ | CAST '(' selector AS cql_type ')'
+ | function_name '(' [ selector ( ',' selector )* ] ')'
+ | COUNT '(' '*' ')'
+where_clause::= relation ( AND relation )*
+relation::= column_name operator term
+ | '(' column_name ( ',' column_name )* ')' operator tuple_literal
+ | TOKEN '(' column_name ( ',' column_name )* ')' operator term
+operator::= '=' | '<' | '>' | '<=' | '>=' | '!=' | IN | CONTAINS | CONTAINS KEY
+group_by_clause::= column_name ( ',' column_name )*
+ordering_clause::= column_name [ ASC | DESC ] ( ',' column_name [ ASC | DESC ] )*
diff --git a/doc/modules/cassandra/examples/BNF/term.bnf b/doc/modules/cassandra/examples/BNF/term.bnf
new file mode 100644
index 00000000000..504c4c40d8e
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/term.bnf
@@ -0,0 +1,6 @@
+term::= constant | literal | function_call | arithmetic_operation | type_hint | bind_marker
+literal::= collection_literal | udt_literal | tuple_literal
+function_call::= identifier '(' [ term (',' term)* ] ')'
+arithmetic_operation::= '-' term | term ('+' | '-' | '*' | '/' | '%') term
+type_hint::= '(' cql_type ')' term
+bind_marker::= '?' | ':' identifier
diff --git a/doc/modules/cassandra/examples/BNF/trigger_name.bnf b/doc/modules/cassandra/examples/BNF/trigger_name.bnf
new file mode 100644
index 00000000000..18a4a7e2223
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/trigger_name.bnf
@@ -0,0 +1 @@
+trigger_name ::= identifier
diff --git a/doc/modules/cassandra/examples/BNF/truncate_table.bnf b/doc/modules/cassandra/examples/BNF/truncate_table.bnf
new file mode 100644
index 00000000000..9c7d3012a2a
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/truncate_table.bnf
@@ -0,0 +1 @@
+truncate_statement::= TRUNCATE [ TABLE ] table_name
diff --git a/doc/modules/cassandra/examples/BNF/tuple.bnf b/doc/modules/cassandra/examples/BNF/tuple.bnf
new file mode 100644
index 00000000000..f339d575846
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/tuple.bnf
@@ -0,0 +1,2 @@
+tuple_type::= TUPLE '<' cql_type( ',' cql_type)* '>'
+tuple_literal::= '(' term( ',' term )* ')'
diff --git a/doc/modules/cassandra/examples/BNF/udt.bnf b/doc/modules/cassandra/examples/BNF/udt.bnf
new file mode 100644
index 00000000000..c06a5f638b7
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/udt.bnf
@@ -0,0 +1,2 @@
+user_defined_type::= udt_name
+udt_name::= [ keyspace_name '.' ] identifier
diff --git a/doc/modules/cassandra/examples/BNF/udt_literal.bnf b/doc/modules/cassandra/examples/BNF/udt_literal.bnf
new file mode 100644
index 00000000000..8c996e5ed14
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/udt_literal.bnf
@@ -0,0 +1 @@
+udt_literal::= '{' identifier ':' term ( ',' identifier ':' term)* '}'
diff --git a/doc/modules/cassandra/examples/BNF/update_statement.bnf b/doc/modules/cassandra/examples/BNF/update_statement.bnf
new file mode 100644
index 00000000000..1a9bdb48544
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/update_statement.bnf
@@ -0,0 +1,13 @@
+update_statement ::= UPDATE table_name
+ [ USING update_parameter ( AND update_parameter )* ]
+ SET assignment( ',' assignment )*
+ WHERE where_clause
+ [ IF ( EXISTS | condition ( AND condition)*) ]
+update_parameter ::= ( TIMESTAMP | TTL ) ( integer | bind_marker )
+assignment ::= simple_selection '=' term
+ | column_name '=' column_name ( '+' | '-' ) term
+ | column_name '=' list_literal '+' column_name
+simple_selection ::= column_name
+ | column_name '[' term ']'
+ | column_name '.' field_name
+condition ::= simple_selection operator term
diff --git a/doc/modules/cassandra/examples/BNF/use_ks.bnf b/doc/modules/cassandra/examples/BNF/use_ks.bnf
new file mode 100644
index 00000000000..0347e52d5a2
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/use_ks.bnf
@@ -0,0 +1 @@
+use_statement::= USE keyspace_name
diff --git a/doc/modules/cassandra/examples/BNF/view_name.bnf b/doc/modules/cassandra/examples/BNF/view_name.bnf
new file mode 100644
index 00000000000..69253677dd8
--- /dev/null
+++ b/doc/modules/cassandra/examples/BNF/view_name.bnf
@@ -0,0 +1 @@
+view_name::= re('[a-zA-Z_0-9]+')
diff --git a/doc/modules/cassandra/examples/CQL/allow_filtering.cql b/doc/modules/cassandra/examples/CQL/allow_filtering.cql
new file mode 100644
index 00000000000..c3bf3c69e16
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/allow_filtering.cql
@@ -0,0 +1,9 @@
+CREATE TABLE users (
+ username text PRIMARY KEY,
+ firstname text,
+ lastname text,
+ birth_year int,
+ country text
+);
+
+CREATE INDEX ON users(birth_year);
diff --git a/doc/modules/cassandra/examples/CQL/alter_ks.cql b/doc/modules/cassandra/examples/CQL/alter_ks.cql
new file mode 100644
index 00000000000..319ed241b1d
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/alter_ks.cql
@@ -0,0 +1,2 @@
+ALTER KEYSPACE excelsior
+ WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 4};
diff --git a/doc/modules/cassandra/examples/CQL/alter_role.cql b/doc/modules/cassandra/examples/CQL/alter_role.cql
new file mode 100644
index 00000000000..c5f7d3d3991
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/alter_role.cql
@@ -0,0 +1 @@
+ALTER ROLE bob WITH PASSWORD = 'PASSWORD_B' AND SUPERUSER = false;
diff --git a/doc/modules/cassandra/examples/CQL/alter_table_add_column.cql b/doc/modules/cassandra/examples/CQL/alter_table_add_column.cql
new file mode 100644
index 00000000000..e7703ed6ec5
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/alter_table_add_column.cql
@@ -0,0 +1 @@
+ALTER TABLE addamsFamily ADD gravesite varchar;
diff --git a/doc/modules/cassandra/examples/CQL/alter_table_spec_retry.cql b/doc/modules/cassandra/examples/CQL/alter_table_spec_retry.cql
new file mode 100644
index 00000000000..bb9aa618402
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/alter_table_spec_retry.cql
@@ -0,0 +1 @@
+ALTER TABLE users WITH speculative_retry = '10ms';
diff --git a/doc/modules/cassandra/examples/CQL/alter_table_spec_retry_percent.cql b/doc/modules/cassandra/examples/CQL/alter_table_spec_retry_percent.cql
new file mode 100644
index 00000000000..a5351c68feb
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/alter_table_spec_retry_percent.cql
@@ -0,0 +1 @@
+ALTER TABLE users WITH speculative_retry = '99PERCENTILE';
diff --git a/doc/modules/cassandra/examples/CQL/alter_table_with_comment.cql b/doc/modules/cassandra/examples/CQL/alter_table_with_comment.cql
new file mode 100644
index 00000000000..9b82d7243f2
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/alter_table_with_comment.cql
@@ -0,0 +1,2 @@
+ALTER TABLE addamsFamily
+ WITH comment = 'A most excellent and useful table';
diff --git a/doc/modules/cassandra/examples/CQL/alter_user.cql b/doc/modules/cassandra/examples/CQL/alter_user.cql
new file mode 100644
index 00000000000..97de7ba1dd7
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/alter_user.cql
@@ -0,0 +1,2 @@
+ALTER USER alice WITH PASSWORD 'PASSWORD_A';
+ALTER USER bob SUPERUSER;
diff --git a/doc/modules/cassandra/examples/CQL/as.cql b/doc/modules/cassandra/examples/CQL/as.cql
new file mode 100644
index 00000000000..a8b9f035689
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/as.cql
@@ -0,0 +1,13 @@
+// Without alias
+SELECT intAsBlob(4) FROM t;
+
+// intAsBlob(4)
+// --------------
+// 0x00000004
+
+// With alias
+SELECT intAsBlob(4) AS four FROM t;
+
+// four
+// ------------
+// 0x00000004
diff --git a/doc/modules/cassandra/examples/CQL/autoexpand_exclude_dc.cql b/doc/modules/cassandra/examples/CQL/autoexpand_exclude_dc.cql
new file mode 100644
index 00000000000..c320c52fc15
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/autoexpand_exclude_dc.cql
@@ -0,0 +1,5 @@
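+// Setting a datacenter to 0 replicas excludes it from the auto-expansion.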
+CREATE KEYSPACE excalibur
+ WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor' : 3, 'DC2': 0};
+
+DESCRIBE KEYSPACE excalibur;
diff --git a/doc/modules/cassandra/examples/CQL/autoexpand_ks.cql b/doc/modules/cassandra/examples/CQL/autoexpand_ks.cql
new file mode 100644
index 00000000000..d5bef55acad
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/autoexpand_ks.cql
@@ -0,0 +1,6 @@
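+// With NetworkTopologyStrategy, a bare 'replication_factor' is auto-expanded
+// to every datacenter in the cluster; DESCRIBE shows the per-DC result.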
+CREATE KEYSPACE excalibur
+ WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor' : 3};
+
+DESCRIBE KEYSPACE excalibur;
diff --git a/doc/modules/cassandra/examples/CQL/autoexpand_ks_override.cql b/doc/modules/cassandra/examples/CQL/autoexpand_ks_override.cql
new file mode 100644
index 00000000000..d6800fbe051
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/autoexpand_ks_override.cql
@@ -0,0 +1,5 @@
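+// An explicit per-DC factor overrides the auto-expanded 'replication_factor'.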
+CREATE KEYSPACE excalibur
+ WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor' : 3, 'DC2': 2};
+
+DESCRIBE KEYSPACE excalibur;
diff --git a/doc/modules/cassandra/examples/CQL/avg.cql b/doc/modules/cassandra/examples/CQL/avg.cql
new file mode 100644
index 00000000000..2882327520e
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/avg.cql
@@ -0,0 +1 @@
+SELECT AVG (players) FROM plays;
diff --git a/doc/modules/cassandra/examples/CQL/batch_statement.cql b/doc/modules/cassandra/examples/CQL/batch_statement.cql
new file mode 100644
index 00000000000..e9148e82410
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/batch_statement.cql
@@ -0,0 +1,6 @@
+BEGIN BATCH
+ INSERT INTO users (userid, password, name) VALUES ('user2', 'ch@ngem3b', 'second user');
+ UPDATE users SET password = 'ps22dhds' WHERE userid = 'user3';
+ INSERT INTO users (userid, password) VALUES ('user4', 'ch@ngem3c');
+ DELETE name FROM users WHERE userid = 'user1';
+APPLY BATCH;
diff --git a/doc/modules/cassandra/examples/CQL/caching_option.cql b/doc/modules/cassandra/examples/CQL/caching_option.cql
new file mode 100644
index 00000000000..b48b171ec32
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/caching_option.cql
@@ -0,0 +1,6 @@
+CREATE TABLE simple (
+id int,
+key text,
+value text,
+PRIMARY KEY (key, value)
+) WITH caching = {'keys': 'ALL', 'rows_per_partition': 10};
diff --git a/doc/modules/cassandra/examples/CQL/chunk_length.cql b/doc/modules/cassandra/examples/CQL/chunk_length.cql
new file mode 100644
index 00000000000..b3504fe0409
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/chunk_length.cql
@@ -0,0 +1,6 @@
+CREATE TABLE simple (
+ id int,
+ key text,
+ value text,
+ PRIMARY KEY (key, value)
+) WITH compression = {'class': 'LZ4Compressor', 'chunk_length_in_kb': 4};
diff --git a/doc/modules/cassandra/examples/CQL/count.cql b/doc/modules/cassandra/examples/CQL/count.cql
new file mode 100644
index 00000000000..1993c0e4a67
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/count.cql
@@ -0,0 +1,2 @@
+SELECT COUNT (*) FROM plays;
+SELECT COUNT (1) FROM plays;
diff --git a/doc/modules/cassandra/examples/CQL/count_nonnull.cql b/doc/modules/cassandra/examples/CQL/count_nonnull.cql
new file mode 100644
index 00000000000..6543b996326
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/count_nonnull.cql
@@ -0,0 +1 @@
+SELECT COUNT (scores) FROM plays;
diff --git a/doc/modules/cassandra/examples/CQL/create_function.cql b/doc/modules/cassandra/examples/CQL/create_function.cql
new file mode 100644
index 00000000000..e7d5823a0b3
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_function.cql
@@ -0,0 +1,15 @@
+CREATE OR REPLACE FUNCTION somefunction(somearg int, anotherarg text, complexarg frozen<someUDT>, listarg list<bigint>)
+ RETURNS NULL ON NULL INPUT
+ RETURNS text
+ LANGUAGE java
+ AS $$
+ // some Java code
+ $$;
+
+CREATE FUNCTION IF NOT EXISTS akeyspace.fname(someArg int)
+ CALLED ON NULL INPUT
+ RETURNS text
+ LANGUAGE java
+ AS $$
+ // some Java code
+ $$;
diff --git a/doc/modules/cassandra/examples/CQL/create_index.cql b/doc/modules/cassandra/examples/CQL/create_index.cql
new file mode 100644
index 00000000000..f84452aa1d5
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_index.cql
@@ -0,0 +1,8 @@
+CREATE INDEX userIndex ON NerdMovies (user);
+CREATE INDEX ON Mutants (abilityId);
+CREATE INDEX ON users (keys(favs));
+CREATE CUSTOM INDEX ON users (email)
+ USING 'path.to.the.IndexClass';
+CREATE CUSTOM INDEX ON users (email)
+ USING 'path.to.the.IndexClass'
+ WITH OPTIONS = {'storage': '/mnt/ssd/indexes/'};
diff --git a/doc/modules/cassandra/examples/CQL/create_ks.cql b/doc/modules/cassandra/examples/CQL/create_ks.cql
new file mode 100644
index 00000000000..e81d7f7bf36
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_ks.cql
@@ -0,0 +1,6 @@
+CREATE KEYSPACE excelsior
+ WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 3};
+
+CREATE KEYSPACE excalibur
+ WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1' : 1, 'DC2' : 3}
+ AND durable_writes = false;
diff --git a/doc/modules/cassandra/examples/CQL/create_ks2_backup.cql b/doc/modules/cassandra/examples/CQL/create_ks2_backup.cql
new file mode 100644
index 00000000000..52f9308f975
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_ks2_backup.cql
@@ -0,0 +1,2 @@
+CREATE KEYSPACE catalogkeyspace
+ WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 3};
diff --git a/doc/modules/cassandra/examples/CQL/create_ks_backup.cql b/doc/modules/cassandra/examples/CQL/create_ks_backup.cql
new file mode 100644
index 00000000000..593490474a5
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_ks_backup.cql
@@ -0,0 +1,2 @@
+CREATE KEYSPACE cqlkeyspace
+ WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 3};
diff --git a/doc/modules/cassandra/examples/CQL/create_ks_trans_repl.cql b/doc/modules/cassandra/examples/CQL/create_ks_trans_repl.cql
new file mode 100644
index 00000000000..afff433eec8
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_ks_trans_repl.cql
@@ -0,0 +1,3 @@
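+// Transient replication: '3/1' means 3 replicas, 1 of them transient.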
+CREATE KEYSPACE some_keyspace
+ WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1' : '3/1', 'DC2' : '5/2'};
diff --git a/doc/modules/cassandra/examples/CQL/create_mv_statement.cql b/doc/modules/cassandra/examples/CQL/create_mv_statement.cql
new file mode 100644
index 00000000000..0792c3e027d
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_mv_statement.cql
@@ -0,0 +1,5 @@
+CREATE MATERIALIZED VIEW monkeySpecies_by_population AS
+ SELECT * FROM monkeySpecies
+ WHERE population IS NOT NULL AND species IS NOT NULL
+ PRIMARY KEY (population, species)
+ WITH comment='Allow query by population instead of species';
diff --git a/doc/modules/cassandra/examples/CQL/create_role.cql b/doc/modules/cassandra/examples/CQL/create_role.cql
new file mode 100644
index 00000000000..c8d0d640de5
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_role.cql
@@ -0,0 +1,6 @@
+CREATE ROLE new_role;
+CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true;
+CREATE ROLE bob WITH PASSWORD = 'password_b' AND LOGIN = true AND SUPERUSER = true;
+CREATE ROLE carlos WITH OPTIONS = { 'custom_option1' : 'option1_value', 'custom_option2' : 99 };
+CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true AND ACCESS TO DATACENTERS {'DC1', 'DC3'};
+CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true AND ACCESS TO ALL DATACENTERS;
diff --git a/doc/modules/cassandra/examples/CQL/create_role_ifnotexists.cql b/doc/modules/cassandra/examples/CQL/create_role_ifnotexists.cql
new file mode 100644
index 00000000000..0b9600f9c4c
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_role_ifnotexists.cql
@@ -0,0 +1,2 @@
+CREATE ROLE other_role;
+CREATE ROLE IF NOT EXISTS other_role;
diff --git a/doc/modules/cassandra/examples/CQL/create_static_column.cql b/doc/modules/cassandra/examples/CQL/create_static_column.cql
new file mode 100644
index 00000000000..95e8ff21ec2
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_static_column.cql
@@ -0,0 +1,7 @@
+CREATE TABLE t (
+ pk int,
+ t int,
+ v text,
+ s text static,
+ PRIMARY KEY (pk, t)
+);
diff --git a/doc/modules/cassandra/examples/CQL/create_table.cql b/doc/modules/cassandra/examples/CQL/create_table.cql
new file mode 100644
index 00000000000..57b557dace8
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_table.cql
@@ -0,0 +1,23 @@
+CREATE TABLE monkey_species (
+ species text PRIMARY KEY,
+ common_name text,
+ population varint,
+ average_size int
+) WITH comment='Important biological records';
+
+CREATE TABLE timeline (
+ userid uuid,
+ posted_month int,
+ posted_time uuid,
+ body text,
+ posted_by text,
+ PRIMARY KEY (userid, posted_month, posted_time)
+) WITH compaction = { 'class' : 'LeveledCompactionStrategy' };
+
+CREATE TABLE loads (
+ machine inet,
+ cpu int,
+ mtime timeuuid,
+ load float,
+ PRIMARY KEY ((machine, cpu), mtime)
+) WITH CLUSTERING ORDER BY (mtime DESC);
diff --git a/doc/modules/cassandra/examples/CQL/create_table2_backup.cql b/doc/modules/cassandra/examples/CQL/create_table2_backup.cql
new file mode 100644
index 00000000000..f3393008f52
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_table2_backup.cql
@@ -0,0 +1,14 @@
+USE catalogkeyspace;
+CREATE TABLE journal (
+ id int,
+ name text,
+ publisher text,
+ PRIMARY KEY (id)
+);
+
+CREATE TABLE magazine (
+ id int,
+ name text,
+ publisher text,
+ PRIMARY KEY (id)
+);
diff --git a/doc/modules/cassandra/examples/CQL/create_table_backup.cql b/doc/modules/cassandra/examples/CQL/create_table_backup.cql
new file mode 100644
index 00000000000..c80b99969d3
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_table_backup.cql
@@ -0,0 +1,13 @@
+USE cqlkeyspace;
+CREATE TABLE t (
+ id int,
+ k int,
+ v text,
+ PRIMARY KEY (id)
+);
+CREATE TABLE t2 (
+ id int,
+ k int,
+ v text,
+ PRIMARY KEY (id)
+);
diff --git a/doc/modules/cassandra/examples/CQL/create_table_clustercolumn.cql b/doc/modules/cassandra/examples/CQL/create_table_clustercolumn.cql
new file mode 100644
index 00000000000..f7de266b1b8
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_table_clustercolumn.cql
@@ -0,0 +1,8 @@
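+// a is the partition key; b and c are clustering columns.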
+CREATE TABLE t2 (
+ a int,
+ b int,
+ c int,
+ d int,
+ PRIMARY KEY (a, b, c)
+);
diff --git a/doc/modules/cassandra/examples/CQL/create_table_compound_pk.cql b/doc/modules/cassandra/examples/CQL/create_table_compound_pk.cql
new file mode 100644
index 00000000000..eb199c73146
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_table_compound_pk.cql
@@ -0,0 +1,8 @@
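+// (a, b) is a composite partition key; c and d are clustering columns.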
+CREATE TABLE t (
+ a int,
+ b int,
+ c int,
+ d int,
+ PRIMARY KEY ((a, b), c, d)
+);
diff --git a/doc/modules/cassandra/examples/CQL/create_table_simple.cql b/doc/modules/cassandra/examples/CQL/create_table_simple.cql
new file mode 100644
index 00000000000..0ebe7475bc4
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_table_simple.cql
@@ -0,0 +1,4 @@
+CREATE TABLE users (
+ userid text PRIMARY KEY,
+ username text,
+);
diff --git a/doc/modules/cassandra/examples/CQL/create_table_single_pk.cql b/doc/modules/cassandra/examples/CQL/create_table_single_pk.cql
new file mode 100644
index 00000000000..ce6fff8d720
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_table_single_pk.cql
@@ -0,0 +1 @@
+CREATE TABLE t (k text PRIMARY KEY);
diff --git a/doc/modules/cassandra/examples/CQL/create_trigger.cql b/doc/modules/cassandra/examples/CQL/create_trigger.cql
new file mode 100644
index 00000000000..9bbf2f24057
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_trigger.cql
@@ -0,0 +1 @@
+CREATE TRIGGER myTrigger ON myTable USING 'org.apache.cassandra.triggers.InvertedIndex';
diff --git a/doc/modules/cassandra/examples/CQL/create_user.cql b/doc/modules/cassandra/examples/CQL/create_user.cql
new file mode 100644
index 00000000000..b6531ebbc48
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_user.cql
@@ -0,0 +1,2 @@
+CREATE USER alice WITH PASSWORD 'password_a' SUPERUSER;
+CREATE USER bob WITH PASSWORD 'password_b' NOSUPERUSER;
diff --git a/doc/modules/cassandra/examples/CQL/create_user_role.cql b/doc/modules/cassandra/examples/CQL/create_user_role.cql
new file mode 100644
index 00000000000..810f76ca9c3
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/create_user_role.cql
@@ -0,0 +1,14 @@
+CREATE USER alice WITH PASSWORD 'password_a' SUPERUSER;
+CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true AND SUPERUSER = true;
+
+CREATE USER IF NOT EXISTS alice WITH PASSWORD 'password_a' SUPERUSER;
+CREATE ROLE IF NOT EXISTS alice WITH PASSWORD = 'password_a' AND LOGIN = true AND SUPERUSER = true;
+
+CREATE USER alice WITH PASSWORD 'password_a' NOSUPERUSER;
+CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true AND SUPERUSER = false;
+
+CREATE USER alice WITH PASSWORD 'password_a' NOSUPERUSER;
+CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true;
+
+CREATE USER alice WITH PASSWORD 'password_a';
+CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true;
diff --git a/doc/modules/cassandra/examples/CQL/currentdate.cql b/doc/modules/cassandra/examples/CQL/currentdate.cql
new file mode 100644
index 00000000000..0bed1b2b9e8
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/currentdate.cql
@@ -0,0 +1,2 @@
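+// Returns rows whose date falls within the last two days.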
+SELECT * FROM myTable WHERE date >= currentDate() - 2d;
diff --git a/doc/modules/cassandra/examples/CQL/datetime_arithmetic.cql b/doc/modules/cassandra/examples/CQL/datetime_arithmetic.cql
new file mode 100644
index 00000000000..310bf3bab69
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/datetime_arithmetic.cql
@@ -0,0 +1 @@
+SELECT * FROM myTable WHERE t = '2017-01-01' - 2d;
diff --git a/doc/modules/cassandra/examples/CQL/delete_all_elements_list.cql b/doc/modules/cassandra/examples/CQL/delete_all_elements_list.cql
new file mode 100644
index 00000000000..3d026683b39
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/delete_all_elements_list.cql
@@ -0,0 +1 @@
+UPDATE plays SET scores = scores - [ 12, 21 ] WHERE id = '123-afde';
diff --git a/doc/modules/cassandra/examples/CQL/delete_element_list.cql b/doc/modules/cassandra/examples/CQL/delete_element_list.cql
new file mode 100644
index 00000000000..26b3e58f00b
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/delete_element_list.cql
@@ -0,0 +1,2 @@
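+// List indexes are 0-based: this removes the second element of scores.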
+DELETE scores[1] FROM plays WHERE id = '123-afde';
diff --git a/doc/modules/cassandra/examples/CQL/delete_map.cql b/doc/modules/cassandra/examples/CQL/delete_map.cql
new file mode 100644
index 00000000000..e16b1340553
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/delete_map.cql
@@ -0,0 +1,2 @@
+DELETE favs['author'] FROM users WHERE id = 'jsmith';
+UPDATE users SET favs = favs - { 'movie', 'band'} WHERE id = 'jsmith';
diff --git a/doc/modules/cassandra/examples/CQL/delete_set.cql b/doc/modules/cassandra/examples/CQL/delete_set.cql
new file mode 100644
index 00000000000..308da3ceace
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/delete_set.cql
@@ -0,0 +1 @@
+UPDATE images SET tags = tags - { 'cat' } WHERE name = 'cat.jpg';
diff --git a/doc/modules/cassandra/examples/CQL/delete_statement.cql b/doc/modules/cassandra/examples/CQL/delete_statement.cql
new file mode 100644
index 00000000000..b574e7167d6
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/delete_statement.cql
@@ -0,0 +1,5 @@
+DELETE FROM NerdMovies USING TIMESTAMP 1240003134
+ WHERE movie = 'Serenity';
+
+DELETE phone FROM Users
+ WHERE userid IN (C73DE1D3-AF08-40F3-B124-3FF3E5109F22, B70DE1D0-9908-4AE3-BE34-5573E5B09F14);
diff --git a/doc/modules/cassandra/examples/CQL/drop_aggregate.cql b/doc/modules/cassandra/examples/CQL/drop_aggregate.cql
new file mode 100644
index 00000000000..f05b69ae8b1
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/drop_aggregate.cql
@@ -0,0 +1,4 @@
+DROP AGGREGATE myAggregate;
+DROP AGGREGATE myKeyspace.anAggregate;
+DROP AGGREGATE someAggregate ( int );
+DROP AGGREGATE someAggregate ( text );
diff --git a/doc/modules/cassandra/examples/CQL/drop_function.cql b/doc/modules/cassandra/examples/CQL/drop_function.cql
new file mode 100644
index 00000000000..6d444c17066
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/drop_function.cql
@@ -0,0 +1,4 @@
+DROP FUNCTION myfunction;
+DROP FUNCTION mykeyspace.afunction;
+DROP FUNCTION afunction ( int );
+DROP FUNCTION afunction ( text );
diff --git a/doc/modules/cassandra/examples/CQL/drop_ks.cql b/doc/modules/cassandra/examples/CQL/drop_ks.cql
new file mode 100644
index 00000000000..46a920dbbd7
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/drop_ks.cql
@@ -0,0 +1 @@
+DROP KEYSPACE excelsior;
diff --git a/doc/modules/cassandra/examples/CQL/drop_trigger.cql b/doc/modules/cassandra/examples/CQL/drop_trigger.cql
new file mode 100644
index 00000000000..05a7a95c117
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/drop_trigger.cql
@@ -0,0 +1 @@
+DROP TRIGGER myTrigger ON myTable;
diff --git a/doc/modules/cassandra/examples/CQL/function_dollarsign.cql b/doc/modules/cassandra/examples/CQL/function_dollarsign.cql
new file mode 100644
index 00000000000..878d04449e6
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/function_dollarsign.cql
@@ -0,0 +1,15 @@
+CREATE FUNCTION some_function ( arg int )
+ RETURNS NULL ON NULL INPUT
+ RETURNS int
+ LANGUAGE java
+ AS $$ return arg; $$;
+
+SELECT some_function(column) FROM atable ...;
+UPDATE atable SET col = some_function(?) ...;
+
+CREATE TYPE custom_type (txt text, i int);
+CREATE FUNCTION fct_using_udt ( udtarg frozen<custom_type> )
+ RETURNS NULL ON NULL INPUT
+ RETURNS text
+ LANGUAGE java
+ AS $$ return udtarg.getString("txt"); $$;
diff --git a/doc/modules/cassandra/examples/CQL/function_overload.cql b/doc/modules/cassandra/examples/CQL/function_overload.cql
new file mode 100644
index 00000000000..d70e8e9ff74
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/function_overload.cql
@@ -0,0 +1,2 @@
+CREATE FUNCTION sample ( arg int ) ...;
+CREATE FUNCTION sample ( arg text ) ...;
diff --git a/doc/modules/cassandra/examples/CQL/function_udfcontext.cql b/doc/modules/cassandra/examples/CQL/function_udfcontext.cql
new file mode 100644
index 00000000000..87f89fef6e6
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/function_udfcontext.cql
@@ -0,0 +1,11 @@
+CREATE TYPE custom_type (txt text, i int);
+CREATE FUNCTION fct_using_udt ( somearg int )
+ RETURNS NULL ON NULL INPUT
+ RETURNS custom_type
+ LANGUAGE java
+ AS $$
+ UDTValue udt = udfContext.newReturnUDTValue();
+ udt.setString("txt", "some string");
+ udt.setInt("i", 42);
+ return udt;
+ $$;
diff --git a/doc/modules/cassandra/examples/CQL/grant_describe.cql b/doc/modules/cassandra/examples/CQL/grant_describe.cql
new file mode 100644
index 00000000000..72181456509
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/grant_describe.cql
@@ -0,0 +1 @@
+GRANT DESCRIBE ON ALL ROLES TO role_admin;
diff --git a/doc/modules/cassandra/examples/CQL/grant_drop.cql b/doc/modules/cassandra/examples/CQL/grant_drop.cql
new file mode 100644
index 00000000000..745369d4298
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/grant_drop.cql
@@ -0,0 +1 @@
+GRANT DROP ON keyspace1.table1 TO schema_owner;
diff --git a/doc/modules/cassandra/examples/CQL/grant_execute.cql b/doc/modules/cassandra/examples/CQL/grant_execute.cql
new file mode 100644
index 00000000000..96b34de99dc
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/grant_execute.cql
@@ -0,0 +1 @@
+GRANT EXECUTE ON FUNCTION keyspace1.user_function( int ) TO report_writer;
diff --git a/doc/modules/cassandra/examples/CQL/grant_modify.cql b/doc/modules/cassandra/examples/CQL/grant_modify.cql
new file mode 100644
index 00000000000..7f9a30b225d
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/grant_modify.cql
@@ -0,0 +1 @@
+GRANT MODIFY ON KEYSPACE keyspace1 TO data_writer;
diff --git a/doc/modules/cassandra/examples/CQL/grant_perm.cql b/doc/modules/cassandra/examples/CQL/grant_perm.cql
new file mode 100644
index 00000000000..1dc9a7b18dd
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/grant_perm.cql
@@ -0,0 +1 @@
+GRANT SELECT ON ALL KEYSPACES TO data_reader;
diff --git a/doc/modules/cassandra/examples/CQL/grant_role.cql b/doc/modules/cassandra/examples/CQL/grant_role.cql
new file mode 100644
index 00000000000..1adffb30928
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/grant_role.cql
@@ -0,0 +1 @@
+GRANT report_writer TO alice;
diff --git a/doc/modules/cassandra/examples/CQL/insert_data2_backup.cql b/doc/modules/cassandra/examples/CQL/insert_data2_backup.cql
new file mode 100644
index 00000000000..35e20a3bd20
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/insert_data2_backup.cql
@@ -0,0 +1,5 @@
+INSERT INTO journal (id, name, publisher) VALUES (0, 'Apache Cassandra Magazine', 'Apache Cassandra');
+INSERT INTO journal (id, name, publisher) VALUES (1, 'Couchbase Magazine', 'Couchbase');
+
+INSERT INTO magazine (id, name, publisher) VALUES (0, 'Apache Cassandra Magazine', 'Apache Cassandra');
+INSERT INTO magazine (id, name, publisher) VALUES (1, 'Couchbase Magazine', 'Couchbase');
diff --git a/doc/modules/cassandra/examples/CQL/insert_data_backup.cql b/doc/modules/cassandra/examples/CQL/insert_data_backup.cql
new file mode 100644
index 00000000000..15eb37575f1
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/insert_data_backup.cql
@@ -0,0 +1,6 @@
+INSERT INTO t (id, k, v) VALUES (0, 0, 'val0');
+INSERT INTO t (id, k, v) VALUES (1, 1, 'val1');
+
+INSERT INTO t2 (id, k, v) VALUES (0, 0, 'val0');
+INSERT INTO t2 (id, k, v) VALUES (1, 1, 'val1');
+INSERT INTO t2 (id, k, v) VALUES (2, 2, 'val2');
diff --git a/doc/modules/cassandra/examples/CQL/insert_duration.cql b/doc/modules/cassandra/examples/CQL/insert_duration.cql
new file mode 100644
index 00000000000..b52801bbc2a
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/insert_duration.cql
@@ -0,0 +1,9 @@
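+// The same column accepts all three duration formats: unit-quantity
+// (89h4m48s), ISO 8601 (PT89H8M53S), and the ISO 8601 alternative
+// (P0000-00-00T89:09:09), one per INSERT below.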
+INSERT INTO RiderResults (rider, race, result)
+ VALUES ('Christopher Froome', 'Tour de France', 89h4m48s);
+INSERT INTO RiderResults (rider, race, result)
+ VALUES ('BARDET Romain', 'Tour de France', PT89H8M53S);
+INSERT INTO RiderResults (rider, race, result)
+ VALUES ('QUINTANA Nairo', 'Tour de France', P0000-00-00T89:09:09);
diff --git a/doc/modules/cassandra/examples/CQL/insert_json.cql b/doc/modules/cassandra/examples/CQL/insert_json.cql
new file mode 100644
index 00000000000..d3a5deca8b8
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/insert_json.cql
@@ -0,0 +1,2 @@
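+// Case-sensitive column names must be double-quoted inside the JSON string.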
+INSERT INTO mytable JSON '{ "\"myKey\"": 0, "value": 0}';
diff --git a/doc/modules/cassandra/examples/CQL/insert_statement.cql b/doc/modules/cassandra/examples/CQL/insert_statement.cql
new file mode 100644
index 00000000000..0f7a9435df3
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/insert_statement.cql
@@ -0,0 +1,5 @@
+INSERT INTO NerdMovies (movie, director, main_actor, year)
+ VALUES ('Serenity', 'Joss Whedon', 'Nathan Fillion', 2005)
+ USING TTL 86400;
+
+INSERT INTO NerdMovies JSON '{"movie": "Serenity", "director": "Joss Whedon", "year": 2005}';
diff --git a/doc/modules/cassandra/examples/CQL/insert_static_data.cql b/doc/modules/cassandra/examples/CQL/insert_static_data.cql
new file mode 100644
index 00000000000..c6a588f9598
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/insert_static_data.cql
@@ -0,0 +1,3 @@
+INSERT INTO t (pk, t, v, s) VALUES (0, 0, 'val0', 'static0');
+INSERT INTO t (pk, t, v, s) VALUES (0, 1, 'val1', 'static1');
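+// 's' is static: the second INSERT overwrites it, so both rows of pk=0 read s='static1'.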
diff --git a/doc/modules/cassandra/examples/CQL/insert_table_cc_addl.cql b/doc/modules/cassandra/examples/CQL/insert_table_cc_addl.cql
new file mode 100644
index 00000000000..f574d539120
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/insert_table_cc_addl.cql
@@ -0,0 +1 @@
+INSERT INTO t3 (a,b,c,d) VALUES (0,0,0,9);
diff --git a/doc/modules/cassandra/examples/CQL/insert_table_clustercolumn.cql b/doc/modules/cassandra/examples/CQL/insert_table_clustercolumn.cql
new file mode 100644
index 00000000000..449f921d5ca
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/insert_table_clustercolumn.cql
@@ -0,0 +1,5 @@
+INSERT INTO t2 (a, b, c, d) VALUES (0,0,0,0);
+INSERT INTO t2 (a, b, c, d) VALUES (0,0,1,1);
+INSERT INTO t2 (a, b, c, d) VALUES (0,1,2,2);
+INSERT INTO t2 (a, b, c, d) VALUES (0,1,3,3);
+INSERT INTO t2 (a, b, c, d) VALUES (1,1,4,4);
diff --git a/doc/modules/cassandra/examples/CQL/insert_table_clustercolumn2.cql b/doc/modules/cassandra/examples/CQL/insert_table_clustercolumn2.cql
new file mode 100644
index 00000000000..a048c9f7fb0
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/insert_table_clustercolumn2.cql
@@ -0,0 +1,5 @@
+INSERT INTO t3 (a, b, c, d) VALUES (0,0,0,0);
+INSERT INTO t3 (a, b, c, d) VALUES (0,0,1,1);
+INSERT INTO t3 (a, b, c, d) VALUES (0,1,2,2);
+INSERT INTO t3 (a, b, c, d) VALUES (0,1,3,3);
+INSERT INTO t3 (a, b, c, d) VALUES (1,1,4,4);
diff --git a/doc/modules/cassandra/examples/CQL/insert_table_compound_pk.cql b/doc/modules/cassandra/examples/CQL/insert_table_compound_pk.cql
new file mode 100644
index 00000000000..3ce1953fe86
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/insert_table_compound_pk.cql
@@ -0,0 +1,5 @@
+INSERT INTO t (a, b, c, d) VALUES (0,0,0,0);
+INSERT INTO t (a, b, c, d) VALUES (0,0,1,1);
+INSERT INTO t (a, b, c, d) VALUES (0,1,2,2);
+INSERT INTO t (a, b, c, d) VALUES (0,1,3,3);
+INSERT INTO t (a, b, c, d) VALUES (1,1,4,4);
diff --git a/doc/modules/cassandra/examples/CQL/insert_udt.cql b/doc/modules/cassandra/examples/CQL/insert_udt.cql
new file mode 100644
index 00000000000..5c6f1766ef4
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/insert_udt.cql
@@ -0,0 +1,17 @@
+INSERT INTO user (name, addresses)
+ VALUES ('z3 Pr3z1den7', {
+ 'home' : {
+ street: '1600 Pennsylvania Ave NW',
+ city: 'Washington',
+ zip: '20500',
+ phones: { 'cell' : { country_code: 1, number: '202 456-1111' },
+ 'landline' : { country_code: 1, number: '...' } }
+ },
+ 'work' : {
+ street: '1600 Pennsylvania Ave NW',
+ city: 'Washington',
+ zip: '20500',
+ phones: { 'fax' : { country_code: 1, number: '...' } }
+ }
+ }
+);
diff --git a/doc/modules/cassandra/examples/CQL/list.cql b/doc/modules/cassandra/examples/CQL/list.cql
new file mode 100644
index 00000000000..4d1ef13f863
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/list.cql
@@ -0,0 +1,12 @@
+CREATE TABLE plays (
+ id text PRIMARY KEY,
+ game text,
+ players int,
+ scores list<int> // A list of integers
+);
+
+INSERT INTO plays (id, game, players, scores)
+ VALUES ('123-afde', 'quake', 3, [17, 4, 2]);
+
+// Replace the existing list entirely
+UPDATE plays SET scores = [ 3, 9, 4] WHERE id = '123-afde';
diff --git a/doc/modules/cassandra/examples/CQL/list_all_perm.cql b/doc/modules/cassandra/examples/CQL/list_all_perm.cql
new file mode 100644
index 00000000000..efbcfc86e74
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/list_all_perm.cql
@@ -0,0 +1 @@
+LIST ALL PERMISSIONS ON keyspace1.table1 OF bob;
diff --git a/doc/modules/cassandra/examples/CQL/list_perm.cql b/doc/modules/cassandra/examples/CQL/list_perm.cql
new file mode 100644
index 00000000000..094bf093350
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/list_perm.cql
@@ -0,0 +1 @@
+LIST ALL PERMISSIONS OF alice;
diff --git a/doc/modules/cassandra/examples/CQL/list_roles.cql b/doc/modules/cassandra/examples/CQL/list_roles.cql
new file mode 100644
index 00000000000..5c0f0631aca
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/list_roles.cql
@@ -0,0 +1 @@
+LIST ROLES;
diff --git a/doc/modules/cassandra/examples/CQL/list_roles_nonrecursive.cql b/doc/modules/cassandra/examples/CQL/list_roles_nonrecursive.cql
new file mode 100644
index 00000000000..eea62189445
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/list_roles_nonrecursive.cql
@@ -0,0 +1 @@
+LIST ROLES OF bob NORECURSIVE;
diff --git a/doc/modules/cassandra/examples/CQL/list_roles_of.cql b/doc/modules/cassandra/examples/CQL/list_roles_of.cql
new file mode 100644
index 00000000000..c338ca3452e
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/list_roles_of.cql
@@ -0,0 +1 @@
+LIST ROLES OF alice;
diff --git a/doc/modules/cassandra/examples/CQL/list_select_perm.cql b/doc/modules/cassandra/examples/CQL/list_select_perm.cql
new file mode 100644
index 00000000000..c085df47ce9
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/list_select_perm.cql
@@ -0,0 +1 @@
+LIST SELECT PERMISSIONS OF carlos;
diff --git a/doc/modules/cassandra/examples/CQL/map.cql b/doc/modules/cassandra/examples/CQL/map.cql
new file mode 100644
index 00000000000..ca9ca5e2e4d
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/map.cql
@@ -0,0 +1,11 @@
+CREATE TABLE users (
+ id text PRIMARY KEY,
+ name text,
+ favs map<text, text> // A map of text keys, and text values
+);
+
+INSERT INTO users (id, name, favs)
+ VALUES ('jsmith', 'John Smith', { 'fruit' : 'Apple', 'band' : 'Beatles' });
+
+// Replace the existing map entirely.
+UPDATE users SET favs = { 'fruit' : 'Banana' } WHERE id = 'jsmith';
diff --git a/doc/modules/cassandra/examples/CQL/min_max.cql b/doc/modules/cassandra/examples/CQL/min_max.cql
new file mode 100644
index 00000000000..3f31cc5bac4
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/min_max.cql
@@ -0,0 +1 @@
+SELECT MIN (players), MAX (players) FROM plays WHERE game = 'quake';
diff --git a/doc/modules/cassandra/examples/CQL/mv_table_def.cql b/doc/modules/cassandra/examples/CQL/mv_table_def.cql
new file mode 100644
index 00000000000..106fe118139
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/mv_table_def.cql
@@ -0,0 +1,8 @@
+CREATE TABLE t (
+ k int,
+ c1 int,
+ c2 int,
+ v1 int,
+ v2 int,
+ PRIMARY KEY (k, c1, c2)
+);
diff --git a/doc/modules/cassandra/examples/CQL/mv_table_error.cql b/doc/modules/cassandra/examples/CQL/mv_table_error.cql
new file mode 100644
index 00000000000..e7560f92a16
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/mv_table_error.cql
@@ -0,0 +1,13 @@
+// Error: cannot include both v1 and v2 in the primary key as both are not in the base table primary key
+
+CREATE MATERIALIZED VIEW mv1 AS
+ SELECT * FROM t
+ WHERE k IS NOT NULL AND c1 IS NOT NULL AND c2 IS NOT NULL AND v1 IS NOT NULL
+ PRIMARY KEY (v1, v2, k, c1, c2);
+
+// Error: must include k in the primary key as it's a base table primary key column
+
+CREATE MATERIALIZED VIEW mv1 AS
+ SELECT * FROM t
+ WHERE c1 IS NOT NULL AND c2 IS NOT NULL
+ PRIMARY KEY (c1, c2);
diff --git a/doc/modules/cassandra/examples/CQL/mv_table_from_base.cql b/doc/modules/cassandra/examples/CQL/mv_table_from_base.cql
new file mode 100644
index 00000000000..bd2f9f2d5e8
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/mv_table_from_base.cql
@@ -0,0 +1,9 @@
+CREATE MATERIALIZED VIEW mv1 AS
+ SELECT * FROM t
+ WHERE k IS NOT NULL AND c1 IS NOT NULL AND c2 IS NOT NULL
+ PRIMARY KEY (c1, k, c2);
+
+CREATE MATERIALIZED VIEW mv1 AS
+ SELECT * FROM t
+ WHERE v1 IS NOT NULL AND k IS NOT NULL AND c1 IS NOT NULL AND c2 IS NOT NULL
+ PRIMARY KEY (v1, k, c1, c2);
diff --git a/doc/modules/cassandra/examples/CQL/no_revoke.cql b/doc/modules/cassandra/examples/CQL/no_revoke.cql
new file mode 100644
index 00000000000..b6a044cf203
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/no_revoke.cql
@@ -0,0 +1,5 @@
+* `system_schema.keyspaces`
+* `system_schema.columns`
+* `system_schema.tables`
+* `system.local`
+* `system.peers`
diff --git a/doc/modules/cassandra/examples/CQL/query_allow_filtering.cql b/doc/modules/cassandra/examples/CQL/query_allow_filtering.cql
new file mode 100644
index 00000000000..c4aaf394ec2
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/query_allow_filtering.cql
@@ -0,0 +1,5 @@
+// All users are returned
+SELECT * FROM users;
+
+// All users with a particular birth year are returned
+SELECT * FROM users WHERE birth_year = 1981;
diff --git a/doc/modules/cassandra/examples/CQL/query_fail_allow_filtering.cql b/doc/modules/cassandra/examples/CQL/query_fail_allow_filtering.cql
new file mode 100644
index 00000000000..2e6c63bffd2
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/query_fail_allow_filtering.cql
@@ -0,0 +1 @@
+SELECT * FROM users WHERE birth_year = 1981 AND country = 'FR';
diff --git a/doc/modules/cassandra/examples/CQL/query_nofail_allow_filtering.cql b/doc/modules/cassandra/examples/CQL/query_nofail_allow_filtering.cql
new file mode 100644
index 00000000000..88aed561954
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/query_nofail_allow_filtering.cql
@@ -0,0 +1 @@
+SELECT * FROM users WHERE birth_year = 1981 AND country = 'FR' ALLOW FILTERING;
diff --git a/doc/modules/cassandra/examples/CQL/rename_udt_field.cql b/doc/modules/cassandra/examples/CQL/rename_udt_field.cql
new file mode 100644
index 00000000000..7718788b3e8
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/rename_udt_field.cql
@@ -0,0 +1 @@
+ALTER TYPE address RENAME zip TO zipcode;
diff --git a/doc/modules/cassandra/examples/CQL/revoke_perm.cql b/doc/modules/cassandra/examples/CQL/revoke_perm.cql
new file mode 100644
index 00000000000..d4ac1edb140
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/revoke_perm.cql
@@ -0,0 +1,5 @@
+REVOKE SELECT ON ALL KEYSPACES FROM data_reader;
+REVOKE MODIFY ON KEYSPACE keyspace1 FROM data_writer;
+REVOKE DROP ON keyspace1.table1 FROM schema_owner;
+REVOKE EXECUTE ON FUNCTION keyspace1.user_function( int ) FROM report_writer;
+REVOKE DESCRIBE ON ALL ROLES FROM role_admin;
diff --git a/doc/modules/cassandra/examples/CQL/revoke_role.cql b/doc/modules/cassandra/examples/CQL/revoke_role.cql
new file mode 100644
index 00000000000..acf50666017
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/revoke_role.cql
@@ -0,0 +1 @@
+REVOKE report_writer FROM alice;
diff --git a/doc/modules/cassandra/examples/CQL/role_error.cql b/doc/modules/cassandra/examples/CQL/role_error.cql
new file mode 100644
index 00000000000..fa061a2ea9f
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/role_error.cql
@@ -0,0 +1,6 @@
+GRANT role_a TO role_b;
+GRANT role_b TO role_a;
+
+GRANT role_a TO role_b;
+GRANT role_b TO role_c;
+GRANT role_c TO role_a;
diff --git a/doc/modules/cassandra/examples/CQL/select_data2_backup.cql b/doc/modules/cassandra/examples/CQL/select_data2_backup.cql
new file mode 100644
index 00000000000..7a409d75264
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/select_data2_backup.cql
@@ -0,0 +1,2 @@
+SELECT * FROM catalogkeyspace.journal;
+SELECT * FROM catalogkeyspace.magazine;
diff --git a/doc/modules/cassandra/examples/CQL/select_data_backup.cql b/doc/modules/cassandra/examples/CQL/select_data_backup.cql
new file mode 100644
index 00000000000..4468467a5ce
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/select_data_backup.cql
@@ -0,0 +1,2 @@
+SELECT * FROM t;
+SELECT * FROM t2;
diff --git a/doc/modules/cassandra/examples/CQL/select_range.cql b/doc/modules/cassandra/examples/CQL/select_range.cql
new file mode 100644
index 00000000000..fcf3bd58333
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/select_range.cql
@@ -0,0 +1 @@
+SELECT * FROM t2 WHERE a = 0 AND b > 0 and b <= 3;
diff --git a/doc/modules/cassandra/examples/CQL/select_statement.cql b/doc/modules/cassandra/examples/CQL/select_statement.cql
new file mode 100644
index 00000000000..cee5a191638
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/select_statement.cql
@@ -0,0 +1,11 @@
+SELECT name, occupation FROM users WHERE userid IN (199, 200, 207);
+SELECT JSON name, occupation FROM users WHERE userid = 199;
+SELECT name AS user_name, occupation AS user_occupation FROM users;
+
+SELECT time, value
+FROM events
+WHERE event_type = 'myEvent'
+ AND time > '2011-02-03'
+ AND time <= '2012-01-01';
+
+SELECT COUNT (*) AS user_count FROM users;
diff --git a/doc/modules/cassandra/examples/CQL/select_static_data.cql b/doc/modules/cassandra/examples/CQL/select_static_data.cql
new file mode 100644
index 00000000000..8bca9375bfe
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/select_static_data.cql
@@ -0,0 +1 @@
+SELECT * FROM t;
diff --git a/doc/modules/cassandra/examples/CQL/select_table_clustercolumn.cql b/doc/modules/cassandra/examples/CQL/select_table_clustercolumn.cql
new file mode 100644
index 00000000000..60bb2cf95b0
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/select_table_clustercolumn.cql
@@ -0,0 +1 @@
+SELECT * FROM t2;
diff --git a/doc/modules/cassandra/examples/CQL/select_table_compound_pk.cql b/doc/modules/cassandra/examples/CQL/select_table_compound_pk.cql
new file mode 100644
index 00000000000..8bca9375bfe
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/select_table_compound_pk.cql
@@ -0,0 +1 @@
+SELECT * FROM t;
diff --git a/doc/modules/cassandra/examples/CQL/set.cql b/doc/modules/cassandra/examples/CQL/set.cql
new file mode 100644
index 00000000000..607981b8be5
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/set.cql
@@ -0,0 +1,11 @@
+CREATE TABLE images (
+ name text PRIMARY KEY,
+ owner text,
+ tags set<text> // A set of text values
+);
+
+INSERT INTO images (name, owner, tags)
+ VALUES ('cat.jpg', 'jsmith', { 'pet', 'cute' });
+
+// Replace the existing set entirely
+UPDATE images SET tags = { 'kitten', 'cat', 'lol' } WHERE name = 'cat.jpg';
diff --git a/doc/modules/cassandra/examples/CQL/spec_retry_values.cql b/doc/modules/cassandra/examples/CQL/spec_retry_values.cql
new file mode 100644
index 00000000000..bcd8d26dfce
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/spec_retry_values.cql
@@ -0,0 +1,6 @@
+min(99percentile,50ms)
+max(99p,50MS)
+MAX(99P,50ms)
+MIN(99.9PERCENTILE,50ms)
+max(90percentile,100MS)
+MAX(100.0PERCENTILE,60ms)
diff --git a/doc/modules/cassandra/examples/CQL/sum.cql b/doc/modules/cassandra/examples/CQL/sum.cql
new file mode 100644
index 00000000000..bccfcbc81ec
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/sum.cql
@@ -0,0 +1 @@
+SELECT SUM (players) FROM plays;
diff --git a/doc/modules/cassandra/examples/CQL/table_for_where.cql b/doc/modules/cassandra/examples/CQL/table_for_where.cql
new file mode 100644
index 00000000000..f5ed5001ebf
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/table_for_where.cql
@@ -0,0 +1,9 @@
+CREATE TABLE posts (
+ userid text,
+ blog_title text,
+ posted_at timestamp,
+ entry_title text,
+ content text,
+ category int,
+ PRIMARY KEY (userid, blog_title, posted_at)
+);
diff --git a/doc/modules/cassandra/examples/CQL/timeuuid_min_max.cql b/doc/modules/cassandra/examples/CQL/timeuuid_min_max.cql
new file mode 100644
index 00000000000..81353f53d8b
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/timeuuid_min_max.cql
@@ -0,0 +1,3 @@
+SELECT * FROM myTable
+ WHERE t > maxTimeuuid('2013-01-01 00:05+0000')
+ AND t < minTimeuuid('2013-02-02 10:00+0000');
diff --git a/doc/modules/cassandra/examples/CQL/timeuuid_now.cql b/doc/modules/cassandra/examples/CQL/timeuuid_now.cql
new file mode 100644
index 00000000000..54c2cc4817f
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/timeuuid_now.cql
@@ -0,0 +1 @@
+SELECT * FROM myTable WHERE t = now();
diff --git a/doc/modules/cassandra/examples/CQL/token.cql b/doc/modules/cassandra/examples/CQL/token.cql
new file mode 100644
index 00000000000..b5c7f8b82bd
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/token.cql
@@ -0,0 +1,2 @@
+SELECT * FROM posts
+ WHERE token(userid) > token('tom') AND token(userid) < token('bob');
diff --git a/doc/modules/cassandra/examples/CQL/tuple.cql b/doc/modules/cassandra/examples/CQL/tuple.cql
new file mode 100644
index 00000000000..b612d078aa1
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/tuple.cql
@@ -0,0 +1,6 @@
+CREATE TABLE durations (
+ event text,
+ duration tuple<int, text>,
+);
+
+INSERT INTO durations (event, duration) VALUES ('ev1', (3, 'hours'));
diff --git a/doc/modules/cassandra/examples/CQL/uda.cql b/doc/modules/cassandra/examples/CQL/uda.cql
new file mode 100644
index 00000000000..b40dd113f04
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/uda.cql
@@ -0,0 +1,41 @@
+CREATE OR REPLACE FUNCTION test.averageState(state tuple<int, bigint>, val int)
+ CALLED ON NULL INPUT
+ RETURNS tuple<int, bigint>
+ LANGUAGE java
+ AS $$
+ if (val != null) {
+ state.setInt(0, state.getInt(0)+1);
+ state.setLong(1, state.getLong(1)+val.intValue());
+ }
+ return state;
+ $$;
+
+CREATE OR REPLACE FUNCTION test.averageFinal (state tuple<int, bigint>)
+ CALLED ON NULL INPUT
+ RETURNS double
+ LANGUAGE java
+ AS $$
+ double r = 0;
+ if (state.getInt(0) == 0) return null;
+ r = state.getLong(1);
+ r /= state.getInt(0);
+ return Double.valueOf(r);
+ $$;
+
+CREATE OR REPLACE AGGREGATE test.average(int)
+ SFUNC averageState
+ STYPE tuple<int, bigint>
+ FINALFUNC averageFinal
+ INITCOND (0, 0);
+
+CREATE TABLE test.atable (
+ pk int PRIMARY KEY,
+ val int
+);
+
+INSERT INTO test.atable (pk, val) VALUES (1,1);
+INSERT INTO test.atable (pk, val) VALUES (2,2);
+INSERT INTO test.atable (pk, val) VALUES (3,3);
+INSERT INTO test.atable (pk, val) VALUES (4,4);
+
+SELECT test.average(val) FROM atable;
diff --git a/doc/modules/cassandra/examples/CQL/udt.cql b/doc/modules/cassandra/examples/CQL/udt.cql
new file mode 100644
index 00000000000..defcc821e62
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/udt.cql
@@ -0,0 +1,16 @@
+CREATE TYPE phone (
+ country_code int,
+ number text,
+);
+
+CREATE TYPE address (
+ street text,
+ city text,
+ zip text,
+ phones map<text, frozen<phone>>
+);
+
+CREATE TABLE user (
+ name text PRIMARY KEY,
+ addresses map<text, frozen<address>>
+);
diff --git a/doc/modules/cassandra/examples/CQL/update_list.cql b/doc/modules/cassandra/examples/CQL/update_list.cql
new file mode 100644
index 00000000000..70aacf55d94
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/update_list.cql
@@ -0,0 +1,2 @@
+UPDATE plays SET players = 5, scores = scores + [ 14, 21 ] WHERE id = '123-afde';
+UPDATE plays SET players = 6, scores = [ 3 ] + scores WHERE id = '123-afde';
diff --git a/doc/modules/cassandra/examples/CQL/update_map.cql b/doc/modules/cassandra/examples/CQL/update_map.cql
new file mode 100644
index 00000000000..870f46343b5
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/update_map.cql
@@ -0,0 +1,2 @@
+UPDATE users SET favs['author'] = 'Ed Poe' WHERE id = 'jsmith';
+UPDATE users SET favs = favs + { 'movie' : 'Casablanca', 'band' : 'ZZ Top' } WHERE id = 'jsmith';
diff --git a/doc/modules/cassandra/examples/CQL/update_particular_list_element.cql b/doc/modules/cassandra/examples/CQL/update_particular_list_element.cql
new file mode 100644
index 00000000000..604ad34cc97
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/update_particular_list_element.cql
@@ -0,0 +1 @@
+UPDATE plays SET scores[1] = 7 WHERE id = '123-afde';
diff --git a/doc/modules/cassandra/examples/CQL/update_set.cql b/doc/modules/cassandra/examples/CQL/update_set.cql
new file mode 100644
index 00000000000..16e6eb23e4b
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/update_set.cql
@@ -0,0 +1 @@
+UPDATE images SET tags = tags + { 'gray', 'cuddly' } WHERE name = 'cat.jpg';
diff --git a/doc/modules/cassandra/examples/CQL/update_statement.cql b/doc/modules/cassandra/examples/CQL/update_statement.cql
new file mode 100644
index 00000000000..7e1cfa76fec
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/update_statement.cql
@@ -0,0 +1,10 @@
+UPDATE NerdMovies USING TTL 400
+ SET director = 'Joss Whedon',
+ main_actor = 'Nathan Fillion',
+ year = 2005
+ WHERE movie = 'Serenity';
+
+UPDATE UserActions
+ SET total = total + 2
+ WHERE user = B70DE1D0-9908-4AE3-BE34-5573E5B09F14
+ AND action = 'click';
diff --git a/doc/modules/cassandra/examples/CQL/update_ttl_map.cql b/doc/modules/cassandra/examples/CQL/update_ttl_map.cql
new file mode 100644
index 00000000000..d2db9bdcdf1
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/update_ttl_map.cql
@@ -0,0 +1 @@
+UPDATE users USING TTL 10 SET favs['color'] = 'green' WHERE id = 'jsmith';
diff --git a/doc/modules/cassandra/examples/CQL/use_ks.cql b/doc/modules/cassandra/examples/CQL/use_ks.cql
new file mode 100644
index 00000000000..b3aaaf3ea84
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/use_ks.cql
@@ -0,0 +1 @@
+USE excelsior;
diff --git a/doc/modules/cassandra/examples/CQL/where.cql b/doc/modules/cassandra/examples/CQL/where.cql
new file mode 100644
index 00000000000..22d4bca3c4c
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/where.cql
@@ -0,0 +1,4 @@
+SELECT entry_title, content FROM posts
+ WHERE userid = 'john doe'
+ AND blog_title='John''s Blog'
+ AND posted_at >= '2012-01-01' AND posted_at < '2012-01-31';
diff --git a/doc/modules/cassandra/examples/CQL/where_fail.cql b/doc/modules/cassandra/examples/CQL/where_fail.cql
new file mode 100644
index 00000000000..57413dfb0d0
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/where_fail.cql
@@ -0,0 +1,5 @@
+// Needs a blog_title to be set to select ranges of posted_at
+
+SELECT entry_title, content FROM posts
+ WHERE userid = 'john doe'
+ AND posted_at >= '2012-01-01' AND posted_at < '2012-01-31';
diff --git a/doc/modules/cassandra/examples/CQL/where_group_cluster_columns.cql b/doc/modules/cassandra/examples/CQL/where_group_cluster_columns.cql
new file mode 100644
index 00000000000..1efb55ecd79
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/where_group_cluster_columns.cql
@@ -0,0 +1,3 @@
+SELECT * FROM posts
+ WHERE userid = 'john doe'
+ AND (blog_title, posted_at) > ('John''s Blog', '2012-01-01');
diff --git a/doc/modules/cassandra/examples/CQL/where_in_tuple.cql b/doc/modules/cassandra/examples/CQL/where_in_tuple.cql
new file mode 100644
index 00000000000..1d558046dc3
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/where_in_tuple.cql
@@ -0,0 +1,3 @@
+SELECT * FROM posts
+ WHERE userid = 'john doe'
+ AND (blog_title, posted_at) IN (('John''s Blog', '2012-01-01'), ('Extreme Chess', '2014-06-01'));
diff --git a/doc/modules/cassandra/examples/CQL/where_no_group_cluster_columns.cql b/doc/modules/cassandra/examples/CQL/where_no_group_cluster_columns.cql
new file mode 100644
index 00000000000..6681ba5c85e
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/where_no_group_cluster_columns.cql
@@ -0,0 +1,4 @@
+SELECT * FROM posts
+ WHERE userid = 'john doe'
+ AND blog_title > 'John''s Blog'
+ AND posted_at > '2012-01-01';
diff --git a/doc/modules/cassandra/examples/JAVA/udf_imports.java b/doc/modules/cassandra/examples/JAVA/udf_imports.java
new file mode 100644
index 00000000000..6b883bf32e3
--- /dev/null
+++ b/doc/modules/cassandra/examples/JAVA/udf_imports.java
@@ -0,0 +1,8 @@
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import org.apache.cassandra.cql3.functions.UDFContext;
+import com.datastax.driver.core.TypeCodec;
+import com.datastax.driver.core.TupleValue;
+import com.datastax.driver.core.UDTValue;
diff --git a/doc/modules/cassandra/examples/JAVA/udfcontext.java b/doc/modules/cassandra/examples/JAVA/udfcontext.java
new file mode 100644
index 00000000000..65e0c7fc0b3
--- /dev/null
+++ b/doc/modules/cassandra/examples/JAVA/udfcontext.java
@@ -0,0 +1,11 @@
+public interface UDFContext
+{
+ UDTValue newArgUDTValue(String argName);
+ UDTValue newArgUDTValue(int argNum);
+ UDTValue newReturnUDTValue();
+ UDTValue newUDTValue(String udtName);
+ TupleValue newArgTupleValue(String argName);
+ TupleValue newArgTupleValue(int argNum);
+ TupleValue newReturnTupleValue();
+ TupleValue newTupleValue(String cqlDefinition);
+}
diff --git a/doc/modules/cassandra/examples/RESULTS/add_repo_keys.result b/doc/modules/cassandra/examples/RESULTS/add_repo_keys.result
new file mode 100644
index 00000000000..4736ecea23f
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/add_repo_keys.result
@@ -0,0 +1,4 @@
+% Total % Received % Xferd Average Speed Time Time Time Current
+ Dload Upload Total Spent Left Speed
+100 266k 100 266k 0 0 320k 0 --:--:-- --:--:-- --:--:-- 320k
+OK
diff --git a/doc/modules/cassandra/examples/RESULTS/add_yum_repo.result b/doc/modules/cassandra/examples/RESULTS/add_yum_repo.result
new file mode 100644
index 00000000000..ba06f540f34
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/add_yum_repo.result
@@ -0,0 +1,6 @@
+[cassandra]
+name=Apache Cassandra
+baseurl=https://downloads.apache.org/cassandra/redhat/40x/
+gpgcheck=1
+repo_gpgcheck=1
+gpgkey=https://downloads.apache.org/cassandra/KEYS
diff --git a/doc/modules/cassandra/examples/RESULTS/autoexpand_exclude_dc.result b/doc/modules/cassandra/examples/RESULTS/autoexpand_exclude_dc.result
new file mode 100644
index 00000000000..6d5a8a42244
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/autoexpand_exclude_dc.result
@@ -0,0 +1 @@
+CREATE KEYSPACE excalibur WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': '3'} AND durable_writes = true;
diff --git a/doc/modules/cassandra/examples/RESULTS/autoexpand_ks.result b/doc/modules/cassandra/examples/RESULTS/autoexpand_ks.result
new file mode 100644
index 00000000000..fcc8855e4be
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/autoexpand_ks.result
@@ -0,0 +1 @@
+CREATE KEYSPACE excalibur WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': '3', 'DC2': '3'} AND durable_writes = true;
diff --git a/doc/modules/cassandra/examples/RESULTS/autoexpand_ks_override.result b/doc/modules/cassandra/examples/RESULTS/autoexpand_ks_override.result
new file mode 100644
index 00000000000..b76189dcede
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/autoexpand_ks_override.result
@@ -0,0 +1 @@
+CREATE KEYSPACE excalibur WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': '3', 'DC2': '2'} AND durable_writes = true;
diff --git a/doc/modules/cassandra/examples/RESULTS/cqlsh_localhost.result b/doc/modules/cassandra/examples/RESULTS/cqlsh_localhost.result
new file mode 100644
index 00000000000..b5a19082d6b
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/cqlsh_localhost.result
@@ -0,0 +1,11 @@
+Connected to Test Cluster at localhost:9042.
+[cqlsh 5.0.1 | Cassandra 3.8 | CQL spec 3.4.2 | Native protocol v4]
+Use HELP for help.
+cqlsh> SELECT cluster_name, listen_address FROM system.local;
+
+ cluster_name | listen_address
+--------------+----------------
+ Test Cluster | 127.0.0.1
+
+(1 rows)
+cqlsh>
diff --git a/doc/modules/cassandra/examples/RESULTS/find_backups.result b/doc/modules/cassandra/examples/RESULTS/find_backups.result
new file mode 100644
index 00000000000..156b5694a09
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/find_backups.result
@@ -0,0 +1,4 @@
+./cassandra/data/data/cqlkeyspace/t-d132e240c21711e9bbee19821dcea330/backups
+./cassandra/data/data/cqlkeyspace/t2-d993a390c22911e9b1350d927649052c/backups
+./cassandra/data/data/catalogkeyspace/journal-296a2d30c22a11e9b1350d927649052c/backups
+./cassandra/data/data/catalogkeyspace/magazine-446eae30c22a11e9b1350d927649052c/backups
diff --git a/doc/modules/cassandra/examples/RESULTS/find_backups_table.result b/doc/modules/cassandra/examples/RESULTS/find_backups_table.result
new file mode 100644
index 00000000000..7e01fa617b5
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/find_backups_table.result
@@ -0,0 +1 @@
+./cassandra/data/data/cqlkeyspace/t-d132e240c21711e9bbee19821dcea330/backups
diff --git a/doc/modules/cassandra/examples/RESULTS/find_two_snapshots.result b/doc/modules/cassandra/examples/RESULTS/find_two_snapshots.result
new file mode 100644
index 00000000000..9cfb693bd4b
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/find_two_snapshots.result
@@ -0,0 +1,3 @@
+total 0
+drwxrwxr-x. 2 ec2-user ec2-user 265 Aug 19 02:44 catalog-ks
+drwxrwxr-x. 2 ec2-user ec2-user 265 Aug 19 02:52 multi-ks
diff --git a/doc/modules/cassandra/examples/RESULTS/flush_and_check.result b/doc/modules/cassandra/examples/RESULTS/flush_and_check.result
new file mode 100644
index 00000000000..33863adf23c
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/flush_and_check.result
@@ -0,0 +1,9 @@
+total 36
+-rw-rw-r--. 2 ec2-user ec2-user 47 Aug 19 00:32 na-1-big-CompressionInfo.db
+-rw-rw-r--. 2 ec2-user ec2-user 43 Aug 19 00:32 na-1-big-Data.db
+-rw-rw-r--. 2 ec2-user ec2-user 10 Aug 19 00:32 na-1-big-Digest.crc32
+-rw-rw-r--. 2 ec2-user ec2-user 16 Aug 19 00:32 na-1-big-Filter.db
+-rw-rw-r--. 2 ec2-user ec2-user 8 Aug 19 00:32 na-1-big-Index.db
+-rw-rw-r--. 2 ec2-user ec2-user 4673 Aug 19 00:32 na-1-big-Statistics.db
+-rw-rw-r--. 2 ec2-user ec2-user 56 Aug 19 00:32 na-1-big-Summary.db
+-rw-rw-r--. 2 ec2-user ec2-user 92 Aug 19 00:32 na-1-big-TOC.txt
diff --git a/doc/modules/cassandra/examples/RESULTS/flush_and_check2.result b/doc/modules/cassandra/examples/RESULTS/flush_and_check2.result
new file mode 100644
index 00000000000..d89b99126fa
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/flush_and_check2.result
@@ -0,0 +1,17 @@
+total 72
+-rw-rw-r--. 2 ec2-user ec2-user 47 Aug 19 00:32 na-1-big-CompressionInfo.db
+-rw-rw-r--. 2 ec2-user ec2-user 43 Aug 19 00:32 na-1-big-Data.db
+-rw-rw-r--. 2 ec2-user ec2-user 10 Aug 19 00:32 na-1-big-Digest.crc32
+-rw-rw-r--. 2 ec2-user ec2-user 16 Aug 19 00:32 na-1-big-Filter.db
+-rw-rw-r--. 2 ec2-user ec2-user 8 Aug 19 00:32 na-1-big-Index.db
+-rw-rw-r--. 2 ec2-user ec2-user 4673 Aug 19 00:32 na-1-big-Statistics.db
+-rw-rw-r--. 2 ec2-user ec2-user 56 Aug 19 00:32 na-1-big-Summary.db
+-rw-rw-r--. 2 ec2-user ec2-user 92 Aug 19 00:32 na-1-big-TOC.txt
+-rw-rw-r--. 2 ec2-user ec2-user 47 Aug 19 00:35 na-2-big-CompressionInfo.db
+-rw-rw-r--. 2 ec2-user ec2-user 41 Aug 19 00:35 na-2-big-Data.db
+-rw-rw-r--. 2 ec2-user ec2-user 10 Aug 19 00:35 na-2-big-Digest.crc32
+-rw-rw-r--. 2 ec2-user ec2-user 16 Aug 19 00:35 na-2-big-Filter.db
+-rw-rw-r--. 2 ec2-user ec2-user 8 Aug 19 00:35 na-2-big-Index.db
+-rw-rw-r--. 2 ec2-user ec2-user 4673 Aug 19 00:35 na-2-big-Statistics.db
+-rw-rw-r--. 2 ec2-user ec2-user 56 Aug 19 00:35 na-2-big-Summary.db
+-rw-rw-r--. 2 ec2-user ec2-user 92 Aug 19 00:35 na-2-big-TOC.txt
diff --git a/doc/modules/cassandra/examples/RESULTS/insert_data2_backup.result b/doc/modules/cassandra/examples/RESULTS/insert_data2_backup.result
new file mode 100644
index 00000000000..23e3902d20c
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/insert_data2_backup.result
@@ -0,0 +1,13 @@
+id | name | publisher
+----+---------------------------+------------------
+ 1 | Couchbase Magazine | Couchbase
+ 0 | Apache Cassandra Magazine | Apache Cassandra
+
+ (2 rows)
+
+id | name | publisher
+----+---------------------------+------------------
+ 1 | Couchbase Magazine | Couchbase
+ 0 | Apache Cassandra Magazine | Apache Cassandra
+
+ (2 rows)
diff --git a/doc/modules/cassandra/examples/RESULTS/insert_table_cc_addl.result b/doc/modules/cassandra/examples/RESULTS/insert_table_cc_addl.result
new file mode 100644
index 00000000000..d9af0c6192e
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/insert_table_cc_addl.result
@@ -0,0 +1,9 @@
+ a | b | c | d
+---+---+---+---
+ 1 | 1 | 4 | 4
+ 0 | 0 | 0 | 9 <1>
+ 0 | 0 | 1 | 1
+ 0 | 1 | 2 | 2
+ 0 | 1 | 3 | 3
+
+(5 rows)
diff --git a/doc/modules/cassandra/examples/RESULTS/java_verify.result b/doc/modules/cassandra/examples/RESULTS/java_verify.result
new file mode 100644
index 00000000000..3ea962560c1
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/java_verify.result
@@ -0,0 +1,3 @@
+openjdk version "1.8.0_222"
+OpenJDK Runtime Environment (build 1.8.0_222-8u222-b10-1ubuntu1~16.04.1-b10)
+OpenJDK 64-Bit Server VM (build 25.222-b10, mixed mode)
diff --git a/doc/modules/cassandra/examples/RESULTS/no_bups.result b/doc/modules/cassandra/examples/RESULTS/no_bups.result
new file mode 100644
index 00000000000..92811047f08
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/no_bups.result
@@ -0,0 +1 @@
+total 0
diff --git a/doc/modules/cassandra/examples/RESULTS/nodetool_list_snapshots.result b/doc/modules/cassandra/examples/RESULTS/nodetool_list_snapshots.result
new file mode 100644
index 00000000000..15503eded91
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/nodetool_list_snapshots.result
@@ -0,0 +1,13 @@
+Snapshot Details:
+Snapshot name Keyspace name Column family name True size Size on disk
+multi-table cqlkeyspace t2 4.86 KiB 5.67 KiB
+multi-table cqlkeyspace t 4.89 KiB 5.7 KiB
+multi-ks cqlkeyspace t 4.89 KiB 5.7 KiB
+multi-ks catalogkeyspace journal 4.9 KiB 5.73 KiB
+magazine catalogkeyspace magazine 4.9 KiB 5.73 KiB
+multi-table-2 cqlkeyspace t2 4.86 KiB 5.67 KiB
+multi-table-2 cqlkeyspace t 4.89 KiB 5.7 KiB
+catalog-ks catalogkeyspace journal 4.9 KiB 5.73 KiB
+catalog-ks catalogkeyspace magazine 4.9 KiB 5.73 KiB
+
+Total TrueDiskSpaceUsed: 44.02 KiB
diff --git a/doc/modules/cassandra/examples/RESULTS/nodetool_snapshot_help.result b/doc/modules/cassandra/examples/RESULTS/nodetool_snapshot_help.result
new file mode 100644
index 00000000000..a58360872a2
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/nodetool_snapshot_help.result
@@ -0,0 +1,54 @@
+NAME
+ nodetool snapshot - Take a snapshot of specified keyspaces or a snapshot
+ of the specified table
+
+SYNOPSIS
+ nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+         [(-pp | --print-port)] [(-pw <password> | --password <password>)]
+         [(-pwf <passwordFilePath> | --password-file <passwordFilePath>)]
+         [(-u <username> | --username <username>)] snapshot
+         [(-cf <table> | --column-family <table> | --table <table>)]
+         [(-kt <ktlist> | --kt-list <ktlist> | -kc <ktlist> | --kc.list <ktlist>)]
+         [(-sf | --skip-flush)] [(-t <tag> | --tag <tag>)] [--] [<keyspaces...>]
+
+OPTIONS
+ -cf <table>, --column-family <table>, --table <table>
+ The table name (you must specify one and only one keyspace for using
+ this option)
+
+ -h <host>, --host <host>
+ Node hostname or ip address
+
+ -kt <ktlist>, --kt-list <ktlist>, -kc <ktlist>, --kc.list <ktlist>
+ The list of Keyspace.table to take snapshot.(you must not specify
+ only keyspace)
+
+ -p <port>, --port <port>
+ Remote jmx agent port number
+
+ -pp, --print-port
+ Operate in 4.0 mode with hosts disambiguated by port number
+
+ -pw <password>, --password <password>
+ Remote jmx agent password
+
+ -pwf <passwordFilePath>, --password-file <passwordFilePath>
+ Path to the JMX password file
+
+ -sf, --skip-flush
+ Do not flush memtables before snapshotting (snapshot will not
+ contain unflushed data)
+
+ -t <tag>, --tag <tag>
+ The name of the snapshot
+
+ -u <username>, --username <username>
+ Remote jmx agent username
+
+ --
+ This option can be used to separate command-line options from the
+ list of argument, (useful when arguments might be mistaken for
+ command-line options
+
+ [<keyspaces...>]
+ List of keyspaces. By default, all keyspaces
diff --git a/doc/modules/cassandra/examples/RESULTS/select_data2_backup.result b/doc/modules/cassandra/examples/RESULTS/select_data2_backup.result
new file mode 100644
index 00000000000..23e3902d20c
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/select_data2_backup.result
@@ -0,0 +1,13 @@
+id | name | publisher
+----+---------------------------+------------------
+ 1 | Couchbase Magazine | Couchbase
+ 0 | Apache Cassandra Magazine | Apache Cassandra
+
+ (2 rows)
+
+id | name | publisher
+----+---------------------------+------------------
+ 1 | Couchbase Magazine | Couchbase
+ 0 | Apache Cassandra Magazine | Apache Cassandra
+
+ (2 rows)
diff --git a/doc/modules/cassandra/examples/RESULTS/select_data_backup.result b/doc/modules/cassandra/examples/RESULTS/select_data_backup.result
new file mode 100644
index 00000000000..5d6a9e33bce
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/select_data_backup.result
@@ -0,0 +1,15 @@
+id | k | v
+----+---+------
+ 1 | 1 | val1
+ 0 | 0 | val0
+
+ (2 rows)
+
+
+id | k | v
+----+---+------
+ 1 | 1 | val1
+ 0 | 0 | val0
+ 2 | 2 | val2
+
+ (3 rows)
diff --git a/doc/modules/cassandra/examples/RESULTS/select_range.result b/doc/modules/cassandra/examples/RESULTS/select_range.result
new file mode 100644
index 00000000000..a3d1c765144
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/select_range.result
@@ -0,0 +1,6 @@
+ a | b | c | d
+---+---+---+---
+ 0 | 1 | 2 | 2
+ 0 | 1 | 3 | 3
+
+(2 rows)
diff --git a/doc/modules/cassandra/examples/RESULTS/select_static_data.result b/doc/modules/cassandra/examples/RESULTS/select_static_data.result
new file mode 100644
index 00000000000..f1e8decde8c
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/select_static_data.result
@@ -0,0 +1,4 @@
+ pk | t | v | s
+ ----+---+--------+-----------
+ 0 | 0 | 'val0' | 'static1'
+ 0 | 1 | 'val1' | 'static1'
diff --git a/doc/modules/cassandra/examples/RESULTS/select_table_clustercolumn.result b/doc/modules/cassandra/examples/RESULTS/select_table_clustercolumn.result
new file mode 100644
index 00000000000..1d3899db929
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/select_table_clustercolumn.result
@@ -0,0 +1,9 @@
+ a | b | c | d
+---+---+---+---
+ 1 | 1 | 4 | 4 <1>
+ 0 | 0 | 0 | 0
+ 0 | 0 | 1 | 1
+ 0 | 1 | 2 | 2
+ 0 | 1 | 3 | 3
+
+(5 rows)
diff --git a/doc/modules/cassandra/examples/RESULTS/select_table_compound_pk.result b/doc/modules/cassandra/examples/RESULTS/select_table_compound_pk.result
new file mode 100644
index 00000000000..d098516b112
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/select_table_compound_pk.result
@@ -0,0 +1,9 @@
+ a | b | c | d
+---+---+---+---
+ 0 | 0 | 0 | 0 <1>
+ 0 | 0 | 1 | 1
+ 0 | 1 | 2 | 2 <2>
+ 0 | 1 | 3 | 3
+ 1 | 1 | 4 | 4 <3>
+
+(5 rows)
diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_all.result b/doc/modules/cassandra/examples/RESULTS/snapshot_all.result
new file mode 100644
index 00000000000..6ec55a023ca
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/snapshot_all.result
@@ -0,0 +1,4 @@
+./cassandra/data/data/cqlkeyspace/t-d132e240c21711e9bbee19821dcea330/snapshots
+./cassandra/data/data/cqlkeyspace/t2-d993a390c22911e9b1350d927649052c/snapshots
+./cassandra/data/data/catalogkeyspace/journal-296a2d30c22a11e9b1350d927649052c/snapshots
+./cassandra/data/data/catalogkeyspace/magazine-446eae30c22a11e9b1350d927649052c/snapshots
diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_backup2.result b/doc/modules/cassandra/examples/RESULTS/snapshot_backup2.result
new file mode 100644
index 00000000000..8276d520394
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/snapshot_backup2.result
@@ -0,0 +1,3 @@
+Requested creating snapshot(s) for [catalogkeyspace] with snapshot name [catalog-ks] and
+options {skipFlush=false}
+Snapshot directory: catalog-ks
diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_backup2_find.result b/doc/modules/cassandra/examples/RESULTS/snapshot_backup2_find.result
new file mode 100644
index 00000000000..88b54997689
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/snapshot_backup2_find.result
@@ -0,0 +1,2 @@
+./cassandra/data/data/catalogkeyspace/journal-296a2d30c22a11e9b1350d927649052c/snapshots
+./cassandra/data/data/catalogkeyspace/magazine-446eae30c22a11e9b1350d927649052c/snapshots
diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_files.result b/doc/modules/cassandra/examples/RESULTS/snapshot_files.result
new file mode 100644
index 00000000000..8dd91b5ce80
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/snapshot_files.result
@@ -0,0 +1,11 @@
+total 44
+-rw-rw-r--. 1 ec2-user ec2-user 31 Aug 19 02:44 manifest.json
+-rw-rw-r--. 4 ec2-user ec2-user 47 Aug 19 02:38 na-1-big-CompressionInfo.db
+-rw-rw-r--. 4 ec2-user ec2-user 97 Aug 19 02:38 na-1-big-Data.db
+-rw-rw-r--. 4 ec2-user ec2-user 10 Aug 19 02:38 na-1-big-Digest.crc32
+-rw-rw-r--. 4 ec2-user ec2-user 16 Aug 19 02:38 na-1-big-Filter.db
+-rw-rw-r--. 4 ec2-user ec2-user 16 Aug 19 02:38 na-1-big-Index.db
+-rw-rw-r--. 4 ec2-user ec2-user 4687 Aug 19 02:38 na-1-big-Statistics.db
+-rw-rw-r--. 4 ec2-user ec2-user 56 Aug 19 02:38 na-1-big-Summary.db
+-rw-rw-r--. 4 ec2-user ec2-user 92 Aug 19 02:38 na-1-big-TOC.txt
+-rw-rw-r--. 1 ec2-user ec2-user 814 Aug 19 02:44 schema.cql
diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_mult_ks.result b/doc/modules/cassandra/examples/RESULTS/snapshot_mult_ks.result
new file mode 100644
index 00000000000..61dff939e27
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/snapshot_mult_ks.result
@@ -0,0 +1,3 @@
+Requested creating snapshot(s) for [catalogkeyspace.journal,cqlkeyspace.t] with snapshot
+name [multi-ks] and options {skipFlush=false}
+Snapshot directory: multi-ks
diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_mult_tables.result b/doc/modules/cassandra/examples/RESULTS/snapshot_mult_tables.result
new file mode 100644
index 00000000000..557a6a488c3
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/snapshot_mult_tables.result
@@ -0,0 +1,3 @@
+Requested creating snapshot(s) for ["CQLKeyspace".t,"CQLKeyspace".t2] with snapshot name [multi-
+table] and options {skipFlush=false}
+Snapshot directory: multi-table
diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_mult_tables_again.result b/doc/modules/cassandra/examples/RESULTS/snapshot_mult_tables_again.result
new file mode 100644
index 00000000000..6c09e71e908
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/snapshot_mult_tables_again.result
@@ -0,0 +1,3 @@
+Requested creating snapshot(s) for ["CQLKeyspace".t,"CQLKeyspace".t2] with snapshot name [multi-
+table-2] and options {skipFlush=false}
+Snapshot directory: multi-table-2
diff --git a/doc/modules/cassandra/examples/RESULTS/snapshot_one_table2.result b/doc/modules/cassandra/examples/RESULTS/snapshot_one_table2.result
new file mode 100644
index 00000000000..c147889242c
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/snapshot_one_table2.result
@@ -0,0 +1,3 @@
+Requested creating snapshot(s) for [catalogkeyspace] with snapshot name [magazine] and
+options {skipFlush=false}
+Snapshot directory: magazine
diff --git a/doc/modules/cassandra/examples/RESULTS/tail_syslog.result b/doc/modules/cassandra/examples/RESULTS/tail_syslog.result
new file mode 100644
index 00000000000..cb32dc04388
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/tail_syslog.result
@@ -0,0 +1 @@
+INFO [main] 2019-12-17 03:03:37,526 Server.java:156 - Starting listening for CQL clients on localhost/127.0.0.1:9042 (unencrypted)...
diff --git a/doc/modules/cassandra/examples/RESULTS/verify_gpg.result b/doc/modules/cassandra/examples/RESULTS/verify_gpg.result
new file mode 100644
index 00000000000..443e23d3786
--- /dev/null
+++ b/doc/modules/cassandra/examples/RESULTS/verify_gpg.result
@@ -0,0 +1,2 @@
+apache-cassandra-4.0.0-bin.tar.gz: 28757DDE 589F7041 0F9A6A95 C39EE7E6
+ CDE63440 E2B06B91 AE6B2006 14FA364D
diff --git a/doc/modules/cassandra/examples/TEXT/tarball_install_dirs.txt b/doc/modules/cassandra/examples/TEXT/tarball_install_dirs.txt
new file mode 100644
index 00000000000..99b1a148749
--- /dev/null
+++ b/doc/modules/cassandra/examples/TEXT/tarball_install_dirs.txt
@@ -0,0 +1,11 @@
+/
+ bin/ <1>
+ conf/ <2>
+ data/ <3>
+ doc/
+ interface/
+ javadoc/
+ lib/
+ logs/ <4>
+ pylib/
+ tools/ <5>
diff --git a/doc/modules/cassandra/examples/YAML/auto_snapshot.yaml b/doc/modules/cassandra/examples/YAML/auto_snapshot.yaml
new file mode 100644
index 00000000000..8f5033df4e8
--- /dev/null
+++ b/doc/modules/cassandra/examples/YAML/auto_snapshot.yaml
@@ -0,0 +1 @@
+auto_snapshot: false
diff --git a/doc/modules/cassandra/examples/YAML/incremental_bups.yaml b/doc/modules/cassandra/examples/YAML/incremental_bups.yaml
new file mode 100644
index 00000000000..95fccdb1895
--- /dev/null
+++ b/doc/modules/cassandra/examples/YAML/incremental_bups.yaml
@@ -0,0 +1 @@
+incremental_backups: true
diff --git a/doc/modules/cassandra/examples/YAML/snapshot_before_compaction.yaml b/doc/modules/cassandra/examples/YAML/snapshot_before_compaction.yaml
new file mode 100644
index 00000000000..4ee1b17a6bc
--- /dev/null
+++ b/doc/modules/cassandra/examples/YAML/snapshot_before_compaction.yaml
@@ -0,0 +1 @@
+snapshot_before_compaction: false
diff --git a/doc/source/tools/stress-example.yaml b/doc/modules/cassandra/examples/YAML/stress-example.yaml
similarity index 63%
rename from doc/source/tools/stress-example.yaml
rename to doc/modules/cassandra/examples/YAML/stress-example.yaml
index 4a671028174..17161af27e1 100644
--- a/doc/source/tools/stress-example.yaml
+++ b/doc/modules/cassandra/examples/YAML/stress-example.yaml
@@ -1,21 +1,3 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
specname: example # identifier for this spec if running with multiple yaml files
keyspace: example
diff --git a/doc/source/tools/stress-lwt-example.yaml b/doc/modules/cassandra/examples/YAML/stress-lwt-example.yaml
similarity index 73%
rename from doc/source/tools/stress-lwt-example.yaml
rename to doc/modules/cassandra/examples/YAML/stress-lwt-example.yaml
index 1f12c2491e6..fc5db08145a 100644
--- a/doc/source/tools/stress-lwt-example.yaml
+++ b/doc/modules/cassandra/examples/YAML/stress-lwt-example.yaml
@@ -1,21 +1,3 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
# Keyspace Name
keyspace: stresscql
diff --git a/doc/modules/cassandra/nav.adoc b/doc/modules/cassandra/nav.adoc
new file mode 100644
index 00000000000..21880069c79
--- /dev/null
+++ b/doc/modules/cassandra/nav.adoc
@@ -0,0 +1,106 @@
+* Cassandra
+** xref:getting_started/index.adoc[Getting Started]
+*** xref:getting_started/installing.adoc[Installing Cassandra]
+*** xref:getting_started/configuring.adoc[Configuring Cassandra]
+*** xref:getting_started/querying.adoc[Inserting and querying]
+*** xref:getting_started/drivers.adoc[Client drivers]
+*** xref:getting_started/production.adoc[Production recommendations]
+
+** xref:new/index.adoc[What's new]
+*** xref:new/java11.adoc[Support for Java 11]
+*** xref:new/virtualtables.adoc[Virtual tables]
+*** xref:new/auditlogging.adoc[Audit logging]
+*** xref:new/fqllogging.adoc[Full query logging]
+*** xref:new/messaging.adoc[Improved internode messaging]
+*** xref:new/streaming.adoc[Improved streaming]
+*** xref:new/transientreplication.adoc[Transient replication]
+
+** xref:architecture/index.adoc[Architecture]
+*** xref:architecture/overview.adoc[Overview]
+*** xref:architecture/dynamo.adoc[Dynamo]
+*** xref:architecture/storage_engine.adoc[Storage engine]
+*** xref:architecture/guarantees.adoc[Guarantees]
+
+** xref:data_modeling/index.adoc[Data modeling]
+*** xref:data_modeling/intro.adoc[Introduction]
+*** xref:data_modeling/data_modeling_conceptual.adoc[Conceptual data modeling]
+*** xref:data_modeling/data_modeling_rdbms.adoc[RDBMS design]
+*** xref:data_modeling/data_modeling_queries.adoc[Defining application queries]
+*** xref:data_modeling/data_modeling_logical.adoc[Logical data modeling]
+*** xref:data_modeling/data_modeling_physical.adoc[Physical data modeling]
+*** xref:data_modeling/data_modeling_refining.adoc[Evaluating and refining data models]
+*** xref:data_modeling/data_modeling_schema.adoc[Defining database schema]
+*** xref:data_modeling/data_modeling_tools.adoc[Cassandra data modeling tools]
+
+** xref:cql/index.adoc[Cassandra Query Language (CQL)]
+*** xref:cql/definitions.adoc[Definitions]
+*** xref:cql/types.adoc[Data types]
+*** xref:cql/ddl.adoc[Data definition (DDL)]
+*** xref:cql/dml.adoc[Data manipulation (DML)]
+*** xref:cql/operators.adoc[Operators]
+*** xref:cql/indexes.adoc[Secondary indexes]
+*** xref:cql/mvs.adoc[Materialized views]
+*** xref:cql/functions.adoc[Functions]
+*** xref:cql/json.adoc[JSON]
+*** xref:cql/security.adoc[Security]
+*** xref:cql/triggers.adoc[Triggers]
+*** xref:cql/appendices.adoc[Appendices]
+*** xref:cql/changes.adoc[Changes]
+*** xref:cql/SASI.adoc[SASI]
+*** xref:cql/cql_singlefile.adoc[Single file of CQL information]
+
+** xref:configuration/index.adoc[Configuration]
+*** xref:configuration/cass_yaml_file.adoc[cassandra.yaml]
+*** xref:configuration/cass_rackdc_file.adoc[cassandra-rackdc.properties]
+*** xref:configuration/cass_env_sh_file.adoc[cassandra-env.sh]
+*** xref:configuration/cass_topo_file.adoc[cassandra-topology.properties]
+*** xref:configuration/cass_cl_archive_file.adoc[commitlog-archiving.properties]
+*** xref:configuration/cass_logback_xml_file.adoc[logback.xml]
+*** xref:configuration/cass_jvm_options_file.adoc[jvm-* files]
+
+** xref:operating/index.adoc[Operating]
+*** xref:operating/snitch.adoc[Snitches]
+*** xref:operating/topo_changes.adoc[Topology changes]
+*** xref:operating/repair.adoc[Repair]
+*** xref:operating/read_repair.adoc[Read repair]
+*** xref:operating/hints.adoc[Hints]
+*** xref:operating/bloom_filters.adoc[Bloom filters]
+*** xref:operating/compression.adoc[Compression]
+*** xref:operating/cdc.adoc[Change Data Capture (CDC)]
+*** xref:operating/backups.adoc[Backups]
+*** xref:operating/bulk_loading.adoc[Bulk loading]
+*** xref:operating/metrics.adoc[Metrics]
+*** xref:operating/security.adoc[Security]
+*** xref:operating/hardware.adoc[Hardware]
+*** xref:operating/audit_logging.adoc[Audit logging]
+*** xref:operating/compaction/index.adoc[Compaction]
+
+** xref:tools/index.adoc[Tools]
+*** xref:tools/cqlsh.adoc[cqlsh: the CQL shell]
+*** xref:tools/nodetool/nodetool.adoc[nodetool]
+*** xref:tools/sstable/index.adoc[SSTable tools]
+*** xref:tools/cassandra_stress.adoc[cassandra-stress]
+
+** xref:troubleshooting/index.adoc[Troubleshooting]
+*** xref:troubleshooting/finding_nodes.adoc[Finding misbehaving nodes]
+*** xref:troubleshooting/reading_logs.adoc[Reading Cassandra logs]
+*** xref:troubleshooting/use_nodetool.adoc[Using nodetool]
+*** xref:troubleshooting/use_tools.adoc[Using external tools to deep-dive]
+
+** xref:master@_:ROOT:development/index.adoc[Development]
+*** xref:master@_:ROOT:development/gettingstarted.adoc[Getting started]
+*** xref:master@_:ROOT:development/ide.adoc[Building and IDE integration]
+*** xref:master@_:ROOT:development/testing.adoc[Testing]
+*** xref:master@_:ROOT:development/patches.adoc[Contributing code changes]
+*** xref:master@_:ROOT:development/code_style.adoc[Code style]
+*** xref:master@_:ROOT:development/how_to_review.adoc[Review checklist]
+*** xref:master@_:ROOT:development/how_to_commit.adoc[How to commit]
+*** xref:master@_:ROOT:development/documentation.adoc[Working on documentation]
+*** xref:master@_:ROOT:development/ci.adoc[Jenkins CI environment]
+*** xref:master@_:ROOT:development/dependencies.adoc[Dependency management]
+*** xref:master@_:ROOT:development/release_process.adoc[Release process]
+
+** xref:faq/index.adoc[FAQ]
+
+** xref:plugins/index.adoc[Plug-ins]
+
diff --git a/doc/modules/cassandra/pages/architecture/dynamo.adoc b/doc/modules/cassandra/pages/architecture/dynamo.adoc
new file mode 100644
index 00000000000..e90390a7cbb
--- /dev/null
+++ b/doc/modules/cassandra/pages/architecture/dynamo.adoc
@@ -0,0 +1,531 @@
+= Dynamo
+
+Apache Cassandra relies on a number of techniques from Amazon's
+http://courses.cse.tamu.edu/caverlee/csce438/readings/dynamo-paper.pdf[Dynamo]
+distributed storage key-value system. Each node in the Dynamo system has
+three main components:
+
+* Request coordination over a partitioned dataset
+* Ring membership and failure detection
+* A local persistence (storage) engine
+
+Cassandra primarily draws from the first two clustering components,
+while using a storage engine based on a Log Structured Merge Tree
+(http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.44.2782&rep=rep1&type=pdf[LSM]).
+In particular, Cassandra relies on Dynamo style:
+
+* Dataset partitioning using consistent hashing
+* Multi-master replication using versioned data and tunable consistency
+* Distributed cluster membership and failure detection via a gossip
+protocol
+* Incremental scale-out on commodity hardware
+
+Cassandra was designed this way to meet large-scale (PiB+)
+business-critical storage requirements. In particular, as applications
+demanded full global replication of petabyte scale datasets along with
+always available low-latency reads and writes, it became imperative to
+design a new kind of database model as the relational database systems
+of the time struggled to meet the new requirements of global scale
+applications.
+
+== Dataset Partitioning: Consistent Hashing
+
+Cassandra achieves horizontal scalability by
+https://en.wikipedia.org/wiki/Partition_(database)[partitioning] all
+data stored in the system using a hash function. Each partition is
+replicated to multiple physical nodes, often across failure domains such
+as racks and even datacenters. As every replica can independently accept
+mutations to every key that it owns, every key must be versioned. Unlike
+in the original Dynamo paper where deterministic versions and vector
+clocks were used to reconcile concurrent updates to a key, Cassandra
+uses a simpler last write wins model where every mutation is timestamped
+(including deletes) and then the latest version of data is the "winning"
+value. Formally speaking, Cassandra uses a Last-Write-Wins Element-Set
+conflict-free replicated data type for each CQL row, or
+https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type#LWW-Element-Set_(Last-Write-Wins-Element-Set)[LWW-Element-Set
+CRDT], to resolve conflicting mutations on replica sets.
+
+=== Consistent Hashing using a Token Ring
+
+Cassandra partitions data over storage nodes using a special form of
+hashing called
+https://en.wikipedia.org/wiki/Consistent_hashing[consistent hashing]. In
+naive data hashing, you typically allocate keys to buckets by taking a
+hash of the key modulo the number of buckets. For example, if you want
+to distribute data to 100 nodes using naive hashing you might assign
+every node to a bucket between 0 and 100, hash the input key modulo 100,
+and store the data on the associated bucket. In this naive scheme,
+however, adding a single node might invalidate almost all of the
+mappings.
+
+Cassandra instead maps every node to one or more tokens on a continuous
+hash ring, and defines ownership by hashing a key onto the ring and then
+"walking" the ring in one direction, similar to the
+https://pdos.csail.mit.edu/papers/chord:sigcomm01/chord_sigcomm.pdf[Chord]
+algorithm. The main difference of consistent hashing to naive data
+hashing is that when the number of nodes (buckets) to hash into changes,
+consistent hashing only has to move a small fraction of the keys.
+
+For example, if we have an eight node cluster with evenly spaced tokens,
+and a replication factor (RF) of 3, then to find the owning nodes for a
+key we first hash that key to generate a token (which is just the hash
+of the key), and then we "walk" the ring in a clockwise fashion until we
+encounter three distinct nodes, at which point we have found all the
+replicas of that key. This example of an eight node cluster with
+`RF=3` can be visualized as follows:
+
+image::ring.svg[image]
+
+You can see that in a Dynamo like system, ranges of keys, also known as
+*token ranges*, map to the same physical set of nodes. In this example,
+all keys that fall in the token range excluding token 1 and including
+token 2 (`range(t1, t2]`) are stored on nodes 2, 3 and 4.
+
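+You can inspect the ring position a key hashes to directly from CQL
+with the `token` function (reusing the hypothetical `plays` table from
+the sketch above):
+
+[source,cql]
+----
+-- Returns the token that the partition key hashes to; the replicas
+-- for this row are found by walking the ring clockwise from it.
+SELECT id, token(id) FROM plays WHERE id = '123-afde';
+----
+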
+=== Multiple Tokens per Physical Node (vnodes)
+
+Simple single token consistent hashing works well if you have many
+physical nodes to spread data over, but with evenly spaced tokens and a
+small number of physical nodes, incremental scaling (adding just a few
+nodes of capacity) is difficult because there are no token selections
+for new nodes that can leave the ring balanced. Cassandra seeks to avoid
+token imbalance because uneven token ranges lead to uneven request load.
+For example, in the previous example there is no way to add a ninth
+token without causing imbalance; instead we would have to insert `8`
+tokens in the midpoints of the existing ranges.
+
+The Dynamo paper advocates for the use of "virtual nodes" to solve this
+imbalance problem. Virtual nodes solve the problem by assigning multiple
+tokens in the token ring to each physical node. By allowing a single
+physical node to take multiple positions in the ring, we can make small
+clusters look larger and therefore even with a single physical node
+addition we can make it look like we added many more nodes, effectively
+taking many smaller pieces of data from more ring neighbors when we add
+even a single node.
+
+Cassandra introduces some nomenclature to handle these concepts:
+
+* *Token*: A single position on the dynamo style hash
+ring.
+* *Endpoint*: A single physical IP and port on the network.
+* *Host ID*: A unique identifier for a single "physical" node, usually
+present at one `Endpoint` and containing one or more
+`Tokens`.
+* *Virtual Node* (or *vnode*): A `Token` on the hash ring
+owned by the same physical node, one with the same `Host
+ID`.
+
+The mapping of *Tokens* to *Endpoints* gives rise to the *Token Map*
+where Cassandra keeps track of what ring positions map to which physical
+endpoints. For example, in the following figure we can represent an
+eight node cluster using only four physical nodes by assigning two
+tokens to every node:
+
+image::vnodes.svg[image]
+
+Multiple tokens per physical node provide the following benefits:
+
+[arabic]
+. When a new node is added it accepts approximately equal amounts of
+data from other nodes in the ring, resulting in equal distribution of
+data across the cluster.
+. When a node is decommissioned, it loses data roughly equally to other
+members of the ring, again keeping equal distribution of data across the
+cluster.
+. If a node becomes unavailable, query load (especially token aware
+query load), is evenly distributed across many other nodes.
+
+Multiple tokens, however, can also have disadvantages:
+
+[arabic]
+. Every token introduces up to `2 * (RF - 1)` additional neighbors on
+the token ring, which means that there are more combinations of node
+failures where we lose availability for a portion of the token ring. The
+more tokens you have,
+https://jolynch.github.io/pdf/cassandra-availability-virtual.pdf[the
+higher the probability of an outage].
+. Cluster-wide maintenance operations are often slowed. For example, as
+the number of tokens per node is increased, the number of discrete
+repair operations the cluster must do also increases.
+. Performance of operations that span token ranges could be affected.
+
+Note that in Cassandra `2.x`, the only token allocation algorithm
+available was picking random tokens, which meant that to keep balance
+the default number of tokens per node had to be quite high, at `256`.
+This had the effect of coupling many physical endpoints together,
+increasing the risk of unavailability. That is why in `3.x +` the new
+deterministic token allocator was added which intelligently picks tokens
+such that the ring is optimally balanced while requiring a much lower
+number of tokens per physical node.
+
+== Multi-master Replication: Versioned Data and Tunable Consistency
+
+Cassandra replicates every partition of data to many nodes across the
+cluster to maintain high availability and durability. When a mutation
+occurs, the coordinator hashes the partition key to determine the token
+range the data belongs to and then replicates the mutation to the
+replicas of that data according to the
+`Replication Strategy`.
+
+All replication strategies have the notion of a *replication factor*
+(`RF`), which indicates to Cassandra how many copies of the partition
+should exist. For example with a `RF=3` keyspace, the data will be
+written to three distinct *replicas*. Replicas are always chosen such
+that they are distinct physical nodes which is achieved by skipping
+virtual nodes if needed. Replication strategies may also choose to skip
+nodes present in the same failure domain such as racks or datacenters so
+that Cassandra clusters can tolerate failures of whole racks and even
+datacenters of nodes.
+
+=== Replication Strategy
+
+Cassandra supports pluggable *replication strategies*, which determine
+which physical nodes act as replicas for a given token range. Every
+keyspace of data has its own replication strategy. All production
+deployments should use the `NetworkTopologyStrategy` while the
+`SimpleStrategy` replication strategy is useful only for testing
+clusters where you do not yet know the datacenter layout of the cluster.
+
+[[network-topology-strategy]]
+==== `NetworkTopologyStrategy`
+
+`NetworkTopologyStrategy` requires a specified replication factor
+for each datacenter in the cluster. Even if your cluster only uses a
+single datacenter, `NetworkTopologyStrategy` is recommended over
+`SimpleStrategy` to make it easier to add new physical or virtual
+datacenters to the cluster later, if required.
+
+In addition to allowing the replication factor to be specified
+individually by datacenter, `NetworkTopologyStrategy` also attempts to
+choose replicas within a datacenter from different racks as specified by
+the `Snitch`. If the number of racks is greater than or equal
+to the replication factor for the datacenter, each replica is guaranteed
+to be chosen from a different rack. Otherwise, each rack will hold at
+least one replica, but some racks may hold more than one. Note that this
+rack-aware behavior has some potentially
+https://issues.apache.org/jira/browse/CASSANDRA-3810[surprising
+implications]. For example, if there are not an equal number of nodes in
+each rack, the data load on the smallest rack may be much higher.
+Similarly, if a single node is bootstrapped into a brand new rack, it
+will be considered a replica for the entire ring. For this reason, many
+operators choose to configure all nodes in a single availability zone or
+similar failure domain as a single "rack".
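+
+As an illustrative sketch (the keyspace and datacenter names are
+hypothetical and must match the datacenters your snitch reports), a
+keyspace replicated three times in each of two datacenters could be
+created as:
+
+[source,cql]
+----
+CREATE KEYSPACE example_ks
+  WITH replication = {'class': 'NetworkTopologyStrategy',
+                      'DC1': '3', 'DC2': '3'};
+----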
+
+[[simple-strategy]]
+==== `SimpleStrategy`
+
+`SimpleStrategy` allows a single integer `replication_factor` to be
+defined. This determines the number of nodes that should contain a copy
+of each row. For example, if `replication_factor` is 3, then three
+different nodes should store a copy of each row.
+
+`SimpleStrategy` treats all nodes identically, ignoring any configured
+datacenters or racks. To determine the replicas for a token range,
+Cassandra iterates through the tokens in the ring, starting with the
+token range of interest. For each token, it checks whether the owning
+node has been added to the set of replicas, and if it has not, it is
+added to the set. This process continues until `replication_factor`
+distinct nodes have been added to the set of replicas.
+
+==== Transient Replication
+
+Transient replication is an experimental feature in Cassandra {40_version} not
+present in the original Dynamo paper. This feature allows configuration of a
+subset of replicas to replicate only data that hasn't been incrementally
+repaired. This configuration decouples data redundancy from availability.
+For instance, if you have a keyspace replicated at RF=3, and alter it to
+RF=5 with two transient replicas, you go from tolerating one
+failed replica to tolerating two, without corresponding
+increase in storage usage. Now, three nodes will replicate all
+the data for a given token range, and the other two will only replicate
+data that hasn't been incrementally repaired.
+
+To use transient replication, first enable the option in
+`cassandra.yaml`. Once enabled, both `SimpleStrategy` and
+`NetworkTopologyStrategy` can be configured to transiently replicate
+data. Configure it by specifying the replication factor as
+`<total_replicas>/<transient_replicas>`.
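+
+As an illustrative sketch (keyspace name hypothetical, and assuming
+transient replication has been enabled in `cassandra.yaml`), a keyspace
+with five replicas per datacenter, two of which are transient, could be
+declared as:
+
+[source,cql]
+----
+ALTER KEYSPACE example_ks
+  WITH replication = {'class': 'NetworkTopologyStrategy',
+                      'DC1': '5/2'};
+----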
+
+Because replicas can accept mutations independently, data can drift
+between replicas. Cassandra drives convergence back toward consistency
+with best-effort techniques: read repair in the read path and hinted
+handoff in the write path.
+
+These techniques are only best-effort, however, and to guarantee
+eventual consistency Cassandra implements anti-entropy repair, where
+replicas calculate hierarchical hash trees over their datasets, called
+https://en.wikipedia.org/wiki/Merkle_tree[Merkle
+trees], that can then be compared across replicas to identify mismatched
+data. Like the original Dynamo paper, Cassandra supports full repairs,
+where replicas hash their entire dataset, create Merkle trees, send them
+to each other, and sync any ranges that don't match.
+
+Unlike the original Dynamo paper, Cassandra also implements sub-range
+repair and incremental repair. Sub-range repair allows Cassandra to
+increase the resolution of the hash trees (potentially down to the
+single partition level) by creating a larger number of trees that span
+only a portion of the data range. Incremental repair allows Cassandra to
+only repair the partitions that have changed since the last repair.
+
+=== Tunable Consistency
+
+Cassandra supports a per-operation tradeoff between consistency and
+availability through *Consistency Levels*. Cassandra's consistency
+levels are a version of Dynamo's `R + W > N` consistency mechanism, where
+operators can configure the number of nodes that must participate in
+reads (`R`) and writes (`W`) to be larger than the replication factor
+(`N`). In Cassandra, you instead choose from a menu of common
+consistency levels that determine `R` and `W` behavior
+without requiring knowledge of the replication factor. Generally, writes will be visible
+to subsequent reads when the read consistency level contains enough
+nodes to guarantee a quorum intersection with the write consistency
+level.
+
+The following consistency levels are available:
+
+`ONE`::
+ Only a single replica must respond.
+`TWO`::
+ Two replicas must respond.
+`THREE`::
+ Three replicas must respond.
+`QUORUM`::
+ A majority (n/2 + 1) of the replicas must respond.
+`ALL`::
+ All of the replicas must respond.
+`LOCAL_QUORUM`::
+ A majority of the replicas in the local datacenter (whichever
+ datacenter the coordinator is in) must respond.
+`EACH_QUORUM`::
+ A majority of the replicas in each datacenter must respond.
+`LOCAL_ONE`::
+ Only a single replica must respond. In a multi-datacenter cluster,
+this also guarantees that read requests are not sent to replicas in a
+ remote datacenter.
+`ANY`::
+ A single replica may respond, or the coordinator may store a hint. If
+ a hint is stored, the coordinator will later attempt to replay the
+ hint and deliver the mutation to the replicas. This consistency level
+ is only accepted for write operations.
+
+Write operations *are always sent to all replicas*, regardless of
+consistency level. The consistency level simply controls how many
+responses the coordinator waits for before responding to the client.
+
+For read operations, the coordinator generally only issues read commands
+to enough replicas to satisfy the consistency level. The one exception
+is speculative retry, which may issue a redundant read request to
+an extra replica if the original replicas have not responded within a
+specified time window.
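+
+Speculative retry is configured per table. As an illustrative sketch
+(keyspace and table names hypothetical), the coordinator can be told to
+issue a redundant read once a replica is slower than the table's p99
+latency:
+
+[source,cql]
+----
+ALTER TABLE example_ks.users
+  WITH speculative_retry = '99PERCENTILE';
+----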
+
+==== Picking Consistency Levels
+
+It is common to pick read and write consistency levels such that the
+replica sets overlap, resulting in all acknowledged writes being visible
+to subsequent reads. This is typically expressed in the same terms
+Dynamo does, in that `W + R > RF`, where `W` is the write consistency
+level, `R` is the read consistency level, and `RF` is the replication
+factor. For example, if `RF = 3`, a `QUORUM` request will require
+responses from at least two of the three replicas. If `QUORUM` is used for both
+writes and reads, at least one of the replicas is guaranteed to
+participate in _both_ the write and the read request, which in turn
+guarantees that the quorums will overlap and the write will be visible
+to the read.
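+
+For example, in `cqlsh` the consistency level is a session setting, so a
+minimal sketch of this pattern (keyspace and table names hypothetical)
+is a quorum write followed by a quorum read:
+
+[source,cql]
+----
+CONSISTENCY QUORUM;
+INSERT INTO example_ks.users (id, name) VALUES (1, 'alice');
+SELECT name FROM example_ks.users WHERE id = 1;
+----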
+
+In a multi-datacenter environment, `LOCAL_QUORUM` can be used to provide
+a weaker but still useful guarantee: reads are guaranteed to see the
+latest write from within the same datacenter. This is often sufficient
+as clients homed to a single datacenter will read their own writes.
+
+If this type of strong consistency isn't required, lower consistency
+levels like `LOCAL_ONE` or `ONE` may be used to improve throughput,
+latency, and availability. With replication spanning multiple
+datacenters, `LOCAL_ONE` is typically less available than `ONE`, but is
+often faster as requests are guaranteed to stay within the local
+datacenter. `ONE`, by contrast, will succeed if a single replica is
+available in any datacenter.
+
+== Distributed Cluster Membership and Failure Detection
+
+The replication protocols and dataset partitioning rely on knowing which
+nodes are alive and dead in the cluster so that write and read
+operations can be optimally routed. In Cassandra liveness information is
+shared in a distributed fashion through a failure detection mechanism
+based on a gossip protocol.
+
+=== Gossip
+
+Gossip is how Cassandra propagates basic cluster bootstrapping
+information such as endpoint membership and internode network protocol
+versions. In Cassandra's gossip system, nodes exchange state information
+not only about themselves but also about other nodes they know about.
+This information is versioned with a vector clock of
+`(generation, version)` tuples, where the generation is a monotonic
+timestamp and the version is a logical clock that increments roughly every
+second. These logical clocks allow Cassandra gossip to ignore old
+versions of cluster state just by inspecting the logical clocks
+presented with gossip messages.
+
+Every node in the Cassandra cluster runs the gossip task independently
+and periodically. Every second, every node in the cluster:
+
+[arabic]
+. Updates the local node's heartbeat state (the version) and constructs
+the node's local view of the cluster gossip endpoint state.
+. Picks a random other node in the cluster to exchange gossip endpoint
+state with.
+. Probabilistically attempts to gossip with any unreachable nodes (if
+any exist).
+. Gossips with a seed node if that didn't happen in step 2.
+
+When an operator first bootstraps a Cassandra cluster, they designate
+certain nodes as seed nodes. Any node can be a seed node, and the only
+difference between seed and non-seed nodes is that seed nodes are
+allowed to bootstrap into the ring without seeing any other seed nodes.
+Furthermore, once a cluster is bootstrapped, seed nodes become
+hotspots for gossip due to step 4 above.
+
+As non-seed nodes must be able to contact at least one seed node in
+order to bootstrap into the cluster, it is common to include multiple
+seed nodes, often one for each rack or datacenter. Seed nodes are often
+chosen using existing off-the-shelf service discovery mechanisms.
+
+[NOTE]
+.Note
+====
+Nodes do not have to agree on the seed nodes, and indeed once a cluster
+is bootstrapped, newly launched nodes can be configured to use any
+existing nodes as seeds. The only advantage to picking the same nodes
+as seeds is that it increases their usefulness as gossip hotspots.
+====
+
+Currently, gossip also propagates token metadata and schema
+_version_ information. This information forms the control plane for
+scheduling data movements and schema pulls. For example, if a node sees
+a mismatch in schema version in gossip state, it will schedule a schema
+sync task with the other nodes. As token information propagates via
+gossip, it is also the control plane for teaching nodes which endpoints
+own which data.
+
+=== Ring Membership and Failure Detection
+
+Gossip forms the basis of ring membership, but the *failure detector*
+ultimately makes decisions about whether nodes are `UP` or `DOWN`. Every node
+in Cassandra runs a variant of the
+https://www.computer.org/csdl/proceedings-article/srds/2004/22390066/12OmNvT2phv[Phi
+Accrual Failure Detector], in which every node constantly makes an
+independent decision about whether its peer nodes are available. This
+decision is primarily based on received heartbeat state. For example, if
+a node does not see an increasing heartbeat from a node for a certain
+amount of time, the failure detector "convicts" that node, at which
+point Cassandra will stop routing reads to it (writes will typically be
+written to hints). If/when the node starts heartbeating again, Cassandra
+will try to reach out and connect, and if it can open communication
+channels it will mark that node as available.
+
+[NOTE]
+.Note
+====
+`UP` and `DOWN` state are local node decisions and are not propagated with
+gossip. Heartbeat state is propagated with gossip, but nodes will not
+consider each other as `UP` until they can successfully message each
+other over an actual network channel.
+====
+
+Cassandra will never remove a node from gossip state without
+explicit instruction from an operator via a decommission operation or a
+new node bootstrapping with a `replace_address_first_boot` option. This
+choice is intentional to allow Cassandra nodes to temporarily fail
+without causing data to needlessly re-balance. This also helps to
+prevent simultaneous range movements, where multiple replicas of a token
+range are moving at the same time, which can violate monotonic
+consistency and can even cause data loss.
+
+== Incremental Scale-out on Commodity Hardware
+
+Cassandra scales out to meet the requirements of growth in data size and
+request rates. Scaling out means adding additional nodes to the ring,
+and every additional node brings linear improvements in compute and
+storage. In contrast, scaling up means adding more capacity to the
+existing database nodes. Cassandra is also capable of scaling up, and in
+certain environments it may be preferable depending on the deployment.
+Cassandra gives operators the flexibility to choose either scale-out or
+scale-up.
+
+One key aspect of Dynamo that Cassandra follows is to attempt to run on
+commodity hardware, and many engineering choices are made under this
+assumption. For example, Cassandra assumes nodes can fail at any time,
+auto-tunes to make the best use of CPU and memory resources available,
+and makes heavy use of advanced compression and caching techniques to
+get the most storage out of limited memory and storage capabilities.
+
+=== Simple Query Model
+
+Cassandra, like Dynamo, chooses not to provide cross-partition
+transactions that are common in SQL Relational Database Management
+Systems (RDBMS). This both gives the programmer a simpler read and write
+API, and allows Cassandra to scale horizontally more easily, since
+multi-partition transactions spanning multiple nodes are notoriously
+difficult to implement and typically incur high latency.
+
+Instead, Cassandra chooses to offer fast, consistent latency at any
+scale for single-partition operations, allowing retrieval of entire
+partitions or only subsets of partitions based on primary key filters.
+Furthermore, Cassandra does support single partition compare and swap
+functionality via the lightweight transaction CQL API.
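+
+As an illustrative sketch (keyspace, table, and column names
+hypothetical), single-partition reads and a compare-and-swap write look
+like:
+
+[source,cql]
+----
+-- retrieve an entire partition
+SELECT * FROM example_ks.events WHERE sensor_id = 42;
+
+-- retrieve a subset of a partition using a clustering key filter
+SELECT * FROM example_ks.events
+  WHERE sensor_id = 42 AND event_time > '2020-01-01';
+
+-- single-partition compare and swap via a lightweight transaction
+INSERT INTO example_ks.events (sensor_id, event_time, reading)
+  VALUES (42, '2020-01-02', 3.14) IF NOT EXISTS;
+----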
+
+=== Simple Interface for Storing Records
+
+Cassandra, in a slight departure from Dynamo, chooses a storage
+interface that is more sophisticated than "simple key value" stores but
+significantly less complex than SQL relational data models. Cassandra
+presents a wide-column store interface, where partitions of data contain
+multiple rows, each of which contains a flexible set of individually
+typed columns. Every row is uniquely identified by the partition key and
+one or more clustering keys, and every row can have as many columns as
+needed.
+
+This allows users to flexibly add new columns to existing datasets as
+new requirements surface. Schema changes involve only metadata changes
+and run fully concurrently with live workloads. Therefore, users can
+safely add columns to existing Cassandra databases while remaining
+confident that query performance will not degrade.
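+
+For instance, adding a column to a live table is a metadata-only
+operation (keyspace, table, and column names hypothetical):
+
+[source,cql]
+----
+ALTER TABLE example_ks.events ADD is_calibrated boolean;
+----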
diff --git a/doc/modules/cassandra/pages/architecture/guarantees.adoc b/doc/modules/cassandra/pages/architecture/guarantees.adoc
new file mode 100644
index 00000000000..3313a1140cf
--- /dev/null
+++ b/doc/modules/cassandra/pages/architecture/guarantees.adoc
@@ -0,0 +1,108 @@
+= Guarantees
+
+Apache Cassandra is a highly scalable and reliable database. Cassandra
+is used in web-based applications that serve a large number of clients
+and process web-scale (petabyte) quantities of data. Cassandra makes
+some guarantees about its scalability, availability, and reliability.
+To fully understand the inherent limitations of a storage system in an
+environment where a certain level of network partition failure is
+expected and must be accounted for in the system design, it is important
+to first briefly introduce the CAP theorem.
+
+== What is CAP?
+
+According to the CAP theorem, it is not possible for a distributed data
+store to provide more than two of the following three guarantees
+simultaneously:
+
+* Consistency: Consistency implies that every read receives the most
+recent write or an error.
+* Availability: Availability implies that every request receives a
+response. It is not guaranteed that the response contains the most
+recent write.
+* Partition tolerance: Partition tolerance refers to a storage system's
+ability to tolerate network partitions. Even if some messages are
+dropped or delayed, the system continues to operate.
+
+The CAP theorem implies that, when operating across a network with the
+inherent risk of partitions, one has to choose between consistency and
+availability; both cannot be guaranteed at the same time. The CAP
+theorem is illustrated in Figure 1.
+
+image::Figure_1_guarantees.jpg[image]
+
+Figure 1. CAP Theorem
+
+High availability is a priority in web-based applications. To this
+end, Cassandra chooses Availability and Partition Tolerance from
+the CAP guarantees, compromising on data Consistency to some extent.
+
+Cassandra makes the following guarantees:
+
+* High Scalability
+* High Availability
+* Durability
+* Eventual Consistency of writes to a single table
+* Lightweight transactions with linearizable consistency
+* Batched writes across multiple tables are guaranteed to succeed
+completely or not at all
+* Secondary indexes are guaranteed to be consistent with their local
+replica's data
+
+== High Scalability
+
+Cassandra is a highly scalable storage system in which nodes may be
+added or removed as needed. Using a gossip-based protocol, a unified and
+consistent membership list is kept at each node.
+
+== High Availability
+
+Cassandra guarantees high availability of data by implementing a
+fault-tolerant storage system. Node failures are detected
+using a gossip-based protocol.
+
+== Durability
+
+Cassandra guarantees data durability by using replicas. Replicas are
+multiple copies of the data stored on different nodes in a cluster. In a
+multi-datacenter environment, the replicas may be stored in different
+datacenters. If one replica is lost due to an unrecoverable node or
+datacenter failure, the data is not completely lost, as other replicas
+remain available.
+
+== Eventual Consistency
+
+To meet the requirements of performance, reliability, scalability, and
+high availability in production, Cassandra is an eventually consistent
+storage system. Eventual consistency implies that all updates reach all
+replicas eventually. Divergent versions of the same data may exist
+temporarily, but they are eventually reconciled to a consistent state.
+Eventual consistency is a tradeoff made to achieve high availability,
+and it involves some read and write latency.
+
+== Lightweight transactions with linearizable consistency
+
+Lightweight transactions are used when data must be read and written in
+a sequential order. Cassandra implements lightweight transactions with
+the Paxos consensus protocol, which handles concurrent operations using
+linearizable consistency. Linearizable consistency is sequential
+consistency with real-time constraints, and it ensures transaction
+isolation with compare-and-set (CAS) transactions. With CAS,
+replica data is compared, and any data found to be out of date is set
+to the most consistent value. Reads with linearizable consistency allow
+reading the current state of the data, which may possibly be
+uncommitted, without making a new addition or update.
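+
+As an illustrative sketch (keyspace, table, and column names
+hypothetical), a compare-and-set lightweight transaction in CQL:
+
+[source,cql]
+----
+UPDATE example_ks.users
+  SET name = 'bob'
+  WHERE id = 1
+  IF name = 'alice';
+----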
+
+== Batched Writes
+
+The guarantee for batched writes across multiple tables is that they
+will eventually succeed completely, or not at all. Batch data is first
+written to the batchlog system data, and when the batch data has been
+successfully stored in the cluster the batchlog data is removed. The
+batchlog is replicated to another node to ensure that the full batch
+completes in the event the coordinator node fails.
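+
+A minimal sketch (keyspace and table names hypothetical) of a logged
+batch spanning two tables, which will eventually apply completely or not
+at all:
+
+[source,cql]
+----
+BEGIN BATCH
+  INSERT INTO example_ks.users (id, name) VALUES (1, 'alice');
+  INSERT INTO example_ks.users_by_name (name, id) VALUES ('alice', 1);
+APPLY BATCH;
+----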
+
+== Secondary Indexes
+
+A secondary index is an index on a column that is used to query a table
+on a column that is not normally queryable. Once built, secondary
+indexes are guaranteed to be consistent with their local replicas.
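+
+As an illustrative sketch (keyspace, table, index, and column names
+hypothetical), creating a secondary index makes a non-primary-key column
+queryable:
+
+[source,cql]
+----
+CREATE INDEX users_by_name_idx ON example_ks.users (name);
+SELECT id FROM example_ks.users WHERE name = 'alice';
+----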
diff --git a/doc/modules/cassandra/pages/architecture/images/ring.svg b/doc/modules/cassandra/pages/architecture/images/ring.svg
new file mode 100644
index 00000000000..d0db8c579e3
--- /dev/null
+++ b/doc/modules/cassandra/pages/architecture/images/ring.svg
@@ -0,0 +1,11 @@
+
+
+
+
+ ...
+
diff --git a/doc/modules/cassandra/pages/architecture/images/vnodes.svg b/doc/modules/cassandra/pages/architecture/images/vnodes.svg
new file mode 100644
index 00000000000..71b4fa2d8b9
--- /dev/null
+++ b/doc/modules/cassandra/pages/architecture/images/vnodes.svg
@@ -0,0 +1,11 @@
+
+
+
+
+
+
diff --git a/doc/modules/cassandra/pages/architecture/index.adoc b/doc/modules/cassandra/pages/architecture/index.adoc
new file mode 100644
index 00000000000..c4bef05cfdf
--- /dev/null
+++ b/doc/modules/cassandra/pages/architecture/index.adoc
@@ -0,0 +1,9 @@
+= Architecture
+
+This section describes the general architecture of Apache Cassandra.
+
+* xref:architecture/overview.adoc[Overview]
+* xref:architecture/dynamo.adoc[Dynamo]
+* xref:architecture/storage_engine.adoc[Storage Engine]
+* xref:architecture/guarantees.adoc[Guarantees]
+* xref:architecture/snitch.adoc[Snitches]
diff --git a/doc/modules/cassandra/pages/architecture/overview.adoc b/doc/modules/cassandra/pages/architecture/overview.adoc
new file mode 100644
index 00000000000..605e347830a
--- /dev/null
+++ b/doc/modules/cassandra/pages/architecture/overview.adoc
@@ -0,0 +1,101 @@
+= Overview
+:exper: experimental
+
+Apache Cassandra is an open source, distributed, NoSQL database. It
+presents a partitioned wide column storage model with eventually
+consistent semantics.
+
+Apache Cassandra was initially designed at
+https://www.cs.cornell.edu/projects/ladis2009/papers/lakshman-ladis2009.pdf[Facebook]
+using a staged event-driven architecture
+(http://www.sosp.org/2001/papers/welsh.pdf[SEDA]) to implement a
+combination of Amazon’s
+http://courses.cse.tamu.edu/caverlee/csce438/readings/dynamo-paper.pdf[Dynamo]
+distributed storage and replication techniques and Google's
+https://static.googleusercontent.com/media/research.google.com/en//archive/bigtable-osdi06.pdf[Bigtable]
+data and storage engine model. Dynamo and Bigtable were both developed
+to meet emerging requirements for scalable, reliable and highly
+available storage systems, but each had areas that could be improved.
+
+Cassandra was designed as a best-in-class combination of both systems to
+meet emerging large-scale storage requirements, both in data footprint
+and query volume. As applications began to require full global
+replication and always-available low-latency reads and writes, it became
+imperative to design a new kind of database model, as the relational
+database systems of the time struggled to meet the new requirements of
+global-scale applications.
+
+Systems like Cassandra are designed for these challenges and seek the
+following design objectives:
+
+* Full multi-master database replication
+* Global availability at low latency
+* Scaling out on commodity hardware
+* Linear throughput increase with each additional processor
+* Online load balancing and cluster growth
+* Partitioned key-oriented queries
+* Flexible schema
+
+== Features
+
+Cassandra provides the Cassandra Query Language (xref:cql/ddl.adoc[CQL]), an SQL-like
+language, to create and update database schema and access data. CQL
+allows users to organize data within a cluster of Cassandra nodes using:
+
+* *Keyspace*: Defines how a dataset is replicated, per datacenter.
+Replication is the number of copies saved per cluster.
+Keyspaces contain tables.
+* *Table*: Defines the typed schema for a collection of partitions.
+Tables contain partitions, which contain rows, which contain columns.
+New columns can be added to Cassandra tables flexibly, with zero downtime.
+* *Partition*: Defines the mandatory part of the primary key that all
+rows in Cassandra must have; it identifies the node in the cluster where
+the row is stored. All performant queries supply the partition key in
+the query.
+* *Row*: Contains a collection of columns identified by a unique primary
+key made up of the partition key and optionally additional clustering
+keys.
+* *Column*: A single datum with a type which belongs to a row.
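+
+Putting these concepts together, a minimal sketch (all names
+hypothetical) of a keyspace, a table, and a row:
+
+[source,cql]
+----
+CREATE KEYSPACE example_ks
+  WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': '3'};
+
+CREATE TABLE example_ks.events (
+  sensor_id int,         -- partition key
+  event_time timestamp,  -- clustering key
+  reading double,        -- regular typed column
+  PRIMARY KEY (sensor_id, event_time)
+);
+
+INSERT INTO example_ks.events (sensor_id, event_time, reading)
+  VALUES (42, '2020-01-01 00:00:00', 3.14);
+----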
+
+CQL supports numerous advanced features over a partitioned dataset such
+as:
+
+* Single partition lightweight transactions with atomic compare and set
+semantics.
+* User-defined types, functions and aggregates
+* Collection types including sets, maps, and lists.
+* Local secondary indices
+* (Experimental) materialized views
+
+Cassandra explicitly chooses not to implement operations that require
+cross-partition coordination, as such operations are typically slow and
+make it hard to provide highly available global semantics. For example,
+Cassandra does not support:
+
+* Cross partition transactions
+* Distributed joins
+* Foreign keys or referential integrity.
+
+== Operating
+
+Apache Cassandra is configured through the
+`cassandra.yaml` file, which can be edited by hand or with the aid of
+configuration management tools. Some settings can be manipulated live
+using an online interface, but others require a restart of the database
+to take effect.
+
+Cassandra provides tools for managing a cluster. The `nodetool` command
+interacts with Cassandra's live control interface, allowing runtime
+manipulation of many settings from `cassandra.yaml`. The
+`auditlogviewer` is used to view the audit logs. The `fqltool` is used
+to view, replay and compare full query logs. The `auditlogviewer` and
+`fqltool` are new tools in Apache Cassandra {40_version}.
+
+In addition, Cassandra supports out of the box atomic snapshot
+functionality, which presents a point in time snapshot of Cassandra's
+data for easy integration with many backup tools. Cassandra also
+supports incremental backups where data can be backed up as it is
+written.
+
+Apache Cassandra {40_version} has added several new features including virtual
+tables, transient replication ({exper}), audit logging, full query logging, and
+support for Java 11 ({exper}).
diff --git a/doc/modules/cassandra/pages/architecture/snitch.adoc b/doc/modules/cassandra/pages/architecture/snitch.adoc
new file mode 100644
index 00000000000..90b32fb2e2c
--- /dev/null
+++ b/doc/modules/cassandra/pages/architecture/snitch.adoc
@@ -0,0 +1,74 @@
+= Snitch
+
+In Cassandra, the snitch has two functions:
+
+* it teaches Cassandra enough about your network topology to route
+requests efficiently.
+* it allows Cassandra to spread replicas around your cluster to avoid
+correlated failures. It does this by grouping machines into
+"datacenters" and "racks." Cassandra will do its best not to have more
+than one replica on the same "rack" (which may not actually be a
+physical location).
+
+== Dynamic snitching
+
+The dynamic snitch monitors read latencies to avoid reading from hosts
+that have slowed down. The dynamic snitch is configured with the
+following properties in `cassandra.yaml`:
+
+* `dynamic_snitch`: whether the dynamic snitch should be enabled or
+disabled.
+* `dynamic_snitch_update_interval_in_ms`: controls how often to perform
+the more expensive part of host score calculation.
+* `dynamic_snitch_reset_interval_in_ms`: if set greater than zero, this
+will allow 'pinning' of replicas to hosts in order to increase cache
+capacity.
+* `dynamic_snitch_badness_threshold`: The badness threshold
+controls how much worse the pinned host has to be before the dynamic
+snitch will prefer other replicas over it. This is expressed as a double
+which represents a percentage. Thus, a value of 0.2 means Cassandra
+would continue to prefer the static snitch values until the pinned host
+was 20% worse than the fastest.
+
+== Snitch classes
+
+The `endpoint_snitch` parameter in `cassandra.yaml` should be set to the
+class that implements `IEndpointSnitch`, which will be wrapped by the
+dynamic snitch and decides whether two endpoints are in the same data center
+or on the same rack. Out of the box, Cassandra provides the following
+snitch implementations:
+
+GossipingPropertyFileSnitch::
+ This should be your go-to snitch for production use. The rack and
+ datacenter for the local node are defined in
+ cassandra-rackdc.properties and propagated to other nodes via gossip.
+ If `cassandra-topology.properties` exists, it is used as a fallback,
+ allowing migration from the PropertyFileSnitch.
+SimpleSnitch::
+ Treats Strategy order as proximity. This can improve cache locality
+ when disabling read repair. Only appropriate for single-datacenter
+ deployments.
+PropertyFileSnitch::
+ Proximity is determined by rack and data center, which are explicitly
+ configured in `cassandra-topology.properties`.
+Ec2Snitch::
+ Appropriate for EC2 deployments in a single Region, or in multiple
+ regions with inter-region VPC enabled (available since the end of
+ 2017, see
+ https://aws.amazon.com/about-aws/whats-new/2017/11/announcing-support-for-inter-region-vpc-peering/[AWS
+ announcement]). Loads Region and Availability Zone information from
+ the EC2 API. The Region is treated as the datacenter, and the
+ Availability Zone as the rack. Only private IPs are used, so this will
+ work across multiple regions only if inter-region VPC is enabled.
+Ec2MultiRegionSnitch::
+ Uses public IPs as broadcast_address to allow cross-region
+ connectivity (thus, you should set seed addresses to the public IP as
+ well). You will need to open the `storage_port` or `ssl_storage_port`
+ on the public IP firewall (For intra-Region traffic, Cassandra will
+ switch to the private IP after establishing a connection).
+RackInferringSnitch::
+ Proximity is determined by rack and data center, which are assumed to
+ correspond to the 3rd and 2nd octet of each node's IP address,
+ respectively. Unless this happens to match your deployment
+ conventions, this is best used as an example of writing a custom
+ Snitch class and is provided in that spirit.
diff --git a/doc/modules/cassandra/pages/architecture/storage_engine.adoc b/doc/modules/cassandra/pages/architecture/storage_engine.adoc
new file mode 100644
index 00000000000..77c52e5d52f
--- /dev/null
+++ b/doc/modules/cassandra/pages/architecture/storage_engine.adoc
@@ -0,0 +1,225 @@
+= Storage Engine
+
+[[commit-log]]
+== CommitLog
+
+Commitlogs are an append only log of all mutations local to a Cassandra
+node. Any data written to Cassandra will first be written to a commit
+log before being written to a memtable. This provides durability in the
+case of unexpected shutdown. On startup, any mutations in the commit log
+will be applied to memtables.
+
+All mutations are write-optimized by being stored in commitlog segments,
+reducing the number of seeks needed to write to disk. Commitlog segments
+are limited by the `commitlog_segment_size_in_mb` option; once the size
+is reached, a new commitlog segment is created. Commitlog segments can be
+archived, deleted, or recycled once all their data has been flushed to
+SSTables. Commitlog segments are truncated when Cassandra has written
+data older than a certain point to the SSTables. Running "nodetool
+drain" before stopping Cassandra will write everything in the memtables
+to SSTables and remove the need to sync with the commitlogs on startup.
+
+* `commitlog_segment_size_in_mb`: The default size is 32, which is
+almost always fine, but if you are archiving commitlog segments (see
+commitlog_archiving.properties), then you probably want a finer
+granularity of archiving; 8 or 16 MB is reasonable. Max mutation size is
+also configurable via `max_mutation_size_in_kb` setting in `cassandra.yaml`.
+The default is half of `commitlog_segment_size_in_mb * 1024`.
+
+**NOTE: If `max_mutation_size_in_kb` is set explicitly then
+`commitlog_segment_size_in_mb` must be set to at least twice the size of
+`max_mutation_size_in_kb / 1024`**.
+
+* `commitlog_sync`: may be either _periodic_ or _batch_.
+** `batch`: In batch mode, Cassandra won’t ack writes until the commit
+log has been fsynced to disk. It will wait
+"commitlog_sync_batch_window_in_ms" milliseconds between fsyncs. This
+window should be kept short because the writer threads will be unable to
+do extra work while waiting. You may need to increase concurrent_writes
+for the same reason.
++
+- `commitlog_sync_batch_window_in_ms`: Time to wait between "batch"
+fsyncs _Default Value:_ 2
+** `periodic`: In periodic mode, writes are immediately ack'ed, and the
+CommitLog is simply synced every "commitlog_sync_period_in_ms"
+milliseconds.
++
+- `commitlog_sync_period_in_ms`: Time to wait between "periodic" fsyncs
+_Default Value:_ 10000
+
+_Default Value:_ periodic
+
+[NOTE]
+.Note
+====
+In the event of an unexpected shutdown, Cassandra can lose up
+to the sync period, or more if the sync is delayed. If using "batch"
+mode, it is recommended to store commitlogs in a separate, dedicated
+device.
+====
+
+* `commitlog_directory`: This option is commented out by default. When
+running on magnetic HDD, this should be a separate spindle from the data
+directories. If not set, the default directory is
+`$CASSANDRA_HOME/data/commitlog`.
+
+_Default Value:_ /var/lib/cassandra/commitlog
+
+* `commitlog_compression`: Compression to apply to the commitlog. If
+omitted, the commit log will be written uncompressed. LZ4, Snappy,
+Deflate and Zstd compressors are supported.
+
+_Default Value:_ (complex option)
+
+[source, yaml]
+----
+# - class_name: LZ4Compressor
+# parameters:
+----
+
+* `commitlog_total_space_in_mb`: Total space to use for commit logs on
+disk.
+
+If space gets above this value, Cassandra will flush every dirty table
+in the oldest segment and remove it. So a small total commitlog space
+will tend to cause more flush activity on less-active tables.
+
+The default value is the smaller of 8192 MB and 1/4 of the total space of
+the commitlog volume.
+
+_Default Value:_ 8192
+
+== Memtables
+
+Memtables are in-memory structures where Cassandra buffers writes. In
+general, there is one active memtable per table. Eventually, memtables
+are flushed onto disk and become immutable link:#sstables[SSTables].
+This can be triggered in several ways:
+
+* The memory usage of the memtables exceeds the configured threshold
+(see `memtable_cleanup_threshold`)
+* The `commit-log` approaches its maximum size, and forces memtable
+flushes in order to allow commitlog segments to be freed
+
+Memtables may be stored entirely on-heap or partially off-heap,
+depending on `memtable_allocation_type`.
+
+== SSTables
+
+SSTables are the immutable data files that Cassandra uses for persisting
+data on disk.
+
+As SSTables are flushed to disk from `memtables` or are streamed from
+other nodes, Cassandra triggers compactions which combine multiple
+SSTables into one. Once the new SSTable has been written, the old
+SSTables can be removed.
+
+Each SSTable is comprised of multiple components stored in separate
+files:
+
+`Data.db`::
+ The actual data, i.e. the contents of rows.
+`Index.db`::
+ An index from partition keys to positions in the `Data.db` file. For
+ wide partitions, this may also include an index to rows within a
+ partition.
+`Summary.db`::
+ A sampling of (by default) every 128th entry in the `Index.db` file.
+`Filter.db`::
+ A Bloom Filter of the partition keys in the SSTable.
+`CompressionInfo.db`::
+ Metadata about the offsets and lengths of compression chunks in the
+ `Data.db` file.
+`Statistics.db`::
+ Stores metadata about the SSTable, including information about
+ timestamps, tombstones, clustering keys, compaction, repair,
+ compression, TTLs, and more.
+`Digest.crc32`::
+ A CRC-32 digest of the `Data.db` file.
+`TOC.txt`::
+ A plain text list of the component files for the SSTable.
+
+Within the `Data.db` file, rows are organized by partition. These
+partitions are sorted in token order (i.e. by a hash of the partition
+key when the default partitioner, `Murmur3Partitioner`, is used). Within a
+partition, rows are stored in the order of their clustering keys.
+
+SSTables can be optionally compressed using block-based compression.
+
+== SSTable Versions
+
+This section was created using the following
+https://gist.github.com/shyamsalimkumar/49a61e5bc6f403d20c55[gist] which
+utilized this original
+http://www.bajb.net/2013/03/cassandra-sstable-format-version-numbers/[source].
+
+The version numbers, to date, are:
+
+=== Version 0
+
+* b (0.7.0): added version to sstable filenames
+* c (0.7.0): bloom filter component computes hashes over raw key bytes
+instead of strings
+* d (0.7.0): row size in data component becomes a long instead of int
+* e (0.7.0): stores undecorated keys in data and index components
+* f (0.7.0): switched bloom filter implementations in data component
+* g (0.8): tracks flushed-at context in metadata component
+
+=== Version 1
+
+* h (1.0): tracks max client timestamp in metadata component
+* hb (1.0.3): records compression ratio in metadata component
+* hc (1.0.4): records partitioner in metadata component
+* hd (1.0.10): includes row tombstones in maxtimestamp
+* he (1.1.3): includes ancestors generation in metadata component
+* hf (1.1.6): marker that replay position corresponds to 1.1.5+
+millis-based id (see CASSANDRA-4782)
+* ia (1.2.0):
+** column indexes are promoted to the index file
+** records estimated histogram of deletion times in tombstones
+** bloom filter (keys and columns) upgraded to Murmur3
+* ib (1.2.1): tracks min client timestamp in metadata component
+* ic (1.2.5): omits per-row bloom filter of column names
+
+=== Version 2
+
+* ja (2.0.0):
+** super columns are serialized as composites (note that there is no
+real format change, this is mostly a marker to know if we should expect
+super columns or not. We do need a major version bump however, because
+we should not allow streaming of super columns into this new format)
+** tracks max local deletiontime in sstable metadata
+** records bloom_filter_fp_chance in metadata component
+** remove data size and column count from data file (CASSANDRA-4180)
+** tracks max/min column values (according to comparator)
+* jb (2.0.1):
+** switch from crc32 to adler32 for compression checksums
+** checksum the compressed data
+* ka (2.1.0):
+** new Statistics.db file format
+** index summaries can be downsampled and the sampling level is
+persisted
+** switch uncompressed checksums to adler32
+** tracks presence of legacy (local and remote) counter shards
+* la (2.2.0): new file name format
+* lb (2.2.7): commit log lower bound included
+
+=== Version 3
+
+* ma (3.0.0):
+** swap bf hash order
+** store rows natively
+* mb (3.0.7, 3.7): commit log lower bound included
+* mc (3.0.8, 3.9): commit log intervals included
+
+=== Example Code
+
+The following example is useful for finding all sstables that do not
+match the "ib" SSTable version:
+
+[source,bash]
+----
+include::example$find_sstables.sh[]
+----
diff --git a/doc/modules/cassandra/pages/configuration/cass_cl_archive_file.adoc b/doc/modules/cassandra/pages/configuration/cass_cl_archive_file.adoc
new file mode 100644
index 00000000000..f7b07887ed2
--- /dev/null
+++ b/doc/modules/cassandra/pages/configuration/cass_cl_archive_file.adoc
@@ -0,0 +1,48 @@
+[[cassandra-cl-archive]]
+= commitlog-archiving.properties file
+
+The `commitlog-archiving.properties` configuration file can optionally
+set commands that are executed when archiving or restoring a commitlog
+segment.
+
+== Options
+
+`archive_command=<command>`: One command can be inserted with `%path`
+and `%name` arguments. `%path` is the fully qualified path of the commitlog
+segment to archive. `%name` is the filename of the commitlog. STDOUT,
+STDIN, or multiple commands cannot be executed. If multiple commands are
+required, add a pointer to a script in this option.
+
+*Example:* archive_command=/bin/ln %path /backup/%name
+
+*Default value:* blank
+
+`restore_command=<command>`: One command can be inserted with `%from`
+and `%to` arguments. `%from` is the fully qualified path to an archived
+commitlog segment using the specified restore directories. `%to` defines
+the directory to the live commitlog location.
+
+*Example:* restore_command=/bin/cp -f %from %to
+
+*Default value:* blank
+
+`restore_directories=<directory>`: Defines the directory to scan for
+the recovery files.
+
+*Default value:* blank
+
+`restore_point_in_time=<timestamp>`: Restore mutations created up
+to and including this timestamp in GMT in the format
+`yyyy:MM:dd HH:mm:ss`. Recovery will continue through the segment when
+the first client-supplied timestamp greater than this time is
+encountered, but only mutations less than or equal to this timestamp
+will be applied.
+
+*Example:* 2020:04:31 20:43:12
+
+*Default value:* blank
+
+`precision=<timestamp_precision>`: Precision of the timestamp used
+in the inserts. The choice is generally `MILLISECONDS` or `MICROSECONDS`.
+
+*Default value:* MICROSECONDS
diff --git a/doc/modules/cassandra/pages/configuration/cass_env_sh_file.adoc b/doc/modules/cassandra/pages/configuration/cass_env_sh_file.adoc
new file mode 100644
index 00000000000..d895186246e
--- /dev/null
+++ b/doc/modules/cassandra/pages/configuration/cass_env_sh_file.adoc
@@ -0,0 +1,162 @@
+= cassandra-env.sh file
+
+The `cassandra-env.sh` bash script file can be used to pass additional
+options to the Java virtual machine (JVM), such as maximum and minimum
+heap size, rather than setting them in the environment. If the JVM
+settings are static and do not need to be computed from the node's
+characteristics, the `cassandra-jvm-options` files should be used
+instead. Commonly computed values include the heap sizes, which are
+derived from the system's available memory.
+
+For example, add
+`JVM_OPTS="$JVM_OPTS -Dcassandra.load_ring_state=false"` to the
+`cassandra-env.sh` file and run the command-line `cassandra` to start.
+The option is set from the `cassandra-env.sh` file, and is equivalent to
+starting Cassandra with the command-line option
+`cassandra -Dcassandra.load_ring_state=false`.
+
+The `-D` option specifies the start-up parameters in both the command
+line and `cassandra-env.sh` file. The following options are available:
+
+== `cassandra.auto_bootstrap=false`
+
+Facilitates setting auto_bootstrap to false on initial set-up of the
+cluster. The next time you start the cluster, you do not need to change
+the `cassandra.yaml` file on each node to revert to true, the default
+value.
+
+== `cassandra.available_processors=<number_of_processors>`
+
+In a multi-instance deployment, multiple Cassandra instances will
+independently assume that all CPU processors are available to them. This
+setting allows you to specify a smaller set of processors.
+
+== `cassandra.boot_without_jna=true`
+
+If JNA fails to initialize, Cassandra fails to boot. Use this option to
+boot Cassandra without JNA.
+
+== `cassandra.config=<directory>`
+
+The directory location of the `cassandra.yaml` file. The default
+location depends on the type of installation.
+
+== `cassandra.ignore_dynamic_snitch_severity=true|false`
+
+Setting this property to true causes the dynamic snitch to ignore the
+severity indicator from gossip when scoring nodes. Explore failure
+detection and recovery and dynamic snitching for more information.
+
+*Default:* false
+
+== `cassandra.initial_token=<token>`
+
+Use when virtual nodes (vnodes) are not used. Sets the initial
+partitioner token for a node the first time the node is started. Note:
+Vnodes are highly recommended as they automatically select tokens.
+
+*Default:* disabled
+
+== `cassandra.join_ring=true|false`
+
+Set to false to start Cassandra on a node but not have the node join the
+cluster. You can use `nodetool join` and a JMX call to join the ring
+afterwards.
+
+*Default:* true
+
+== `cassandra.load_ring_state=true|false`
+
+Set to false to clear all gossip state for the node on restart.
+
+*Default:* true
+
+== `cassandra.metricsReporterConfigFile=<filename>`
+
+Enable pluggable metrics reporter. Explore pluggable metrics reporting
+for more information.
+
+== `cassandra.partitioner=<partitioner>`
+
+Set the partitioner.
+
+*Default:* org.apache.cassandra.dht.Murmur3Partitioner
+
+== `cassandra.prepared_statements_cache_size_in_bytes=<cache_size>`
+
+Set the cache size for prepared statements.
+
+== `cassandra.replace_address=<listen_address>|<broadcast_address>`
+
+To replace a node that has died, restart a new node in its place
+specifying the `listen_address` or `broadcast_address` that the new node
+is assuming. The new node must not have any data in its data directory;
+that is, the same state as before bootstrapping. Note: The `broadcast_address`
+defaults to the `listen_address` except when using the
+`Ec2MultiRegionSnitch`.
+
+== `cassandra.replayList=<table>`
+
+Allow restoring specific tables from an archived commit log.
+
+== `cassandra.ring_delay_ms=<number_of_ms>`
+
+Defines the amount of time a node waits to hear from other nodes before
+formally joining the ring.
+
+*Default:* 30000ms
+
+== `cassandra.native_transport_port=<port>`
+
+Set the port on which the CQL native transport listens for clients.
+
+*Default:* 9042
+
+== `cassandra.rpc_port=<port>`
+
+Set the port for the Thrift RPC service, which is used for client
+connections.
+
+*Default:* 9160
+
+== `cassandra.storage_port=<port>`
+
+Set the port for inter-node communication.
+
+*Default:* 7000
+
+== `cassandra.ssl_storage_port=<port>`
+
+Set the SSL port for encrypted communication.
+
+*Default:* 7001
+
+== `cassandra.start_native_transport=true|false`
+
+Enable or disable the native transport server. See
+`start_native_transport` in `cassandra.yaml`.
+
+*Default:* true
+
+== `cassandra.start_rpc=true|false`
+
+Enable or disable the Thrift RPC server.
+
+*Default:* true
+
+== `cassandra.triggers_dir=<directory>`
+
+Set the default location for the trigger JARs.
+
+*Default:* conf/triggers
+
+== `cassandra.write_survey=true`
+
+For testing new compaction and compression strategies. It allows you to
+experiment with different strategies and benchmark write performance
+differences without affecting the production workload.
+
+== `cassandra.consistent.rangemovement=true|false`
+
+Setting this to true makes Cassandra perform bootstrap safely without
+violating consistency; setting it to false disables this behavior.
diff --git a/doc/modules/cassandra/pages/configuration/cass_jvm_options_file.adoc b/doc/modules/cassandra/pages/configuration/cass_jvm_options_file.adoc
new file mode 100644
index 00000000000..b9a312c3409
--- /dev/null
+++ b/doc/modules/cassandra/pages/configuration/cass_jvm_options_file.adoc
@@ -0,0 +1,22 @@
+= jvm-* files
+
+Several files for JVM configuration are included in Cassandra. The
+`jvm-server.options` file and the corresponding `jvm8-server.options`
+and `jvm11-server.options` files are the main files for settings that
+affect the operation of the Cassandra JVM on cluster nodes. These files include
+startup parameters, general JVM settings such as garbage collection, and
+heap settings. The `jvm-clients.options` and corresponding
+`jvm8-clients.options` and `jvm11-clients.options` files can be used to
+configure JVM settings for clients like `nodetool` and the `sstable`
+tools.
+
+See each file for examples of settings.
+
+[NOTE]
+.Note
+====
+The `jvm-*` files replace the `cassandra-env.sh` file used in Cassandra
+versions prior to Cassandra 3.0. The `cassandra-env.sh` bash script file
+is still useful if JVM settings must be dynamically calculated based on
+system settings. The `jvm-*` files only store static JVM settings.
+====
diff --git a/doc/modules/cassandra/pages/configuration/cass_logback_xml_file.adoc b/doc/modules/cassandra/pages/configuration/cass_logback_xml_file.adoc
new file mode 100644
index 00000000000..e673622099d
--- /dev/null
+++ b/doc/modules/cassandra/pages/configuration/cass_logback_xml_file.adoc
@@ -0,0 +1,166 @@
+= logback.xml file
+
+The `logback.xml` configuration file can optionally set logging levels
+for the logs written to `system.log` and `debug.log`. The logging levels
+can also be set using `nodetool setlogginglevels`.
+
+== Options
+
+=== `appender name=""...`
+
+Specify log type and settings. Possible appender names are: `SYSTEMLOG`,
+`DEBUGLOG`, `ASYNCDEBUGLOG`, and `STDOUT`. `SYSTEMLOG` ensures that WARN
+and ERROR messages are written synchronously to the specified file.
+`DEBUGLOG` and `ASYNCDEBUGLOG` ensure that DEBUG messages are written
+either synchronously or asynchronously, respectively, to the specified
+file. `STDOUT` writes all messages to the console in a human-readable
+format.
+
+*Example:* `<appender name="SYSTEMLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">`
+
+=== `<file> <filename> </file>`
+
+Specify the filename for a log.
+
+*Example:* $\{cassandra.logdir}/system.log
+
+=== `<level> <log_level> </level>`
+
+Specify the level for a log. Part of the filter. Levels are: `ALL`,
+`TRACE`, `DEBUG`, `INFO`, `WARN`, `ERROR`, `OFF`. `TRACE` creates the
+most verbose log, `ERROR` the least.
+
+[NOTE]
+.Note
+====
+Increasing logging levels can generate heavy logging output on
+a moderately trafficked cluster. You can use the
+`nodetool getlogginglevels` command to see the current logging
+configuration.
+====
+
+*Default:* INFO
+
+*Example:* INFO
+
+=== `<rollingPolicy class="<rolling_policy_choice>"> ... </rollingPolicy>`
+
+Specify the policy for rolling logs over to an archive.
+
+*Example:* `<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">`
+
+=== `<fileNamePattern> <pattern_info> </fileNamePattern>`
+
+Specify the pattern information for rolling over the log to archive.
+Part of the rolling policy.
+
+*Example:*
+`<fileNamePattern>$\{cassandra.logdir}/system.log.%d\{yyyy-MM-dd}.%i.zip</fileNamePattern>`
+
+=== `<maxFileSize> <size> </maxFileSize>`
+
+Specify the maximum file size to trigger rolling a log. Part of the
+rolling policy.
+
+*Example:* 50MB
+
+=== `<maxHistory> <number_of_days> </maxHistory>`
+
+Specify the maximum history in days to trigger rolling a log. Part of
+the rolling policy.
+
+*Example:* 7
+
+=== `<encoder> <pattern> ... </pattern> </encoder>`
+
+Specify the format of the message. Part of the rolling policy.
+
+*Example:*
+`<pattern>%-5level [%thread] %date\{ISO8601} %F:%L - %msg%n</pattern>`
+
+
+=== Contents of default `logback.xml`
+
+[source,XML]
+----
+<configuration scan="true" scanPeriod="60 seconds">
+  <jmxConfigurator />
+
+  <!-- SYSTEMLOG rolling file appender to system.log (INFO level) -->
+
+  <appender name="SYSTEMLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
+    <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
+      <level>INFO</level>
+    </filter>
+    <file>${cassandra.logdir}/system.log</file>
+    <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
+      <fileNamePattern>${cassandra.logdir}/system.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
+      <maxFileSize>50MB</maxFileSize>
+      <maxHistory>7</maxHistory>
+      <totalSizeCap>5GB</totalSizeCap>
+    </rollingPolicy>
+    <encoder>
+      <pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
+    </encoder>
+  </appender>
+
+  <!-- DEBUGLOG rolling file appender to debug.log (all levels) -->
+
+  <appender name="DEBUGLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
+    <file>${cassandra.logdir}/debug.log</file>
+    <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
+      <fileNamePattern>${cassandra.logdir}/debug.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
+      <maxFileSize>50MB</maxFileSize>
+      <maxHistory>7</maxHistory>
+      <totalSizeCap>5GB</totalSizeCap>
+    </rollingPolicy>
+    <encoder>
+      <pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
+    </encoder>
+  </appender>
+
+  <!-- ASYNCDEBUGLOG asynchronous appender to debug.log (all levels) -->
+
+  <appender name="ASYNCDEBUGLOG" class="ch.qos.logback.classic.AsyncAppender">
+    <queueSize>1024</queueSize>
+    <discardingThreshold>0</discardingThreshold>
+    <includeCallerData>true</includeCallerData>
+    <appender-ref ref="DEBUGLOG" />
+  </appender>
+
+  <!-- STDOUT console appender to stdout (INFO level) -->
+
+  <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+    <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
+      <level>INFO</level>
+    </filter>
+    <encoder>
+      <pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
+    </encoder>
+  </appender>
+
+  <logger name="org.apache.cassandra" level="DEBUG"/>
+
+  <root level="INFO">
+    <appender-ref ref="SYSTEMLOG" />
+    <appender-ref ref="STDOUT" />
+    <appender-ref ref="ASYNCDEBUGLOG" /> <!-- Comment this line to disable debug.log -->
+  </root>
+</configuration>
+----
diff --git a/doc/modules/cassandra/pages/configuration/cass_rackdc_file.adoc b/doc/modules/cassandra/pages/configuration/cass_rackdc_file.adoc
new file mode 100644
index 00000000000..0b370c9cc59
--- /dev/null
+++ b/doc/modules/cassandra/pages/configuration/cass_rackdc_file.adoc
@@ -0,0 +1,79 @@
+= cassandra-rackdc.properties file
+
+Several `snitch` options use the `cassandra-rackdc.properties`
+configuration file to determine which `datacenters` and racks cluster
+nodes belong to. Information about the network topology allows requests
+to be routed efficiently and to distribute replicas evenly. The
+following snitches can be configured here:
+
+* GossipingPropertyFileSnitch
+* AWS EC2 single-region snitch
+* AWS EC2 multi-region snitch
+
+The GossipingPropertyFileSnitch is recommended for production. This
+snitch uses the datacenter and rack information configured in a local
+node's `cassandra-rackdc.properties` file and propagates the information
+to other nodes using `gossip`. It is the default snitch and the settings
+in this properties file are enabled.
+
+The AWS EC2 snitches are configured for clusters in AWS. This snitch
+uses the `cassandra-rackdc.properties` options to designate one of two
+AWS EC2 datacenter and rack naming conventions:
+
+* legacy: Datacenter name is the part of the availability zone name
+preceding the last "-" when the zone ends in -1 and includes the number
+if not -1. Rack name is the portion of the availability zone name
+following the last "-".
++
+____
+Examples: us-west-1a => dc: us-west, rack: 1a; us-west-2b => dc:
+us-west-2, rack: 2b;
+____
+* standard: Datacenter name is the standard AWS region name, including
+the number. Rack name is the region plus the availability zone letter.
++
+____
+Examples: us-west-1a => dc: us-west-1, rack: us-west-1a; us-west-2b =>
+dc: us-west-2, rack: us-west-2b;
+____
+
+Either snitch can be set to use the local or internal IP address when
+communication is within, rather than across, datacenters.
+
+== GossipingPropertyFileSnitch
+
+=== `dc`
+
+Name of the datacenter. The value is case-sensitive.
+
+*Default value:* DC1
+
+=== `rack`
+
+Rack designation. The value is case-sensitive.
+
+*Default value:* RAC1
+
+== AWS EC2 snitch
+
+=== `ec2_naming_scheme`
+
+Datacenter and rack naming convention. Options are `legacy` or
+`standard` (default). *This option is commented out by default.*
+
+*Default value:* standard
+
+[NOTE]
+.Note
+====
+YOU MUST USE THE `legacy` VALUE IF YOU ARE UPGRADING A PRE-4.0 CLUSTER.
+====
+
+== Either snitch
+
+=== `prefer_local`
+
+Option to use the local or internal IP address when communication is not
+across different datacenters. *This option is commented out by default.*
+
+*Default value:* true
diff --git a/doc/modules/cassandra/pages/configuration/cass_topo_file.adoc b/doc/modules/cassandra/pages/configuration/cass_topo_file.adoc
new file mode 100644
index 00000000000..5ca82219b5c
--- /dev/null
+++ b/doc/modules/cassandra/pages/configuration/cass_topo_file.adoc
@@ -0,0 +1,53 @@
+[[cassandra-topology]]
+= cassandra-topology.properties file
+
+The `PropertyFileSnitch` `snitch` option uses the
+`cassandra-topology.properties` configuration file to determine which
+`datacenters` and racks cluster nodes belong to. If other snitches are
+used, the xref:configuration/cass_rackdc_file.adoc[cassandra-rackdc.properties]
+file must be used instead. The snitch determines network topology
+(proximity by rack and datacenter) so that requests are routed
+efficiently and allows the database to distribute replicas evenly.
+
+Include every node in the cluster in the properties file, defining your
+datacenter names as in the keyspace definition. The datacenter and rack
+names are case-sensitive.
+
+The `cassandra-topology.properties` file must be copied identically to
+every node in the cluster.
+
+== Example
+
+This example uses three datacenters:
+
+[source,bash]
+----
+# datacenter One
+
+175.56.12.105=DC1:RAC1
+175.50.13.200=DC1:RAC1
+175.54.35.197=DC1:RAC1
+
+120.53.24.101=DC1:RAC2
+120.55.16.200=DC1:RAC2
+120.57.102.103=DC1:RAC2
+
+# datacenter Two
+
+110.56.12.120=DC2:RAC1
+110.50.13.201=DC2:RAC1
+110.54.35.184=DC2:RAC1
+
+50.33.23.120=DC2:RAC2
+50.45.14.220=DC2:RAC2
+50.17.10.203=DC2:RAC2
+
+# datacenter Three
+
+172.106.12.120=DC3:RAC1
+172.106.12.121=DC3:RAC1
+172.106.12.122=DC3:RAC1
+
+# default for unknown nodes
+default =DC3:RAC1
+----
diff --git a/doc/modules/cassandra/pages/configuration/index.adoc b/doc/modules/cassandra/pages/configuration/index.adoc
new file mode 100644
index 00000000000..7c8ee367a90
--- /dev/null
+++ b/doc/modules/cassandra/pages/configuration/index.adoc
@@ -0,0 +1,11 @@
+= Configuring Cassandra
+
+This section describes how to configure Apache Cassandra.
+
+* xref:configuration/cass_yaml_file.adoc[cassandra.yaml]
+* xref:configuration/cass_rackdc_file.adoc[cassandra-rackdc.properties]
+* xref:configuration/cass_env_sh_file.adoc[cassandra-env.sh]
+* xref:configuration/cass_topo_file.adoc[cassandra-topologies.properties]
+* xref:configuration/cass_cl_archive_file.adoc[commitlog-archiving.properties]
+* xref:configuration/cass_cl_logback_xml_file.adoc[logback.xml]
+* xref:configuration/cass_jvm_options_file.adoc[jvm-* files]
diff --git a/doc/modules/cassandra/pages/cql/SASI.adoc b/doc/modules/cassandra/pages/cql/SASI.adoc
new file mode 100644
index 00000000000..c24009ad24c
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/SASI.adoc
@@ -0,0 +1,809 @@
+== SASIIndex
+
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/SASIIndex.java[`SASIIndex`],
+or `SASI` for short, is an implementation of Cassandra's `Index`
+interface that can be used as an alternative to the existing
+implementations. SASI's indexing and querying improves on existing
+implementations by tailoring it specifically to Cassandra’s needs. SASI
+has superior performance in cases where queries would previously require
+filtering. In achieving this performance, SASI aims to be significantly
+less resource intensive than existing implementations, in memory, disk,
+and CPU usage. In addition, SASI supports prefix and contains queries on
+strings (similar to SQL’s `LIKE = "foo*"` or `LIKE = "*foo*"`).
+
+The following goes on to describe how to get up and running with SASI,
+demonstrates usage with examples, and provides some details on its
+implementation.
+
+=== Using SASI
+
+The examples below walk through creating a table and indexes on its
+columns, and performing queries on some inserted data.
+
+The examples below assume the `demo` keyspace has been created and is in
+use.
+
+....
+cqlsh> CREATE KEYSPACE demo WITH replication = {
+ ... 'class': 'SimpleStrategy',
+ ... 'replication_factor': '1'
+ ... };
+cqlsh> USE demo;
+....
+
+All examples are performed on the `sasi` table:
+
+....
+cqlsh:demo> CREATE TABLE sasi (id uuid, first_name text, last_name text,
+ ... age int, height int, created_at bigint, primary key (id));
+....
+
+==== Creating Indexes
+
+To create SASI indexes, use CQL’s `CREATE CUSTOM INDEX` statement:
+
+....
+cqlsh:demo> CREATE CUSTOM INDEX ON sasi (first_name) USING 'org.apache.cassandra.index.sasi.SASIIndex'
+ ... WITH OPTIONS = {
+ ... 'analyzer_class':
+ ... 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer',
+ ... 'case_sensitive': 'false'
+ ... };
+
+cqlsh:demo> CREATE CUSTOM INDEX ON sasi (last_name) USING 'org.apache.cassandra.index.sasi.SASIIndex'
+ ... WITH OPTIONS = {'mode': 'CONTAINS'};
+
+cqlsh:demo> CREATE CUSTOM INDEX ON sasi (age) USING 'org.apache.cassandra.index.sasi.SASIIndex';
+
+cqlsh:demo> CREATE CUSTOM INDEX ON sasi (created_at) USING 'org.apache.cassandra.index.sasi.SASIIndex'
+ ... WITH OPTIONS = {'mode': 'SPARSE'};
+....
+
+The indexes created have some options specified that customize their
+behaviour and potentially performance. The index on `first_name` is
+case-insensitive. The analyzers are discussed more in a subsequent
+example. The `NonTokenizingAnalyzer` performs no analysis on the text.
+Each index has a mode: `PREFIX`, `CONTAINS`, or `SPARSE`, the first
+being the default. The `last_name` index is created with the mode
+`CONTAINS` which matches terms on suffixes instead of prefix only.
+Examples of this are available below and more detail can be found in the
+section on link:#ondiskindexbuilder[OnDiskIndex]. The `created_at` column
+is created with its mode set to `SPARSE`, which is meant to improve
+performance of querying large, dense number ranges like timestamps for
+data inserted every millisecond. Details of the `SPARSE` implementation
+can also be found in the section on the
+link:#ondiskindexbuilder[OnDiskIndex]. The `age` index is created with
+the default `PREFIX` mode and no case-sensitivity or text analysis
+options are specified since the field is numeric.
+
+After inserting the following data and performing a `nodetool flush`,
+SASI’s index flushes to disk can be seen in Cassandra’s logs –
+although the direct call to flush is not required (see
+link:#indexmemtable[IndexMemtable] for more details).
+
+....
+cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
+ ... VALUES (556ebd54-cbe5-4b75-9aae-bf2a31a24500, 'Pavel', 'Yaskevich', 27, 181, 1442959315018);
+
+cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
+ ... VALUES (5770382a-c56f-4f3f-b755-450e24d55217, 'Jordan', 'West', 26, 173, 1442959315019);
+
+cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
+ ... VALUES (96053844-45c3-4f15-b1b7-b02c441d3ee1, 'Mikhail', 'Stepura', 36, 173, 1442959315020);
+
+cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
+ ... VALUES (f5dfcabe-de96-4148-9b80-a1c41ed276b4, 'Michael', 'Kjellman', 26, 180, 1442959315021);
+
+cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
+ ... VALUES (2970da43-e070-41a8-8bcb-35df7a0e608a, 'Johnny', 'Zhang', 32, 175, 1442959315022);
+
+cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
+ ... VALUES (6b757016-631d-4fdb-ac62-40b127ccfbc7, 'Jason', 'Brown', 40, 182, 1442959315023);
+
+cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
+ ... VALUES (8f909e8a-008e-49dd-8d43-1b0df348ed44, 'Vijay', 'Parthasarathy', 34, 183, 1442959315024);
+
+cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi;
+
+ first_name | last_name | age | height | created_at
+------------+---------------+-----+--------+---------------
+ Michael | Kjellman | 26 | 180 | 1442959315021
+ Mikhail | Stepura | 36 | 173 | 1442959315020
+ Jason | Brown | 40 | 182 | 1442959315023
+ Pavel | Yaskevich | 27 | 181 | 1442959315018
+ Vijay | Parthasarathy | 34 | 183 | 1442959315024
+ Jordan | West | 26 | 173 | 1442959315019
+ Johnny | Zhang | 32 | 175 | 1442959315022
+
+(7 rows)
+....
+
+==== Equality & Prefix Queries
+
+SASI supports all queries already supported by CQL, including the `LIKE`
+statement for `PREFIX`, `CONTAINS` and `SUFFIX` searches.
+
+....
+cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi
+ ... WHERE first_name = 'Pavel';
+
+ first_name | last_name | age | height | created_at
+-------------+-----------+-----+--------+---------------
+ Pavel | Yaskevich | 27 | 181 | 1442959315018
+
+(1 rows)
+....
+
+....
+cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi
+ ... WHERE first_name = 'pavel';
+
+ first_name | last_name | age | height | created_at
+-------------+-----------+-----+--------+---------------
+ Pavel | Yaskevich | 27 | 181 | 1442959315018
+
+(1 rows)
+....
+
+....
+cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi
+ ... WHERE first_name LIKE 'M%';
+
+ first_name | last_name | age | height | created_at
+------------+-----------+-----+--------+---------------
+ Michael | Kjellman | 26 | 180 | 1442959315021
+ Mikhail | Stepura | 36 | 173 | 1442959315020
+
+(2 rows)
+....
+
+Of course, the case of the query does not matter for the `first_name`
+column because of the options provided at index creation time.
+
+....
+cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi
+ ... WHERE first_name LIKE 'm%';
+
+ first_name | last_name | age | height | created_at
+------------+-----------+-----+--------+---------------
+ Michael | Kjellman | 26 | 180 | 1442959315021
+ Mikhail | Stepura | 36 | 173 | 1442959315020
+
+(2 rows)
+....
+
+==== Compound Queries
+
+SASI supports queries with multiple predicates; however, due to the
+nature of the default indexing implementation, CQL requires the user to
+specify `ALLOW FILTERING` to opt in to the potential performance
+pitfalls of such a query. With SASI, while the requirement to include
+`ALLOW FILTERING` remains (to reduce modifications to the grammar), the
+performance pitfalls do not exist because filtering is not performed.
+Details on how SASI joins data from multiple predicates are available
+below in the link:#implementation-details[Implementation Details]
+section.
+
+....
+cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi
+ ... WHERE first_name LIKE 'M%' and age < 30 ALLOW FILTERING;
+
+ first_name | last_name | age | height | created_at
+------------+-----------+-----+--------+---------------
+ Michael | Kjellman | 26 | 180 | 1442959315021
+
+(1 rows)
+....
+
+==== Suffix Queries
+
+The next example demonstrates `CONTAINS` mode on the `last_name` column.
+By using this mode, predicates can search for any strings containing the
+search string as a sub-string. In this case the strings containing ``a''
+or ``an''.
+
+....
+cqlsh:demo> SELECT * FROM sasi WHERE last_name LIKE '%a%';
+
+ id | age | created_at | first_name | height | last_name
+--------------------------------------+-----+---------------+------------+--------+---------------
+ f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | 1442959315021 | Michael | 180 | Kjellman
+ 96053844-45c3-4f15-b1b7-b02c441d3ee1 | 36 | 1442959315020 | Mikhail | 173 | Stepura
+ 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | 1442959315018 | Pavel | 181 | Yaskevich
+ 8f909e8a-008e-49dd-8d43-1b0df348ed44 | 34 | 1442959315024 | Vijay | 183 | Parthasarathy
+ 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | 1442959315022 | Johnny | 175 | Zhang
+
+(5 rows)
+
+cqlsh:demo> SELECT * FROM sasi WHERE last_name LIKE '%an%';
+
+ id | age | created_at | first_name | height | last_name
+--------------------------------------+-----+---------------+------------+--------+-----------
+ f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | 1442959315021 | Michael | 180 | Kjellman
+ 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | 1442959315022 | Johnny | 175 | Zhang
+
+(2 rows)
+....
+
+==== Expressions on Non-Indexed Columns
+
+SASI also supports filtering on non-indexed columns like `height`. The
+expression can only narrow down an existing query using `AND`.
+
+....
+cqlsh:demo> SELECT * FROM sasi WHERE last_name LIKE '%a%' AND height >= 175 ALLOW FILTERING;
+
+ id | age | created_at | first_name | height | last_name
+--------------------------------------+-----+---------------+------------+--------+---------------
+ f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | 1442959315021 | Michael | 180 | Kjellman
+ 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | 1442959315018 | Pavel | 181 | Yaskevich
+ 8f909e8a-008e-49dd-8d43-1b0df348ed44 | 34 | 1442959315024 | Vijay | 183 | Parthasarathy
+ 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | 1442959315022 | Johnny | 175 | Zhang
+
+(4 rows)
+....
+
+==== Delimiter-based Tokenization Analysis
+
+A simple form of text analysis provided is delimiter-based tokenization.
+This provides an alternative to indexing collections, as
+delimiter-separated text can be indexed without the overhead of
+`CONTAINS` mode or the use of `PREFIX` or `SUFFIX` queries.
+
+....
+cqlsh:demo> ALTER TABLE sasi ADD aliases text;
+cqlsh:demo> CREATE CUSTOM INDEX on sasi (aliases) USING 'org.apache.cassandra.index.sasi.SASIIndex'
+ ... WITH OPTIONS = {
+ ... 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.DelimiterAnalyzer',
+ ... 'delimiter': ',',
+ ... 'mode': 'prefix',
+ ... 'analyzed': 'true'};
+cqlsh:demo> UPDATE sasi SET aliases = 'Mike,Mick,Mikey,Mickey' WHERE id = f5dfcabe-de96-4148-9b80-a1c41ed276b4;
+cqlsh:demo> SELECT * FROM sasi WHERE aliases LIKE 'Mikey' ALLOW FILTERING;
+
+ id | age | aliases | created_at | first_name | height | last_name
+--------------------------------------+-----+------------------------+---------------+------------+--------+-----------
+ f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | Mike,Mick,Mikey,Mickey | 1442959315021 | Michael | 180 | Kjellman
+....
+
+==== Text Analysis (Tokenization and Stemming)
+
+Lastly, to demonstrate text analysis an additional column is needed on
+the table. Its definition, index, and statements to update rows are
+shown below.
+
+....
+cqlsh:demo> ALTER TABLE sasi ADD bio text;
+cqlsh:demo> CREATE CUSTOM INDEX ON sasi (bio) USING 'org.apache.cassandra.index.sasi.SASIIndex'
+ ... WITH OPTIONS = {
+ ... 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.StandardAnalyzer',
+ ... 'tokenization_enable_stemming': 'true',
+ ... 'analyzed': 'true',
+ ... 'tokenization_normalize_lowercase': 'true',
+ ... 'tokenization_locale': 'en'
+ ... };
+cqlsh:demo> UPDATE sasi SET bio = 'Software Engineer, who likes distributed systems, doesnt like to argue.' WHERE id = 5770382a-c56f-4f3f-b755-450e24d55217;
+cqlsh:demo> UPDATE sasi SET bio = 'Software Engineer, works on the freight distribution at nights and likes arguing' WHERE id = 556ebd54-cbe5-4b75-9aae-bf2a31a24500;
+cqlsh:demo> SELECT * FROM sasi;
+
+ id | age | bio | created_at | first_name | height | last_name
+--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+---------------
+ f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | null | 1442959315021 | Michael | 180 | Kjellman
+ 96053844-45c3-4f15-b1b7-b02c441d3ee1 | 36 | null | 1442959315020 | Mikhail | 173 | Stepura
+ 6b757016-631d-4fdb-ac62-40b127ccfbc7 | 40 | null | 1442959315023 | Jason | 182 | Brown
+ 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich
+ 8f909e8a-008e-49dd-8d43-1b0df348ed44 | 34 | null | 1442959315024 | Vijay | 183 | Parthasarathy
+ 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West
+ 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | null | 1442959315022 | Johnny | 175 | Zhang
+
+(7 rows)
+....
+
+Index terms and query search strings are stemmed for the `bio` column
+because it was configured to use the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java[`StandardAnalyzer`]
+and `analyzed` is set to `true`. The `tokenization_normalize_lowercase`
+is similar to the `case_sensitive` property but for the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java[`StandardAnalyzer`].
+These queries demonstrate the stemming applied by
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java[`StandardAnalyzer`].
+
+....
+cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'distributing';
+
+ id | age | bio | created_at | first_name | height | last_name
+--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+-----------
+ 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich
+ 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West
+
+(2 rows)
+
+cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'they argued';
+
+ id | age | bio | created_at | first_name | height | last_name
+--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+-----------
+ 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich
+ 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West
+
+(2 rows)
+
+cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'working at the company';
+
+ id | age | bio | created_at | first_name | height | last_name
+--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+-----------
+ 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich
+
+(1 rows)
+
+cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'soft eng';
+
+ id | age | bio | created_at | first_name | height | last_name
+--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+-----------
+ 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich
+ 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West
+
+(2 rows)
+....
+
+=== Implementation Details
+
+While SASI, on the surface, is simply an implementation of the `Index`
+interface, at its core there are several data structures and algorithms
+used to satisfy it. These are described here. Additionally, the changes
+internal to Cassandra to support SASI’s integration are described.
+
+The `Index` interface divides responsibility of the implementer into two
+parts: Indexing and Querying. Further, Cassandra makes it possible to
+divide those responsibilities into the memory and disk components. SASI
+takes advantage of Cassandra’s write-once, immutable, ordered data model
+to build indexes along with the flushing of the memtable to disk – this
+is the origin of the name ``SSTable Attached Secondary Index''.
+
+The SASI index data structures are built in memory as the SSTable is
+being written and they are flushed to disk before the writing of the
+SSTable completes. The writing of each index file only requires
+sequential writes to disk. In some cases, partial flushes are performed,
+and later stitched back together, to reduce memory usage. These data
+structures are optimized for this use case.
+
+Taking advantage of Cassandra’s ordered data model, at query time,
+candidate indexes are narrowed down for searching, minimizing the amount
+of work done. Searching is then performed using an efficient method that
+streams data off disk as needed.
+
+==== Indexing
+
+Per SSTable, SASI writes an index file for each indexed column. The data
+for these files is built in memory using the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java[`OnDiskIndexBuilder`].
+Once flushed to disk, the data is read using the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java[`OnDiskIndex`]
+class. These are composed of bytes representing indexed terms, organized
+for efficient writing or searching respectively. The keys and values
+they hold represent tokens and positions in an SSTable and these are
+stored per-indexed term in
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTreeBuilder.java[`TokenTreeBuilder`]s
+for writing, and
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java[`TokenTree`]s
+for querying. These index files are memory mapped after being written to
+disk, for quicker access. For indexing data in the memtable, SASI uses
+its
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java[`IndexMemtable`]
+class.
+
+===== OnDiskIndex(Builder)
+
+Each
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java[`OnDiskIndex`]
+is an instance of a modified
+https://en.wikipedia.org/wiki/Suffix_array[Suffix Array] data structure.
+The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java[`OnDiskIndex`]
+is composed of page-size blocks of sorted terms and pointers to the
+terms’ associated data, as well as the data itself, stored also in one
+or more page-sized blocks. The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java[`OnDiskIndex`]
+is structured as a tree of arrays, where each level describes the terms
+in the level below, the final level being the terms themselves. The
+`PointerLevel`s and their `PointerBlock`s contain terms and pointers to
+other blocks that _end_ with those terms. The `DataLevel`, the final
+level, and its `DataBlock`s contain terms and point to the data itself,
+contained in
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java[`TokenTree`]s.
+
+The terms written to the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java[`OnDiskIndex`]
+vary depending on its ``mode'': either `PREFIX`, `CONTAINS`, or
+`SPARSE`. In the `PREFIX` and `SPARSE` cases, terms’ exact values are
+written exactly once per `OnDiskIndex`. For example, when using a
+`PREFIX` index with terms `Jason`, `Jordan`, `Pavel`, all three will be
+included in the index. A `CONTAINS` index writes additional terms for
+each suffix of each term recursively. Continuing with the example, a
+`CONTAINS` index storing the previous terms would also store `ason`,
+`ordan`, `avel`, `son`, `rdan`, `vel`, etc. This allows for queries on
+the suffix of strings. The `SPARSE` mode differs from `PREFIX` in that
+for every 64 blocks of terms a
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java[`TokenTree`]
+is built merging all the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java[`TokenTree`]s
+for each term into a single one. This copy of the data is used for
+efficient iteration of large ranges of e.g. timestamps. The index
+``mode'' is configurable per column at index creation time.
+
+===== TokenTree(Builder)
+
+The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java[`TokenTree`]
+is an implementation of the well-known
+https://en.wikipedia.org/wiki/B%2B_tree[B+-tree] that has been modified
+to optimize for its use-case. In particular, it has been optimized to
+associate tokens, longs, with a set of positions in an SSTable, also
+longs. Allowing the set of long values accommodates the possibility of a
+hash collision in the token, but the data structure is optimized for the
+unlikely possibility of such a collision.
+
+To optimize for its write-once environment the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTreeBuilder.java[`TokenTreeBuilder`]
+completely loads its interior nodes as the tree is built, and it uses a
+well-known algorithm optimized for bulk-loading the data structure.
+
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java[`TokenTree`]s
+provide the means to iterate over tokens, and file positions, that match
+a given term, and to skip forward in that iteration, an operation used
+heavily at query time.
+
+===== IndexMemtable
+
+The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java[`IndexMemtable`]
+handles indexing the in-memory data held in the memtable. The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java[`IndexMemtable`]
+in turn manages either a
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java[`TrieMemIndex`]
+or a
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java[`SkipListMemIndex`]
+per-column. The choice of which index type is used is data dependent.
+The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java[`TrieMemIndex`]
+is used for literal types. `AsciiType` and `UTF8Type` are literal types
+by default but any column can be configured as a literal type using the
+`is_literal` option at index creation time. For non-literal types the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java[`SkipListMemIndex`]
+is used. The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java[`TrieMemIndex`]
+is an implementation that can efficiently support prefix queries on
+character-like data. The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java[`SkipListMemIndex`],
+conversely, is better suited for other Cassandra data types like
+numbers.
+
+The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java[`TrieMemIndex`]
+is built using either the `ConcurrentRadixTree` or
+`ConcurrentSuffixTree` from the `com.googlecode.concurrenttrees`
+package. The choice between the two is based on the indexing mode: the
+`ConcurrentRadixTree` is used for `PREFIX` (and other non-`CONTAINS`)
+modes, and the `ConcurrentSuffixTree` for `CONTAINS` mode.
+
+The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java[`SkipListMemIndex`]
+is built on top of `java.util.concurrent.ConcurrentSkipListSet`.
+
+==== Querying
+
+Responsible for converting the internal `IndexExpression` representation
+into SASI’s
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]
+and
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java[`Expression`]
+trees, optimizing the trees to reduce the amount of work done, and
+driving the query itself, the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`]
+is the workhorse of SASI’s querying implementation. To efficiently
+perform union and intersection operations, SASI provides several
+iterators similar to Cassandra’s `MergeIterator`, but tailored
+specifically for SASI’s use while including more features. The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java[`RangeUnionIterator`],
+like its name suggests, performs set unions over sets of tokens/keys
+matching the query, only reading as much data as it needs from each set
+to satisfy the query. The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java[`RangeIntersectionIterator`],
+similar to its counterpart, performs set intersections over its data.
+
+===== QueryPlan
+
+The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`]
+instantiated per search query is at the core of SASI’s querying
+implementation. Its work can be divided in two stages: analysis and
+execution.
+
+During the analysis phase,
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`]
+converts from Cassandra’s internal representation of `IndexExpression`s,
+which has also been modified to support encoding queries that contain
+ORs and groupings of expressions using parentheses (see the
+link:#cassandra-internal-changes[Cassandra Internal Changes] section
+below for more details). This process produces a tree of
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]s,
+which in turn may contain
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java[`Expression`]s,
+all of which provide an alternative, more efficient, representation of
+the query.
+
+During execution, the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`]
+uses the `DecoratedKey`-generating iterator created from the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]
+tree. These keys are read from disk and a final check to ensure they
+satisfy the query is made, once again using the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]
+tree. Once the desired amount of matching data has been found,
+or there is no more matching data, the result set is returned to the
+coordinator through the existing internal components.
+
+The number of queries (total/failed/timed-out), and their latencies, are
+maintained per-table/column family.
+
+SASI also supports concurrently iterating terms for the same index
+across SSTables. The concurrency factor is controlled by the
+`cassandra.search_concurrency_factor` system property. The default is
+`1`.
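+
+For example, the factor could be raised via a JVM system property at
+startup. The snippet below is a sketch only; it assumes your deployment
+sets extra JVM options through `cassandra-env.sh`:
+
+....
+# Allow two concurrent term iterations per index search (default is 1)
+JVM_OPTS="$JVM_OPTS -Dcassandra.search_concurrency_factor=2"
+....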
+
+====== QueryController
+
+Each
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`]
+references a
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java[`QueryController`]
+used throughout the execution phase. The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java[`QueryController`]
+has two responsibilities: to manage and ensure the proper cleanup of
+resources (indexes), and to strictly enforce the time bound per query,
+specified by the user via the range slice timeout. All indexes are
+accessed via the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java[`QueryController`]
+so that they can be safely released by it later. The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java[`QueryController`]’s
+`checkpoint` function is called in specific places in the execution path
+to ensure the time-bound is enforced.
+
+====== QueryPlan Optimizations
+
+While in the analysis phase, the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`]
+performs several potential optimizations to the query. The goal of these
+optimizations is to reduce the amount of work performed during the
+execution phase.
+
+The simplest optimization performed is compacting multiple expressions
+joined by logical intersections (`AND`) into a single
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]
+with three or more
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java[`Expression`]s.
+For example, the query
+`WHERE age < 100 AND fname = 'p*' AND first_name != 'pa*' AND age > 21`
+would, without modification, have the following tree:
+
+....
+ ┌───────┐
+ ┌────────│ AND │──────┐
+ │ └───────┘ │
+ ▼ ▼
+ ┌───────┐ ┌──────────┐
+ ┌─────│ AND │─────┐ │age < 100 │
+ │ └───────┘ │ └──────────┘
+ ▼ ▼
+┌──────────┐ ┌───────┐
+│ fname=p* │ ┌─│ AND │───┐
+└──────────┘ │ └───────┘ │
+ ▼ ▼
+ ┌──────────┐ ┌──────────┐
+ │fname!=pa*│ │ age > 21 │
+ └──────────┘ └──────────┘
+....
+
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`]
+will remove the redundant right branch whose root is the final `AND` and
+has leaves `fname != pa*` and `age > 21`. These
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java[`Expression`]s
+will be compacted into the parent `AND`, a safe operation due to `AND`
+being associative and commutative. The resulting tree looks like the
+following:
+
+....
+ ┌───────┐
+ ┌────────│ AND │──────┐
+ │ └───────┘ │
+ ▼ ▼
+ ┌───────┐ ┌──────────┐
+ ┌───────────│ AND │────────┐ │age < 100 │
+ │ └───────┘ │ └──────────┘
+ ▼ │ ▼
+┌──────────┐ │ ┌──────────┐
+│ fname=p* │ ▼ │ age > 21 │
+└──────────┘ ┌──────────┐ └──────────┘
+ │fname!=pa*│
+ └──────────┘
+....
+
+When excluding results from the result set, using `!=`, the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`]
+determines the best method for handling it. For range queries, for
+example, it may be optimal to divide the range into multiple parts with
+a hole for the exclusion. For string queries, such as this one, it is
+more optimal, however, to simply note which data to skip, or exclude,
+while scanning the index. Following this optimization the tree looks
+like this:
+
+....
+ ┌───────┐
+ ┌────────│ AND │──────┐
+ │ └───────┘ │
+ ▼ ▼
+ ┌───────┐ ┌──────────┐
+ ┌───────│ AND │────────┐ │age < 100 │
+ │ └───────┘ │ └──────────┘
+ ▼ ▼
+ ┌──────────────────┐ ┌──────────┐
+ │ fname=p* │ │ age > 21 │
+ │ exclusions=[pa*] │ └──────────┘
+ └──────────────────┘
+....
+
+The last type of optimization applied, for this query, is to merge range
+expressions across branches of the tree – without modifying the meaning
+of the query, of course. In this case, because the query contains all
+`AND`s, the `age` expressions can be collapsed. Along with this
+optimization, the initial collapsing of unneeded `AND`s can also be
+applied once more, resulting in this final tree used to execute the
+query:
+
+....
+ ┌───────┐
+ ┌──────│ AND │───────┐
+ │ └───────┘ │
+ ▼ ▼
+ ┌──────────────────┐ ┌────────────────┐
+ │ fname=p* │ │ 21 < age < 100 │
+ │ exclusions=[pa*] │ └────────────────┘
+ └──────────────────┘
+....
+
+===== Operations and Expressions
+
+As discussed, the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`]
+optimizes a tree represented by
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]s
+as interior nodes, and
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java[`Expression`]s
+as leaves. The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]
+class, more specifically, can have zero, one, or two
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]s
+as children and an unlimited number of expressions. The iterators used
+to perform the queries, discussed below in the
+``Range(Union|Intersection)Iterator'' section, implement the necessary
+logic to merge results transparently regardless of the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]'s
+children.
+
+Besides participating in the optimizations performed by the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`],
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]
+is also responsible for taking a row that has been returned by the query
+and performing a final validation that it in fact does match. This
+`satisfiesBy` operation is performed recursively from the root of the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java[`Operation`]
+tree for a given query. These checks are performed directly on the data
+in a given row. For more details on how `satisfiesBy` works, see the
+documentation
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java#L87-L123[in
+the code].
+
+===== Range(Union|Intersection)Iterator
+
+The abstract `RangeIterator` class provides a unified interface over the
+two main operations performed by SASI at various layers in the execution
+path: set intersection and union. These operations are performed in an
+iterated, or ``streaming'', fashion to prevent unneeded reads of
+elements from either set. In both the intersection and union cases the
+algorithms take advantage of the data being pre-sorted using the same
+sort order, e.g. term or token order.
+
+The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java[`RangeUnionIterator`]
+performs the ``Merge-Join'' portion of the
+https://en.wikipedia.org/wiki/Sort-merge_join[Sort-Merge-Join]
+algorithm, with the properties of an outer-join, or union. It is
+implemented with several optimizations to improve its performance over a
+large number of iterators – sets to union. Specifically, the iterator
+exploits the likely case of the data having many sub-groups of
+overlapping ranges and the unlikely case that all ranges will overlap
+each other. For more details see the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java#L9-L21[javadoc].
+
+The
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java[`RangeIntersectionIterator`]
+itself is not a subclass of `RangeIterator`. It is a container for
+several classes, one of which, `AbstractIntersectionIterator`,
+sub-classes `RangeIterator`. SASI supports two methods of performing the
+intersection operation, and the ability to be adaptive in choosing
+between them based on some properties of the data.
+
+`BounceIntersectionIterator`, and the `BOUNCE` strategy, works like the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java[`RangeUnionIterator`]
+in that it performs a ``Merge-Join''; however, its nature is similar to
+an inner-join, where like values are merged by a data-specific merge
+function (e.g. merging two tokens in a list to look up in an SSTable
+later). See the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java#L88-L101[javadoc]
+for more details on its implementation.
+
+`LookupIntersectionIterator`, and the `LOOKUP` strategy, performs a
+different operation, more similar to a lookup in an associative data
+structure, or ``hash lookup'' in database terminology. Once again,
+details on the implementation can be found in the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java#L199-L208[javadoc].
+
+The choice between the two iterators, or the `ADAPTIVE` strategy, is
+based upon the ratio of data set sizes of the minimum and maximum ranges
+of the sets being intersected. If the number of elements in the minimum
+range divided by the number of elements in the maximum range is less
+than or equal to `0.01`, then the `ADAPTIVE` strategy chooses the
+`LookupIntersectionIterator`, otherwise the `BounceIntersectionIterator`
+is chosen.
+
+==== The SASIIndex Class
+
+The above components are glued together by the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/SASIIndex.java[`SASIIndex`]
+class which implements `Index`, and is instantiated per-table containing
+SASI indexes. It manages all indexes for a table via the
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/conf/DataTracker.java[`sasi.conf.DataTracker`]
+and
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/conf/view/View.java[`sasi.conf.view.View`]
+components, controls writing of all indexes for an SSTable via its
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/PerSSTableIndexWriter.java[`PerSSTableIndexWriter`],
+and initiates searches with `Searcher`. These classes glue the
+previously mentioned indexing components together with Cassandra’s
+SSTable life-cycle, ensuring indexes are not only written when memtables
+flush, but also as SSTables are compacted. For querying, the `Searcher`
+does little but defer to
+https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java[`QueryPlan`]
+and update e.g. latency metrics exposed by SASI.
+
+==== Cassandra Internal Changes
+
+To support the above changes and integrate them into Cassandra a few
+minor internal changes were made to Cassandra itself. These are
+described here.
+
+===== SSTable Write Life-cycle Notifications
+
+The `SSTableFlushObserver` is an observer pattern-like interface, whose
+sub-classes can register to be notified about events in the life-cycle
+of writing out an SSTable. Sub-classes can be notified when a flush
+begins and ends, as well as when each next row is about to be written,
+and each next column. SASI’s `PerSSTableIndexWriter`, discussed above,
+is the only current subclass.
+
+==== Limitations and Caveats
+
+The following are items that can be addressed in future updates but are
+not available in this repository or are not currently implemented.
+
+* The cluster must be configured to use a partitioner that produces
+`LongToken`s, e.g. `Murmur3Partitioner`. Other existing partitioners
+which don’t produce `LongToken`s, e.g. `ByteOrderedPartitioner` and
+`RandomPartitioner` will not work with SASI.
+* Not Equals and OR support have been removed in this release while
+changes are made to Cassandra itself to support them.
+
+==== Contributors
+
+* https://github.com/xedin[Pavel Yaskevich]
+* https://github.com/jrwest[Jordan West]
+* https://github.com/mkjellman[Michael Kjellman]
+* https://github.com/jasobrown[Jason Brown]
+* https://github.com/mishail[Mikhail Stepura]
diff --git a/doc/modules/cassandra/pages/cql/appendices.adoc b/doc/modules/cassandra/pages/cql/appendices.adoc
new file mode 100644
index 00000000000..7e17266a3f7
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/appendices.adoc
@@ -0,0 +1,179 @@
+= Appendices
+
+[[appendix-A]]
+== Appendix A: CQL Keywords
+
+CQL distinguishes between _reserved_ and _non-reserved_ keywords.
+Reserved keywords cannot be used as identifiers; they are truly reserved
+for the language (but one can enclose a reserved keyword in
+double-quotes to use it as an identifier). Non-reserved keywords,
+however, only have a specific meaning in certain contexts but can be
+used as identifiers otherwise. The only _raison d’être_ of these
+non-reserved keywords is convenience: some keywords are non-reserved
+when it was always easy for the parser to decide whether they were used
+as keywords or not.
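+
+For example, the reserved keyword `SELECT` cannot be used directly as a
+column name, while double-quoting it makes it a legal identifier. The
+table below is hypothetical, for illustration only:
+
+[source,cql]
+----
+-- Fails to parse: SELECT is a reserved keyword
+-- CREATE TABLE queries (id uuid PRIMARY KEY, select text);
+
+-- Works: double-quoting turns the keyword into a quoted identifier
+CREATE TABLE queries (id uuid PRIMARY KEY, "select" text);
+----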
+
+[width="48%",cols="60%,40%",options="header",]
+|===
+|Keyword |Reserved?
+|`ADD` |yes
+|`AGGREGATE` |no
+|`ALL` |no
+|`ALLOW` |yes
+|`ALTER` |yes
+|`AND` |yes
+|`APPLY` |yes
+|`AS` |no
+|`ASC` |yes
+|`ASCII` |no
+|`AUTHORIZE` |yes
+|`BATCH` |yes
+|`BEGIN` |yes
+|`BIGINT` |no
+|`BLOB` |no
+|`BOOLEAN` |no
+|`BY` |yes
+|`CALLED` |no
+|`CLUSTERING` |no
+|`COLUMNFAMILY` |yes
+|`COMPACT` |no
+|`CONTAINS` |no
+|`COUNT` |no
+|`COUNTER` |no
+|`CREATE` |yes
+|`CUSTOM` |no
+|`DATE` |no
+|`DECIMAL` |no
+|`DELETE` |yes
+|`DESC` |yes
+|`DESCRIBE` |yes
+|`DISTINCT` |no
+|`DOUBLE` |no
+|`DROP` |yes
+|`ENTRIES` |yes
+|`EXECUTE` |yes
+|`EXISTS` |no
+|`FILTERING` |no
+|`FINALFUNC` |no
+|`FLOAT` |no
+|`FROM` |yes
+|`FROZEN` |no
+|`FULL` |yes
+|`FUNCTION` |no
+|`FUNCTIONS` |no
+|`GRANT` |yes
+|`IF` |yes
+|`IN` |yes
+|`INDEX` |yes
+|`INET` |no
+|`INFINITY` |yes
+|`INITCOND` |no
+|`INPUT` |no
+|`INSERT` |yes
+|`INT` |no
+|`INTO` |yes
+|`JSON` |no
+|`KEY` |no
+|`KEYS` |no
+|`KEYSPACE` |yes
+|`KEYSPACES` |no
+|`LANGUAGE` |no
+|`LIMIT` |yes
+|`LIST` |no
+|`LOGIN` |no
+|`MAP` |no
+|`MODIFY` |yes
+|`NAN` |yes
+|`NOLOGIN` |no
+|`NORECURSIVE` |yes
+|`NOSUPERUSER` |no
+|`NOT` |yes
+|`NULL` |yes
+|`OF` |yes
+|`ON` |yes
+|`OPTIONS` |no
+|`OR` |yes
+|`ORDER` |yes
+|`PASSWORD` |no
+|`PERMISSION` |no
+|`PERMISSIONS` |no
+|`PRIMARY` |yes
+|`RENAME` |yes
+|`REPLACE` |yes
+|`RETURNS` |no
+|`REVOKE` |yes
+|`ROLE` |no
+|`ROLES` |no
+|`SCHEMA` |yes
+|`SELECT` |yes
+|`SET` |yes
+|`SFUNC` |no
+|`SMALLINT` |no
+|`STATIC` |no
+|`STORAGE` |no
+|`STYPE` |no
+|`SUPERUSER` |no
+|`TABLE` |yes
+|`TEXT` |no
+|`TIME` |no
+|`TIMESTAMP` |no
+|`TIMEUUID` |no
+|`TINYINT` |no
+|`TO` |yes
+|`TOKEN` |yes
+|`TRIGGER` |no
+|`TRUNCATE` |yes
+|`TTL` |no
+|`TUPLE` |no
+|`TYPE` |no
+|`UNLOGGED` |yes
+|`UPDATE` |yes
+|`USE` |yes
+|`USER` |no
+|`USERS` |no
+|`USING` |yes
+|`UUID` |no
+|`VALUES` |no
+|`VARCHAR` |no
+|`VARINT` |no
+|`WHERE` |yes
+|`WITH` |yes
+|`WRITETIME` |no
+|===
+
+== Appendix B: CQL Reserved Types
+
+The following type names are not currently used by CQL, but are reserved
+for potential future use. User-defined types may not use reserved type
+names as their name (see the sketch after this table).
+
+[width="25%",cols="100%",options="header",]
+|===
+|type
+|`bitstring`
+|`byte`
+|`complex`
+|`enum`
+|`interval`
+|`macaddr`
+|===
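+
+For instance, a `CREATE TYPE` statement using one of these names should
+be rejected, while any non-reserved name is fine. A hypothetical sketch:
+
+[source,cql]
+----
+-- Fails: 'interval' is a reserved type name
+-- CREATE TYPE interval (start timestamp, stop timestamp);
+
+-- Works: a non-reserved name
+CREATE TYPE time_range (start timestamp, stop timestamp);
+----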
+
+== Appendix C: Dropping Compact Storage
+
+Starting with version 4.0, Thrift and COMPACT STORAGE are no longer
+supported.
+
+The `ALTER ... DROP COMPACT STORAGE` statement makes compact tables
+CQL-compatible, exposing the internal structure of Thrift/compact tables
+(see the example after this list):
+
+* CQL-created compact tables that have no clustering columns will
+expose an additional clustering column `column1` with `UTF8Type`.
+* CQL-created compact tables that had no regular columns will expose a
+regular column `value` with `BytesType`.
+* For CQL-created compact tables, all columns originally defined as
+`regular` will become `static`.
+* CQL-created compact tables that have clustering but no regular
+columns will have an empty value column (of `EmptyType`).
+* SuperColumn tables (which can only be created through Thrift) will
+expose a compact value map with an empty name.
+* Thrift-created compact tables will have types corresponding to their
+Thrift definition.
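+
+The statement itself takes no options; the keyspace and table names
+below are placeholders:
+
+[source,cql]
+----
+ALTER TABLE my_keyspace.my_compact_table DROP COMPACT STORAGE;
+----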
diff --git a/doc/modules/cassandra/pages/cql/changes.adoc b/doc/modules/cassandra/pages/cql/changes.adoc
new file mode 100644
index 00000000000..1f89469a328
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/changes.adoc
@@ -0,0 +1,215 @@
+= Changes
+
+The following describes the changes in each version of CQL.
+
+== 3.4.5
+
+* Adds support for arithmetic operators (`11935`)
+* Adds support for `+` and `-` operations on dates (`11936`)
+* Adds `currentTimestamp`, `currentDate`, `currentTime` and
+`currentTimeUUID` functions (`13132`)
+
+== 3.4.4
+
+* `ALTER TABLE` `ALTER` has been removed; a column's type may not be
+changed after creation (`12443`).
+* `ALTER TYPE` `ALTER` has been removed; a field's type may not be
+changed after creation (`12443`).
+
+== 3.4.3
+
+* Adds a new `duration` data type (`11873`).
+* Support for `GROUP BY` (`10707`).
+* Adds a `DEFAULT UNSET` option for `INSERT JSON` to ignore omitted
+columns (`11424`).
+* Allows `null` as a legal value for TTL on insert and update. It will
+be treated as equivalent to inserting a 0 (`12216`).
+
+== 3.4.2
+
+* If a table has a non-zero `default_time_to_live`, then explicitly
+specifying a TTL of 0 in an `INSERT` or `UPDATE` statement will result
+in the new writes not having any expiration (that is, an explicit TTL of
+0 cancels the `default_time_to_live`); see the example after this list.
+This wasn't the case before, and the `default_time_to_live` was applied
+even though a TTL had been explicitly set.
+* `ALTER TABLE` `ADD` and `DROP` now allow multiple columns to be
+added/removed.
+* New `PER PARTITION LIMIT` option for `SELECT` statements (see
+https://issues.apache.org/jira/browse/CASSANDRA-7017[CASSANDRA-7017]).
+* User-defined functions can now instantiate
+`UDTValue` and `TupleValue` instances via the new `UDFContext` interface
+(see
+https://issues.apache.org/jira/browse/CASSANDRA-10818[CASSANDRA-10818]).
+* User-defined types may now be stored in a non-frozen form,
+allowing individual fields to be updated and deleted in `UPDATE`
+statements and `DELETE` statements, respectively
+(https://issues.apache.org/jira/browse/CASSANDRA-7423[CASSANDRA-7423]).
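+
+A minimal sketch of the TTL behaviour described in the first item,
+using a hypothetical table:
+
+[source,cql]
+----
+CREATE TABLE sensor_data (id int PRIMARY KEY, reading int)
+    WITH default_time_to_live = 3600;
+
+-- Expires after 3600 seconds: the table default applies
+INSERT INTO sensor_data (id, reading) VALUES (1, 42);
+
+-- Never expires: an explicit TTL of 0 cancels default_time_to_live
+INSERT INTO sensor_data (id, reading) VALUES (2, 43) USING TTL 0;
+----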
+
+== 3.4.1
+
+* Adds `CAST` functions.
+
+== 3.4.0
+
+* Support for materialized views.
+* `DELETE` support for inequality expressions and `IN` restrictions on
+any primary key columns.
+* `UPDATE` support for `IN` restrictions on any primary key columns.
+
+== 3.3.1
+
+* The syntax `TRUNCATE TABLE X` is now accepted as an alias for
+`TRUNCATE X`.
+
+== 3.3.0
+
+* User-defined functions and aggregates are now
+supported.
+* Allows double-dollar enclosed string literals as an alternative to
+single-quote enclosed strings.
+* Introduces Roles to supersede user-based authentication and access
+control.
+* New `date`, `time`, `tinyint` and `smallint` data types
+have been added.
+* JSON support has been added.
+* Adds new time conversion functions and deprecates `dateOf` and
+`unixTimestampOf`.
+
+== 3.2.0
+
+* User-defined types are now supported.
+* `CREATE INDEX` now supports indexing collection columns, including
+indexing the keys of map collections through the `keys()` function.
+* Indexes on collections may be queried using the new `CONTAINS` and
+`CONTAINS KEY` operators.
+* Tuple types were added to hold fixed-length sets of typed
+positional fields.
+* `DROP INDEX` now supports optionally specifying a keyspace.
+
+== 3.1.7
+
+* `SELECT` statements now support selecting multiple rows in a single
+partition using an `IN` clause on combinations of clustering columns.
+* `IF NOT EXISTS` and `IF EXISTS` syntax is now supported by
+`CREATE USER` and `DROP USER` statements, respectively.
+
+== 3.1.6
+
+* A new `uuid()` method has been added.
+* Support for `DELETE ... IF EXISTS` syntax.
+
+== 3.1.5
+
+* It is now possible to group clustering columns in a relation; see
+the section on `WHERE` clauses.
+* Added support for static columns.
+
+== 3.1.4
+
+* `CREATE INDEX` now allows specifying options when creating CUSTOM
+indexes.
+
+== 3.1.3
+
+* Millisecond precision formats have been added to the
+`timestamp` parser.
+
+== 3.1.2
+
+* `NaN` and `Infinity` have been added as valid float constants. They
+are now reserved keywords. In the unlikely case you were using them as a
+column identifier (or a keyspace/table one), you will now need to
+double-quote them.
+
+== 3.1.1
+
+* `SELECT` statement now allows listing the partition keys (using the
+`DISTINCT` modifier). See
+https://issues.apache.org/jira/browse/CASSANDRA-4536[CASSANDRA-4536].
+* The syntax `c IN ?` is now supported in `WHERE` clauses. In that case,
+the value expected for the bind variable will be a list of whatever type
+`c` is.
+* It is now possible to use named bind variables (using `:name` instead
+of `?`).
+
+== 3.1.0
+
+* `ALTER TABLE` `DROP` option added.
+* The `SELECT` statement now supports aliases in the select clause.
+Aliases in WHERE and ORDER BY clauses are not supported.
+* `CREATE` statements for `KEYSPACE`, `TABLE` and `INDEX` now support
+an `IF NOT EXISTS` condition. Similarly, `DROP` statements support an
+`IF EXISTS` condition.
+* `INSERT` statements optionally support an `IF NOT EXISTS` condition
+and `UPDATE` supports `IF` conditions.
+
+== 3.0.5
+
+* `SELECT`, `UPDATE`, and `DELETE` statements now allow empty `IN`
+relations (see
+https://issues.apache.org/jira/browse/CASSANDRA-5626[CASSANDRA-5626]).
+
+== 3.0.4
+
+* Updated the syntax for custom secondary indexes.
+* Non-equality conditions on the partition key are now never supported,
+even for ordering partitioners, as this was not correct (the order was
+*not* the one of the type of the partition key). Instead, the `token`
+method should always be used for range queries on the partition key (see
+the section on `WHERE` clauses).
+
+== 3.0.3
+
+* Support for custom secondary indexes has been
+added.
+
+== 3.0.2
+
+* Type validation for constants has been fixed. For
+instance, the implementation used to allow `'2'` as a valid value for an
+`int` column (interpreting it as the equivalent of `2`), or `42` as a
+valid `blob` value (in which case `42` was interpreted as a hexadecimal
+representation of the blob). This is no longer the case; type validation
+of constants is now stricter. See the data types
+section for details on which constant is allowed for which type.
+* The type validation fix of the previous point has led to the
+introduction of blob constants to allow the input of blobs. Do note
+that while the input of blobs as string constants is still supported by
+this version (to allow a smoother transition to blob constants), it is
+now deprecated and will be removed by a future version. If you were
+using strings as blobs, you should thus update your client code ASAP to
+switch to blob constants.
+* A number of functions to convert native types to blobs have also been
+introduced. Furthermore, the token function is now also allowed in
+select clauses. See the section on functions for details.
+
+== 3.0.1
+
+* Date strings (and timestamps) are no longer accepted as valid
+`timeuuid` values. Doing so was a bug in the sense that date strings are
+not valid `timeuuid` values, and it was thus resulting in
+https://issues.apache.org/jira/browse/CASSANDRA-4936[confusing
+behaviors]. However, the following new methods have been added to help
+working with `timeuuid`: `now`, `minTimeuuid`, `maxTimeuuid`, `dateOf`
+and `unixTimestampOf`; see the example after this list.
+* Float constants now support the exponent notation. In other words,
+`4.2E10` is now a valid floating point value.
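+
+A sketch of the new `timeuuid` helpers in a range query, assuming a
+hypothetical `events` table with a `timeuuid` clustering column `id`:
+
+[source,cql]
+----
+SELECT * FROM events
+ WHERE series = 'system'
+   AND id > minTimeuuid('2013-01-01 00:00+0000')
+   AND id < maxTimeuuid('2013-02-01 00:00+0000');
+----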
+
+== Versioning
+
+Versioning of the CQL language adheres to the http://semver.org[Semantic
+Versioning] guidelines. Versions take the form X.Y.Z where X, Y, and Z
+are integer values representing major, minor, and patch level
+respectively. There is no correlation between Cassandra release versions
+and the CQL language version.
+
+[cols=",",options="header",]
+|===
+|version |description
+| Major | The major version _must_ be bumped when backward incompatible changes
+are introduced. This should rarely occur.
+| Minor | Minor version increments occur when new, but backward compatible,
+functionality is introduced.
+| Patch | The patch version is incremented when bugs are fixed.
+|===
diff --git a/doc/modules/cassandra/pages/cql/cql_singlefile.adoc b/doc/modules/cassandra/pages/cql/cql_singlefile.adoc
new file mode 100644
index 00000000000..e2fea00dc01
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/cql_singlefile.adoc
@@ -0,0 +1,3904 @@
+== Cassandra Query Language (CQL) v3.4.3
+
+=== CQL Syntax
+
+==== Preamble
+
+This document describes the Cassandra Query Language (CQL) version 3.
+CQL v3 is not backward compatible with CQL v2 and differs from it in
+numerous ways. Note that this document describes the latest version of
+the language. However, the link:#changes[changes] section provides the
+diff between the different versions of CQL v3.
+
+CQL v3 offers a model very close to SQL in the sense that data is put in
+_tables_ containing _rows_ of _columns_. For that reason, when used in
+this document, these terms (tables, rows and columns) have the same
+definition as they have in SQL. But please note that, as such, they do
+*not* refer to the concept of rows and columns found in the internal
+implementation of Cassandra and in the Thrift and CQL v2 APIs.
+
+==== Conventions
+
+To aid in specifying the CQL syntax, we will use the following
+conventions in this document:
+
+* Language rules will be given in a
+http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form[BNF]-like
+notation:
+
+bc(syntax). <start> ::= TERMINAL <non-terminal1> <non-terminal2>
+
+* Nonterminal symbols will have `<angle brackets>`.
+* As additional shortcut notations to BNF, we’ll use traditional regular
+expression’s symbols (`?`, `+` and `*`) to signify that a given symbol
+is optional and/or can be repeated. We’ll also allow parentheses to
+group symbols and the `[<characters>]` notation to represent any one of
+`<characters>`.
+* The grammar is provided for documentation purposes and leaves some
+minor details out. For instance, the last column definition in a
+`CREATE TABLE` statement is optional but supported if present, even
+though the grammar in this document suggests it is not
+supported.
+* Sample code will be provided in a code block:
+
+bc(sample). SELECT sample_usage FROM cql;
+
+* References to keywords or pieces of CQL code in running text will be
+shown in a `fixed-width font`.
+
+[[identifiers]]
+==== Identifiers and keywords
+
+The CQL language uses _identifiers_ (or _names_) to identify tables,
+columns and other objects. An identifier is a token matching the regular
+expression `[a-zA-Z][a-zA-Z0-9_]*`.
+
+A number of such identifiers, like `SELECT` or `WITH`, are _keywords_.
+They have a fixed meaning for the language and most are reserved. The
+list of those keywords can be found in link:#appendixA[Appendix A].
+
+Identifiers and (unquoted) keywords are case insensitive. Thus `SELECT`
+is the same as `select` or `sElEcT`, and `myId` is the same as
+`myid` or `MYID`, for instance. A convention often used (in particular by
+the samples of this documentation) is to use upper case for keywords and
+lower case for other identifiers.
+
+There is a second kind of identifiers called _quoted identifiers_
+defined by enclosing an arbitrary sequence of characters in
+double-quotes (`"`). Quoted identifiers are never keywords. Thus
+`"select"` is not a reserved keyword and can be used to refer to a
+column, while `select` would raise a parse error. Also, contrarily to
+unquoted identifiers and keywords, quoted identifiers are case sensitive
+(`"My Quoted Id"` is _different_ from `"my quoted id"`). A fully
+lowercase quoted identifier that matches `[a-zA-Z][a-zA-Z0-9_]*` is
+equivalent to the unquoted identifier obtained by removing the
+double-quote (so `"myid"` is equivalent to `myid` and to `myId` but
+different from `"myId"`). Inside a quoted identifier, the double-quote
+character can be repeated to escape it, so `"foo "" bar"` is a valid
+identifier.
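+
+A short sketch of these case-sensitivity rules (the table and column
+names are illustrative only):
+
+bc(sample). +
+CREATE TABLE users ("userID" int PRIMARY KEY, name text); +
+SELECT "userID" FROM users; +
+SELECT NAME FROM users;
+
+The first `SELECT` must quote `"userID"` to preserve its case, while
+`NAME` resolves to the unquoted, case-insensitive `name` column.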
+
+*Warning*: _quoted identifiers_ allow declaring columns with arbitrary
+names, and those can sometimes clash with specific names used by the
+server. For instance, when using conditional update, the server will
+respond with a result-set containing a special result named
+`"[applied]"`. If you’ve declared a column with such a name, this could
+potentially confuse some tools and should be avoided. In general,
+unquoted identifiers should be preferred but if you use quoted
+identifiers, it is strongly advised to avoid any name enclosed by
+squared brackets (like `"[applied]"`) and any name that looks like a
+function call (like `"f(x)"`).
+
+==== Constants
+
+CQL defines the following kinds of _constants_: strings, integers,
+floats, booleans, uuids and blobs:
+
+* A string constant is an arbitrary sequence of characters
+enclosed in single-quotes (`'`). A single-quote can be included in a
+string by repeating it, e.g. `'It''s raining today'`. String constants are
+not to be confused with quoted identifiers, which use double-quotes.
+* An integer constant is defined by `'-'?[0-9]+`.
+* A float constant is defined by
+`'-'?[0-9]+('.'[0-9]*)?([eE][+-]?[0-9]+)?`. On top of that, `NaN` and
+`Infinity` are also float constants.
+* A boolean constant is either `true` or `false` up to
+case-insensitivity (i.e. `True` is a valid boolean constant).
+* A http://en.wikipedia.org/wiki/Universally_unique_identifier[UUID]
+constant is defined by `hex{8}-hex{4}-hex{4}-hex{4}-hex{12}` where `hex`
+is a hexadecimal character, i.e. `[0-9a-fA-F]`, and `{4}` is the number
+of such characters.
+* A blob constant is a hexadecimal number defined by `0[xX](hex)+`
+where `hex` is a hexadecimal character, i.e. `[0-9a-fA-F]`.
+
+For how these constants are typed, see the link:#types[data types
+section].
+
+==== Comments
+
+A comment in CQL is a line beginning with either double dashes (`--`) or
+a double slash (`//`).
+
+Multi-line comments are also supported through enclosure within `/*` and
+`*/` (but nesting is not supported).
+
+bc(sample). +
+-- This is a comment +
+// This is a comment too +
+/* This is +
+a multi-line comment */
+
+==== Statements
+
+CQL consists of statements. As in SQL, these statements can be divided
+into three categories:
+
+* Data definition statements, that allow setting and changing the way
+data is stored.
+* Data manipulation statements, that allow changing data.
+* Queries, to look up data.
+
+All statements end with a semicolon (`;`) but that semicolon can be
+omitted when dealing with a single statement. The supported statements
+are described in the following sections. When describing the grammar of
+said statements, we will reuse the non-terminal symbols defined below:
+
+bc(syntax).. +
+<identifier> ::= any quoted or unquoted identifier, excluding reserved keywords +
+<tablename> ::= (<identifier> '.')? <identifier>
+
+<string> ::= a string constant +
+<integer> ::= an integer constant +
+<float> ::= a float constant +
+<number> ::= <integer> | <float> +
+<uuid> ::= a uuid constant +
+<boolean> ::= a boolean constant +
+<hex> ::= a blob constant
+
+<constant> ::= <string> +
+| <number> +
+| <uuid> +
+| <boolean> +
+| <hex> +
+<variable> ::= '?' +
+| ':' <identifier> +
+<term> ::= <constant> +
+| <collection-literal> +
+| <variable> +
+| <function> '(' (<term> (',' <term>)*)? ')'
+
+<collection-literal> ::= <map-literal> +
+| <set-literal> +
+| <list-literal> +
+<map-literal> ::= '\{' ( <term> ':' <term> ( ',' <term> ':' <term> )* )? '}' +
+<set-literal> ::= '\{' ( <term> ( ',' <term> )* )? '}' +
+<list-literal> ::= '[' ( <term> ( ',' <term> )* )? ']'
+
+<function> ::= <identifier>
+
+<properties> ::= <property> (AND <property>)* +
+<property> ::= <identifier> '=' ( <identifier> | <constant> | <map-literal> ) +
+p. +
+Please note that not every possible production of the grammar above
+is valid in practice. Most notably, `<variable>` and nested
+`<collection-literal>` are currently not allowed inside
+`<collection-literal>`.
+
+A `<variable>` can be either anonymous (a question mark (`?`)) or named
+(an identifier preceded by `:`). Both declare a bind variable for
+link:#preparedStatement[prepared statements]. The only difference
+between an anonymous and a named variable is that a named one will be
+easier to refer to (how exactly depends on the client driver used).
+
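+For instance, the following two statements (against a hypothetical
+`users` table) are equivalent uses of an anonymous and a named bind
+variable:
+
+bc(sample). +
+SELECT * FROM users WHERE userid = ?; +
+SELECT * FROM users WHERE userid = :id;
+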
+The `<properties>` production is used by statements that create and alter
+keyspaces and tables. Each `<property>` is either a _simple_ one, in
+which case it just has a value, or a _map_ one, in which case its value
+is a map grouping sub-options. The following will refer to one or the
+other as the _kind_ (_simple_ or _map_) of the property.
+
+A `<tablename>` will be used to identify a table. It is an identifier
+representing the table name that can be preceded by a keyspace name. The
+keyspace name, if provided, allows identifying a table in a keyspace
+other than the currently active one (the currently active keyspace is
+set through the `USE` statement).
+
+For supported `<function>`s, see the section on
+link:#functions[functions].
+
+Strings can be enclosed either in single quotes or in two dollar
+characters (`$$`). The second syntax was introduced to allow strings that
+contain single quotes. Typical candidates for such strings are source
+code fragments for user-defined functions.
+
+_Sample:_
+
+bc(sample).. +
+'some string value'
+
+$$double-dollar string can contain single ' quotes$$ +
+p.
+
+[[preparedStatement]]
+==== Prepared Statement
+
+CQL supports _prepared statements_. A prepared statement is an
+optimization that allows parsing a query only once and executing it
+multiple times with different concrete values.
+
+In a statement, each time a column value is expected (in the data
+manipulation and query statements), a `<variable>` (see above) can be
+used instead. A statement with bind variables must then be _prepared_.
+Once it has been prepared, it can be executed by providing concrete
+values for the bind variables. The exact procedure to prepare a
+statement and execute a prepared statement depends on the CQL driver
+used and is beyond the scope of this document.
+
+In addition to providing column values, bind markers may be used to
+provide values for `LIMIT`, `TIMESTAMP`, and `TTL` clauses. If anonymous
+bind markers are used, the names for the query parameters will be
+`[limit]`, `[timestamp]`, and `[ttl]`, respectively.
+
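+For instance, both of the following statements (against a hypothetical
+`users` table) can be prepared:
+
+bc(sample). +
+SELECT * FROM users LIMIT ?; +
+INSERT INTO users (userid, name) VALUES (?, ?) USING TTL :ttl;
+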
+[[dataDefinition]]
+=== Data Definition
+
+[[createKeyspaceStmt]]
+==== CREATE KEYSPACE
+
+_Syntax:_
+
+bc(syntax).. +
+<create-keyspace-stmt> ::= CREATE KEYSPACE (IF NOT EXISTS)? <identifier> WITH <properties> +
+p. +
+_Sample:_
+
+bc(sample).. +
+CREATE KEYSPACE Excelsior +
+WITH replication = \{'class': 'SimpleStrategy', 'replication_factor': 3};
+
+CREATE KEYSPACE Excalibur +
+WITH replication = \{'class': 'NetworkTopologyStrategy', 'DC1': 1, 'DC2': 3} +
+AND durable_writes = false; +
+p. +
+The `CREATE KEYSPACE` statement creates a new top-level _keyspace_. A
+keyspace is a namespace that defines a replication strategy and some
+options for a set of tables. Valid keyspace names are identifiers
+composed exclusively of alphanumerical characters and at most 32
+characters long. Note that, as for identifiers, keyspace names are case
+insensitive: use a quoted identifier for case sensitive keyspace names.
+
+The supported `<properties>` for `CREATE KEYSPACE` are:
+
+[cols=",,,,",options="header",]
+|===
+|name |kind |mandatory |default |description
+|`replication` |_map_ |yes | |The replication strategy and options to
+use for the keyspace.
+
+|`durable_writes` |_simple_ |no |true |Whether to use the commit log for
+updates on this keyspace (disable this option at your own risk!).
+|===
+
+The `replication` `<property>` is mandatory. It must at least contain
+the `'class'` sub-option, which defines the replication strategy class
+to use. The rest of the sub-options depend on that replication strategy
+class. By default, Cassandra supports the following `'class'`:
+
+* `'SimpleStrategy'`: A simple strategy that defines a simple
+replication factor for the whole cluster. The only supported sub-option
+is `'replication_factor'`, which defines that replication factor and is
+mandatory.
+* `'NetworkTopologyStrategy'`: A replication strategy that allows setting
+the replication factor independently for each data-center. The rest of
+the sub-options are key-value pairs, where the key is the name of a
+data-center and the value is the replication factor for that
+data-center.
+
+Attempting to create an already existing keyspace will return an error
+unless the `IF NOT EXISTS` option is used. If it is used, the statement
+will be a no-op if the keyspace already exists.
+
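+For instance, a minimal sketch of a creation that is a no-op when the
+keyspace already exists:
+
+bc(sample). +
+CREATE KEYSPACE IF NOT EXISTS Excelsior +
+WITH replication = \{'class': 'SimpleStrategy', 'replication_factor': 3};
+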
+[[useStmt]]
+==== USE
+
+_Syntax:_
+
+bc(syntax). <use-stmt> ::= USE <identifier>
+
+_Sample:_
+
+bc(sample). USE myApp;
+
+The `USE` statement takes an existing keyspace name as argument and sets
+it as the per-connection current working keyspace. All subsequent
+keyspace-specific actions will be performed in the context of the
+selected keyspace, unless link:#statements[otherwise specified], until
+another USE statement is issued or the connection terminates.
+
+[[alterKeyspaceStmt]]
+==== ALTER KEYSPACE
+
+_Syntax:_
+
+bc(syntax).. +
+<alter-keyspace-stmt> ::= ALTER KEYSPACE <identifier> WITH <properties> +
+p. +
+_Sample:_
+
+bc(sample).. +
+ALTER KEYSPACE Excelsior +
+WITH replication = \{'class': 'SimpleStrategy', 'replication_factor': 4};
+
+The `ALTER KEYSPACE` statement alters the properties of an existing
+keyspace. The supported `<properties>` are the same as for the
+link:#createKeyspaceStmt[`CREATE KEYSPACE`] statement.
+
+[[dropKeyspaceStmt]]
+==== DROP KEYSPACE
+
+_Syntax:_
+
+bc(syntax). <drop-keyspace-stmt> ::= DROP KEYSPACE ( IF EXISTS )? <identifier>
+
+_Sample:_
+
+bc(sample). DROP KEYSPACE myApp;
+
+A `DROP KEYSPACE` statement results in the immediate, irreversible
+removal of an existing keyspace, including all column families in it,
+and all data contained in those column families.
+
+If the keyspace does not exist, the statement will return an error,
+unless `IF EXISTS` is used, in which case the operation is a no-op.
+
+[[createTableStmt]]
+==== CREATE TABLE
+
+_Syntax:_
+
+bc(syntax).. +
+<create-table-stmt> ::= CREATE ( TABLE | COLUMNFAMILY ) ( IF NOT EXISTS )? <tablename> +
+'(' <column-definition> ( ',' <column-definition> )* ')' +
+( WITH <option> ( AND <option> )* )?
+
+<column-definition> ::= <identifier> <type> ( STATIC )? ( PRIMARY KEY )? +
+| PRIMARY KEY '(' <partition-key> ( ',' <identifier> )* ')'
+
+<partition-key> ::= <identifier> +
+| '(' <identifier> ( ',' <identifier> )* ')'
+
+<option> ::= <property> +
+| COMPACT STORAGE +
+| CLUSTERING ORDER +
+p. +
+_Sample:_
+
+bc(sample).. +
+CREATE TABLE monkeySpecies ( +
+species text PRIMARY KEY, +
+common_name text, +
+population varint, +
+average_size int +
+) WITH comment='Important biological records';
+
+CREATE TABLE timeline ( +
+userid uuid, +
+posted_month int, +
+posted_time uuid, +
+body text, +
+posted_by text, +
+PRIMARY KEY (userid, posted_month, posted_time) +
+) WITH compaction = \{'class': 'LeveledCompactionStrategy'}; +
+p. +
+The `CREATE TABLE` statement creates a new table. Each such table is a
+set of _rows_ (usually representing related entities) for which it
+defines a number of properties. A table is defined by a
+link:#createTableName[name], by the columns composing the rows of the
+table, and by a number of link:#createTableOptions[options]. Note
+that the `CREATE COLUMNFAMILY` syntax is supported as an alias for
+`CREATE TABLE` (for historical reasons).
+
+Attempting to create an already existing table will return an error
+unless the `IF NOT EXISTS` option is used. If it is used, the statement
+will be a no-op if the table already exists.
+
+[[createTableName]]
+===== `<tablename>`
+
+Valid table names are the same as valid
+link:#createKeyspaceStmt[keyspace names] (up to 32 characters long
+alphanumerical identifiers). If the table name is provided alone, the
+table is created within the current keyspace (see `USE`), but if it is
+prefixed by an existing keyspace name (see
+link:#statements[``] grammar), it is created in the specified
+keyspace (but does *not* change the current keyspace).
+
+[[createTableColumn]]
+===== `<column-definition>`
+
+A `CREATE TABLE` statement defines the columns that rows of the table
+can have. A _column_ is defined by its name (an identifier) and its type
+(see the link:#types[data types] section for more details on allowed
+types and their properties).
+
+Within a table, a row is uniquely identified by its `PRIMARY KEY` (or
+more simply, the key), and hence all table definitions *must* define a
+PRIMARY KEY (and only one). A `PRIMARY KEY` is composed of one or more
+of the columns defined in the table. If the `PRIMARY KEY` is only one
+column, this can be specified directly after the column definition.
+Otherwise, it must be specified by following `PRIMARY KEY` with the
+comma-separated list of column names composing the key, within
+parentheses. Note that:
+
+bc(sample). +
+CREATE TABLE t ( +
+k int PRIMARY KEY, +
+other text +
+)
+
+is equivalent to
+
+bc(sample). +
+CREATE TABLE t ( +
+k int, +
+other text, +
+PRIMARY KEY (k) +
+)
+
+[[createTablepartitionClustering]]
+===== Partition key and clustering columns
+
+In CQL, the order in which columns are defined for the `PRIMARY KEY`
+matters. The first column of the key is called the _partition key_. It
+has the property that all the rows sharing the same partition key (even
+across tables, in fact) are stored on the same physical node. Also,
+insertions/updates/deletions on rows sharing the same partition key for
+a given table are performed _atomically_ and in _isolation_. Note that
+it is possible to have a composite partition key, i.e. a partition key
+formed of multiple columns, using an extra set of parentheses to define
+which columns form the partition key.
+
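+For instance, a minimal sketch (with hypothetical column names) of a
+composite partition key composed of `k1` and `k2`, with `c` as the only
+clustering column:
+
+bc(sample). +
+CREATE TABLE t2 ( +
+k1 int, +
+k2 int, +
+c int, +
+v text, +
+PRIMARY KEY ((k1, k2), c) +
+);
+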
+The remaining columns of the `PRIMARY KEY` definition, if any, are
+called _clustering columns_. On a given physical node, rows for a given
+partition key are stored in the order induced by the clustering columns,
+making the retrieval of rows in that clustering order particularly
+efficient (see `SELECT`).
+
+[[createTableStatic]]
+===== `STATIC` columns
+
+Some columns can be declared as `STATIC` in a table definition. A column
+that is static will be ``shared'' by all the rows belonging to the same
+partition (having the same partition key). For instance, in:
+
+bc(sample). +
+CREATE TABLE test ( +
+pk int, +
+t int, +
+v text, +
+s text static, +
+PRIMARY KEY (pk, t) +
+); +
+INSERT INTO test(pk, t, v, s) VALUES (0, 0, 'val0', 'static0'); +
+INSERT INTO test(pk, t, v, s) VALUES (0, 1, 'val1', 'static1'); +
+SELECT * FROM test WHERE pk=0 AND t=0;
+
+the last query will return `'static1'` as value for `s`, since `s` is
+static and thus the 2nd insertion modified this ``shared'' value. Note
+however that static columns are only static within a given partition,
+and if in the example above both rows were in different partitions
+(i.e. if they had different values for `pk`), then the 2nd insertion
+would not have modified the value of `s` for the first row.
+
+A few restrictions apply to when static columns are allowed:
+
+* tables with the `COMPACT STORAGE` option (see below) cannot have them
+* a table without clustering columns cannot have static columns (in a
+table without clustering columns, every partition has only one row, and
+so every column is inherently static).
+* only non-`PRIMARY KEY` columns can be static
+
+[[createTableOptions]]
+===== ``
+
+The `CREATE TABLE` statement supports a number of options that control
+the configuration of a new table. These options can be specified after
+the `WITH` keyword.
+
+The first of these options is `COMPACT STORAGE`. This option is mainly
+targeted towards backward compatibility for definitions created before
+CQL3 (see
+http://www.datastax.com/dev/blog/thrift-to-cql3[www.datastax.com/dev/blog/thrift-to-cql3]
+for more details). The option also provides a slightly more compact
+layout of data on disk, but at the price of diminished flexibility and
+extensibility for the table. Most notably, `COMPACT STORAGE` tables
+cannot have collections nor static columns, and a `COMPACT STORAGE`
+table with at least one clustering column supports exactly one (as in,
+not 0 nor more than 1) column not part of the `PRIMARY KEY` definition
+(which implies in particular that you cannot add nor remove columns
+after creation). For those reasons, `COMPACT STORAGE` is not recommended
+outside of the backward compatibility reasons evoked above.
+
+Another option is `CLUSTERING ORDER`. It allows defining the ordering
+of rows on disk. It takes the list of the clustering column names with,
+for each of them, the on-disk order (ascending or descending). Note that
+this option affects link:#selectOrderBy[which `ORDER BY` clauses are
+allowed during `SELECT`].
+
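+For instance, a sketch based on a trimmed variant of the `timeline`
+sample above, storing the most recent posts first:
+
+bc(sample). +
+CREATE TABLE timeline ( +
+userid uuid, +
+posted_month int, +
+posted_time uuid, +
+body text, +
+PRIMARY KEY (userid, posted_month, posted_time) +
+) WITH CLUSTERING ORDER BY (posted_month DESC, posted_time DESC);
+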
+Table creation supports the following other options:
+
+[cols=",,,",options="header",]
+|===
+|option |kind |default |description
+|`comment` |_simple_ |none |A free-form, human-readable comment.
+
+|`gc_grace_seconds` |_simple_ |864000 |Time to wait before garbage
+collecting tombstones (deletion markers).
+
+|`bloom_filter_fp_chance` |_simple_ |0.00075 |The target probability of
+false positives for the sstable bloom filters. Said bloom filters will
+be sized to provide the requested probability (thus lowering this value
+impacts the size of bloom filters in-memory and on-disk)
+
+|`default_time_to_live` |_simple_ |0 |The default expiration time
+(``TTL'') in seconds for a table.
+
+|`compaction` |_map_ |_see below_ |Compaction options, see
+link:#compactionOptions[below].
+
+|`compression` |_map_ |_see below_ |Compression options, see
+link:#compressionOptions[below].
+
+|`caching` |_map_ |_see below_ |Caching options, see
+link:#cachingOptions[below].
+|===
+
+[[compactionOptions]]
+===== Compaction options
+
+The `compaction` property must at least define the `'class'` sub-option,
+which defines the compaction strategy class to use. The default
+supported classes are `'SizeTieredCompactionStrategy'`,
+`'LeveledCompactionStrategy'`, `'DateTieredCompactionStrategy'` and
+`'TimeWindowCompactionStrategy'`. A custom strategy can be provided by
+specifying the full class name as a link:#constants[string constant].
+The rest of the sub-options depend on the chosen class. The sub-options
+supported by the default classes are:
+
+[cols=",,,",options="header",]
+|===
+|option |supported compaction strategy |default |description
+|`enabled` |_all_ |true |A boolean denoting whether compaction should be
+enabled or not.
+
+|`tombstone_threshold` |_all_ |0.2 |A ratio such that if a sstable has
+more than this ratio of gcable tombstones over all contained columns,
+the sstable will be compacted (with no other sstables) for the purpose
+of purging those tombstones.
+
+|`tombstone_compaction_interval` |_all_ |1 day |The minimum time to wait
+after an sstable creation time before considering it for ``tombstone
+compaction'', where ``tombstone compaction'' is the compaction triggered
+if the sstable has more gcable tombstones than `tombstone_threshold`.
+
+|`unchecked_tombstone_compaction` |_all_ |false |Setting this to true
+enables more aggressive tombstone compactions - single sstable tombstone
+compactions will run without checking how likely it is that they will be
+successful.
+
+|`min_sstable_size` |SizeTieredCompactionStrategy |50MB |The size tiered
+strategy groups SSTables to compact in buckets. A bucket groups SSTables
+that differs from less than 50% in size. However, for small sizes, this
+would result in a bucketing that is too fine grained. `min_sstable_size`
+defines a size threshold (in bytes) below which all SSTables belong to
+one unique bucket
+
+|`min_threshold` |SizeTieredCompactionStrategy |4 |Minimum number of
+SSTables needed to start a minor compaction.
+
+|`max_threshold` |SizeTieredCompactionStrategy |32 |Maximum number of
+SSTables processed by one minor compaction.
+
+|`bucket_low` |SizeTieredCompactionStrategy |0.5 |Size tiered considers
+sstables to be within the same bucket if their size is within
+[average_size * `bucket_low`, average_size * `bucket_high`] (i.e. the
+default groups sstables whose sizes diverge by at most 50%)
+
+|`bucket_high` |SizeTieredCompactionStrategy |1.5 |Size tiered considers
+sstables to be within the same bucket if their size is within
+[average_size * `bucket_low`, average_size * `bucket_high`] (i.e. the
+default groups sstables whose sizes diverge by at most 50%).
+
+|`sstable_size_in_mb` |LeveledCompactionStrategy |5MB |The target size
+(in MB) for sstables in the leveled strategy. Note that while sstable
+sizes should stay less or equal to `sstable_size_in_mb`, it is possible
+to exceptionally have a larger sstable as during compaction, data for a
+given partition key are never split into 2 sstables
+
+|`timestamp_resolution` |DateTieredCompactionStrategy |MICROSECONDS |The
+timestamp resolution used when inserting data, could be MILLISECONDS,
+MICROSECONDS etc (should be understandable by Java TimeUnit) - don’t
+change this unless you do mutations with USING TIMESTAMP (or equivalent
+directly in the client)
+
+|`base_time_seconds` |DateTieredCompactionStrategy |60 |The base size of
+the time windows.
+
+|`max_sstable_age_days` |DateTieredCompactionStrategy |365 |SSTables
+only containing data that is older than this will never be compacted.
+
+|`timestamp_resolution` |TimeWindowCompactionStrategy |MICROSECONDS |The
+timestamp resolution used when inserting data, could be MILLISECONDS,
+MICROSECONDS etc (should be understandable by Java TimeUnit) - don’t
+change this unless you do mutations with USING TIMESTAMP (or equivalent
+directly in the client)
+
+|`compaction_window_unit` |TimeWindowCompactionStrategy |DAYS |The Java
+TimeUnit used for the window size, set in conjunction with
+`compaction_window_size`. Must be one of DAYS, HOURS, MINUTES
+
+|`compaction_window_size` |TimeWindowCompactionStrategy |1 |The number
+of `compaction_window_unit` units that make up a time window.
+
+|`unsafe_aggressive_sstable_expiration` |TimeWindowCompactionStrategy
+|false |Expired sstables will be dropped without checking its data is
+shadowing other sstables. This is a potentially risky option that can
+lead to data loss or deleted data re-appearing, going beyond what
+`unchecked_tombstone_compaction` does for single sstable compaction. Due
+to the risk the jvm must also be started with
+`-Dcassandra.unsafe_aggressive_sstable_expiration=true`.
+|===
+
+[[compressionOptions]]
+===== Compression options
+
+For the `compression` property, the following sub-options are available:
+
+[cols=",,,,,",options="header",]
+|===
+|option |default |description | | |
+|`class` |LZ4Compressor |The compression algorithm to use. Default
+compressor are: LZ4Compressor, SnappyCompressor and DeflateCompressor.
+Use `'enabled' : false` to disable compression. Custom compressor can be
+provided by specifying the full class name as a link:#constants[string
+constant]. | | |
+
+|`enabled` |true |By default compression is enabled. To disable it, set
+`enabled` to `false` |`chunk_length_in_kb` |64KB |On disk SSTables are
+compressed by block (to allow random reads). This defines the size (in
+KB) of said block. Bigger values may improve the compression rate, but
+increases the minimum size of data to be read from disk for a read
+
+|`crc_check_chance` |1.0 |When compression is enabled, each compressed
+block includes a checksum of that block for the purpose of detecting
+disk bitrot and avoiding the propagation of corruption to other replica.
+This option defines the probability with which those checksums are
+checked during read. By default they are always checked. Set to 0 to
+disable checksum checking and to 0.5 for instance to check them every
+other read | | |
+|===
+
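+As an illustrative sketch, compression options are provided as a map at
+table creation time (the table definition here is hypothetical):
+
+bc(sample). +
+CREATE TABLE logs ( +
+id uuid PRIMARY KEY, +
+msg text +
+) WITH compression = \{'class': 'LZ4Compressor', 'chunk_length_in_kb': 64};
+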
+[[cachingOptions]]
+===== Caching options
+
+For the `caching` property, the following sub-options are available:
+
+[cols=",,",options="header",]
+|===
+|option |default |description
+|`keys` |ALL |Whether to cache keys (``key cache'') for this table.
+Valid values are: `ALL` and `NONE`.
+
+|`rows_per_partition` |NONE |The amount of rows to cache per partition
+(``row cache''). If an integer `n` is specified, the first `n` queried
+rows of a partition will be cached. Other possible options are `ALL`, to
+cache all rows of a queried partition, or `NONE` to disable row caching.
+|===
+
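+For instance, a sketch (with a hypothetical table) caching all keys and
+the first 10 queried rows of each partition:
+
+bc(sample). +
+CREATE TABLE scores ( +
+user text, +
+game text, +
+score int, +
+PRIMARY KEY (user, game) +
+) WITH caching = \{'keys': 'ALL', 'rows_per_partition': '10'};
+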
+===== Other considerations:
+
+* When link:#insertStmt[inserting] / link:#updateStmt[updating] a given
+row, not all columns need to be defined (except for those part of the
+key), and missing columns occupy no space on disk. Furthermore, adding
+new columns (see `ALTER TABLE`) is a constant time operation. There is
+thus no need to try to anticipate future usage (or to cry when you
+haven’t) when creating a table.
+
+[[alterTableStmt]]
+==== ALTER TABLE
+
+_Syntax:_
+
+bc(syntax).. +
+<alter-table-stmt> ::= ALTER (TABLE | COLUMNFAMILY) <tablename> <instruction>
+
+<instruction> ::= ADD <identifier> <type> +
+| ADD ( <identifier> <type> ( , <identifier> <type> )* ) +
+| DROP <identifier> +
+| DROP ( <identifier> ( , <identifier> )* ) +
+| WITH <option> ( AND <option> )* +
+p. +
+_Sample:_
+
+bc(sample).. +
+ALTER TABLE addamsFamily
+
+ALTER TABLE addamsFamily +
+ADD gravesite varchar;
+
+ALTER TABLE addamsFamily +
+WITH comment = 'A most excellent and useful column family'; +
+p. +
+The `ALTER` statement is used to manipulate table definitions. It allows
+for adding new columns, dropping existing ones, or updating the table
+options. As with table creation, `ALTER COLUMNFAMILY` is allowed as an
+alias for `ALTER TABLE`.
+
+The `<tablename>` is the table name optionally preceded by the keyspace
+name. The `<instruction>` defines the alteration to perform:
+
+* `ADD`: Adds a new column to the table. The `<identifier>` for the new
+column must not conflict with an existing column. Moreover, columns
+cannot be added to tables defined with the `COMPACT STORAGE` option.
+* `DROP`: Removes a column from the table. Dropped columns will
+immediately become unavailable in queries and will not be included
+in compacted sstables in the future. If a column is re-added, queries
+won't return values written before the column was last dropped. It is
+assumed that timestamps represent actual time, so if this is not the
+case, you should NOT re-add previously dropped columns. Columns can't be
+dropped from tables defined with the `COMPACT STORAGE` option.
+* `WITH`: Updates the options of the table. The
+link:#createTableOptions[supported options] (and syntax) are the same
+as for the `CREATE TABLE` statement, except that `COMPACT STORAGE` is
+not supported. Note that setting any `compaction` sub-options has the
+effect of erasing all previous `compaction` options, so you need to
+re-specify all the sub-options if you want to keep them (see the sketch
+after this list). The same note applies to the set of `compression`
+sub-options.
+
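+For instance, a minimal sketch of updating a table option,
+re-specifying the full `compaction` map (the values shown are
+illustrative):
+
+bc(sample). +
+ALTER TABLE addamsFamily +
+WITH compaction = \{'class': 'SizeTieredCompactionStrategy', 'min_threshold': 6};
+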
+===== CQL type compatibility:
+
+CQL data types may be converted only as shown in the following table.
+
+[cols=",",options="header",]
+|===
+|Data type may be altered to: |Data type
+|timestamp |bigint
+
+|ascii, bigint, boolean, date, decimal, double, float, inet, int,
+smallint, text, time, timestamp, timeuuid, tinyint, uuid, varchar,
+varint |blob
+
+|int |date
+
+|ascii, varchar |text
+
+|bigint |time
+
+|bigint |timestamp
+
+|timeuuid |uuid
+
+|ascii, text |varchar
+
+|bigint, int, timestamp |varint
+|===
+
+Clustering columns have stricter requirements; only the conversions
+below are allowed.
+
+[cols=",",options="header",]
+|===
+|Data type may be altered to: |Data type
+|ascii, text, varchar |blob
+|ascii, varchar |text
+|ascii, text |varchar
+|===
+
+[[dropTableStmt]]
+==== DROP TABLE
+
+_Syntax:_
+
+bc(syntax). <drop-table-stmt> ::= DROP TABLE ( IF EXISTS )? <tablename>
+
+_Sample:_
+
+bc(sample). DROP TABLE worldSeriesAttendees;
+
+The `DROP TABLE` statement results in the immediate, irreversible
+removal of a table, including all data contained in it. As for table
+creation, `DROP COLUMNFAMILY` is allowed as an alias for `DROP TABLE`.
+
+If the table does not exist, the statement will return an error, unless
+`IF EXISTS` is used in which case the operation is a no-op.
+
+[[truncateStmt]]
+==== TRUNCATE
+
+_Syntax:_
+
+bc(syntax). <truncate-stmt> ::= TRUNCATE ( TABLE | COLUMNFAMILY )? <tablename>
+
+_Sample:_
+
+bc(sample). TRUNCATE superImportantData;
+
+The `TRUNCATE` statement permanently removes all data from a table.
+
+[[createIndexStmt]]
+==== CREATE INDEX
+
+_Syntax:_
+
+bc(syntax).. +
+<create-index-stmt> ::= CREATE ( CUSTOM )? INDEX ( IF NOT EXISTS )? ( <indexname> )? +
+ON <tablename> '(' <index-identifier> ')' +
+( USING <string> ( WITH OPTIONS = <map-literal> )? )?
+
+<index-identifier> ::= <identifier> +
+| keys( <identifier> )
+p. +
+_Sample:_
+
+bc(sample). +
+CREATE INDEX userIndex ON NerdMovies (user); +
+CREATE INDEX ON Mutants (abilityId); +
+CREATE INDEX ON users (keys(favs)); +
+CREATE CUSTOM INDEX ON users (email) USING 'path.to.the.IndexClass'; +
+CREATE CUSTOM INDEX ON users (email) USING 'path.to.the.IndexClass' WITH
+OPTIONS = \{'storage': '/mnt/ssd/indexes/'};
+
+The `CREATE INDEX` statement is used to create a new (automatic)
+secondary index for a given (existing) column in a given table. A name
+for the index itself can be specified before the `ON` keyword, if
+desired. If data already exists for the column, it will be indexed
+asynchronously. After the index is created, new data for the column is
+indexed automatically at insertion time.
+
+Attempting to create an already existing index will return an error
+unless the `IF NOT EXISTS` option is used. If it is used, the statement
+will be a no-op if the index already exists.
+
+[[keysIndex]]
+===== Indexes on Map Keys
+
+When creating an index on a link:#map[map column], you may index either
+the keys or the values. If the column identifier is placed within the
+`keys()` function, the index will be on the map keys, allowing you to
+use `CONTAINS KEY` in `WHERE` clauses. Otherwise, the index will be on
+the map values.
+
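+For instance, assuming the `favs` map column indexed with `keys()` in
+the samples above, a sketch of a query such an index enables:
+
+bc(sample). +
+SELECT * FROM users WHERE favs CONTAINS KEY 'movies';
+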
+[[dropIndexStmt]]
+==== DROP INDEX
+
+_Syntax:_
+
+bc(syntax). <drop-index-stmt> ::= DROP INDEX ( IF EXISTS )? ( <keyspace> '.' )? <indexname>
+
+_Sample:_
+
+bc(sample).. +
+DROP INDEX userIndex;
+
+DROP INDEX userkeyspace.address_index; +
+p. +
+The `DROP INDEX` statement is used to drop an existing secondary index.
+The argument of the statement is the index name, which may optionally
+specify the keyspace of the index.
+
+If the index does not exist, the statement will return an error, unless
+`IF EXISTS` is used, in which case the operation is a no-op.
+
+[[createMVStmt]]
+==== CREATE MATERIALIZED VIEW
+
+_Syntax:_
+
+bc(syntax).. +
+::= CREATE MATERIALIZED VIEW ( IF NOT EXISTS )? AS +
+SELECT ( `(' ( `,' ) * `)' | `*' ) +
+FROM +
+( WHERE )? +
+PRIMARY KEY `(' ( `,' )* `)' +
+( WITH ( AND )* )? +
+p. +
+_Sample:_
+
+bc(sample).. +
+CREATE MATERIALIZED VIEW monkeySpecies_by_population AS +
+SELECT * +
+FROM monkeySpecies +
+WHERE population IS NOT NULL AND species IS NOT NULL +
+PRIMARY KEY (population, species) +
+WITH comment='Allow query by population instead of species'; +
+p. +
+The `CREATE MATERIALIZED VIEW` statement creates a new materialized
+view. Each such view is a set of _rows_ which corresponds to rows which
+are present in the underlying, or base, table specified in the `SELECT`
+statement. A materialized view cannot be directly updated, but updates
+to the base table will cause corresponding updates in the view.
+
+Attempting to create an already existing materialized view will return
+an error unless the `IF NOT EXISTS` option is used. If it is used, the
+statement will be a no-op if the materialized view already exists.
+
+[[createMVWhere]]
+===== `WHERE` Clause
+
+The `` is similar to the link:#selectWhere[where clause of
+a `SELECT` statement], with a few differences. First, the where clause
+must contain an expression that disallows `NULL` values in columns in
+the view’s primary key. If no other restriction is desired, this can be
+accomplished with an `IS NOT NULL` expression. Second, only columns
+which are in the base table’s primary key may be restricted with
+expressions other than `IS NOT NULL`. (Note that this second restriction
+may be lifted in the future.)
+
+[[alterMVStmt]]
+==== ALTER MATERIALIZED VIEW
+
+_Syntax:_
+
+bc(syntax). <alter-materialized-view-stmt> ::= ALTER MATERIALIZED VIEW <viewname> +
+WITH <option> ( AND <option> )*
+
+The `ALTER MATERIALIZED VIEW` statement allows options to be updated;
+these options are the same as the `CREATE TABLE` options.
+
+[[dropMVStmt]]
+==== DROP MATERIALIZED VIEW
+
+_Syntax:_
+
+bc(syntax). <drop-materialized-view-stmt> ::= DROP MATERIALIZED VIEW ( IF EXISTS )? <viewname>
+
+_Sample:_
+
+bc(sample). DROP MATERIALIZED VIEW monkeySpecies_by_population;
+
+The `DROP MATERIALIZED VIEW` statement is used to drop an existing
+materialized view.
+
+If the materialized view does not exist, the statement will return an
+error, unless `IF EXISTS` is used, in which case the operation is a
+no-op.
+
+[[createTypeStmt]]
+==== CREATE TYPE
+
+_Syntax:_
+
+bc(syntax).. +
+<create-type-stmt> ::= CREATE TYPE ( IF NOT EXISTS )? <typename> +
+'(' <field-definition> ( ',' <field-definition> )* ')'
+
+<typename> ::= ( <keyspace-name> '.' )? <identifier>
+
+<field-definition> ::= <identifier> <type>
+
+_Sample:_
+
+bc(sample).. +
+CREATE TYPE address ( +
+street_name text, +
+street_number int, +
+city text, +
+state text, +
+zip int +
+)
+
+CREATE TYPE work_and_home_addresses ( +
+home_address address, +
+work_address address +
+) +
+p. +
+The `CREATE TYPE` statement creates a new user-defined type. Each type
+is a set of named, typed fields. Field types may be any valid type,
+including collections and other existing user-defined types.
+
+Attempting to create an already existing type will result in an error
+unless the `IF NOT EXISTS` option is used. If it is used, the statement
+will be a no-op if the type already exists.
+
+[[createTypeName]]
+===== ``
+
+Valid type names are identifiers. The names of existing CQL types and
+link:#appendixB[reserved type names] may not be used.
+
+If the type name is provided alone, the type is created with the current
+keyspace (see `USE`). If it is prefixed by an existing keyspace name,
+the type is created within the specified keyspace instead of the current
+keyspace.
+
+[[alterTypeStmt]]
+==== ALTER TYPE
+
+_Syntax:_
+
+bc(syntax).. +
+<alter-type-stmt> ::= ALTER TYPE <typename> <instruction>
+
+<instruction> ::= ADD <field-definition> +
+| RENAME <identifier> TO <identifier> ( AND <identifier> TO <identifier> )* +
+p. +
+_Sample:_
+
+bc(sample).. +
+ALTER TYPE address ADD country text
+
+ALTER TYPE address RENAME zip TO zipcode AND street_name TO street +
+p. +
+The `ALTER TYPE` statement is used to manipulate type definitions. It
+allows for adding new fields, renaming existing fields, or changing the
+type of existing fields.
+
+[[dropTypeStmt]]
+==== DROP TYPE
+
+_Syntax:_
+
+bc(syntax).. +
+<drop-type-stmt> ::= DROP TYPE ( IF EXISTS )? <typename> +
+p. +
+The `DROP TYPE` statement results in the immediate, irreversible removal
+of a type. Attempting to drop a type that is still in use by another
+type or a table will result in an error.
+
+If the type does not exist, an error will be returned unless `IF EXISTS`
+is used, in which case the operation is a no-op.
+
+[[createTriggerStmt]]
+==== CREATE TRIGGER
+
+_Syntax:_
+
+bc(syntax).. +
+<create-trigger-stmt> ::= CREATE TRIGGER ( IF NOT EXISTS )? ( <triggername> )? +
+ON <tablename> +
+USING <string>
+
+_Sample:_
+
+bc(sample). +
+CREATE TRIGGER myTrigger ON myTable USING
+'org.apache.cassandra.triggers.InvertedIndex';
+
+The actual logic that makes up the trigger can be written in any Java
+(JVM) language and exists outside the database. You place the trigger
+code in a `lib/triggers` subdirectory of the Cassandra installation
+directory, it loads during cluster startup, and exists on every node
+that participates in a cluster. The trigger defined on a table fires
+before a requested DML statement occurs, which ensures the atomicity of
+the transaction.
+
+[[dropTriggerStmt]]
+==== DROP TRIGGER
+
+_Syntax:_
+
+bc(syntax).. +
+<drop-trigger-stmt> ::= DROP TRIGGER ( IF EXISTS )? ( <triggername> )? +
+ON <tablename> +
+p. +
+_Sample:_
+
+bc(sample). +
+DROP TRIGGER myTrigger ON myTable;
+
+The `DROP TRIGGER` statement removes the registration of a trigger
+created using `CREATE TRIGGER`.
+
+[[createFunctionStmt]]
+==== CREATE FUNCTION
+
+_Syntax:_
+
+bc(syntax).. +
+<create-function-stmt> ::= CREATE ( OR REPLACE )? +
+FUNCTION ( IF NOT EXISTS )? +
+( <keyspace> '.' )? <function-name> +
+'(' <arg-name> <arg-type> ( ',' <arg-name> <arg-type> )* ')' +
+( CALLED | RETURNS NULL ) ON NULL INPUT +
+RETURNS <type> +
+LANGUAGE <language> +
+AS <body>
+
+_Sample:_
+
+bc(sample). +
+CREATE OR REPLACE FUNCTION somefunction +
+( somearg int, anotherarg text, complexarg frozen<someUDT>, listarg list<bigint> ) +
+RETURNS NULL ON NULL INPUT +
+RETURNS text +
+LANGUAGE java +
+AS $$ +
+// some Java code +
+$$; +
+CREATE FUNCTION IF NOT EXISTS akeyspace.fname +
+( someArg int ) +
+CALLED ON NULL INPUT +
+RETURNS text +
+LANGUAGE java +
+AS $$ +
+// some Java code +
+$$;
+
+`CREATE FUNCTION` creates or replaces a user-defined function.
+
+[[functionSignature]]
+===== Function Signature
+
+Signatures are used to distinguish individual functions. The signature
+consists of:
+
+. The fully qualified function name - i.e _keyspace_ plus
+_function-name_
+. The concatenated list of all argument types
+
+Note that keyspace names, function names and argument types are subject
+to the default naming conventions and case-sensitivity rules.
+
+`CREATE FUNCTION` with the optional `OR REPLACE` keywords either creates
+a function or replaces an existing one with the same signature. A
+`CREATE FUNCTION` without `OR REPLACE` fails if a function with the same
+signature already exists.
+
+Behavior on invocation with `null` values must be defined for each
+function. There are two options:
+
+. `RETURNS NULL ON NULL INPUT` declares that the function will always
+return `null` if any of the input arguments is `null`.
+. `CALLED ON NULL INPUT` declares that the function will always be
+executed.
+
+If the optional `IF NOT EXISTS` keywords are used, the function will
+only be created if another function with the same signature does not
+exist.
+
+`OR REPLACE` and `IF NOT EXISTS` cannot be used together.
+
+Functions belong to a keyspace. If no keyspace is specified in
+``, the current keyspace is used (i.e. the keyspace
+specified using the link:#useStmt[`USE`] statement). It is not possible
+to create a user-defined function in one of the system keyspaces.
+
+See the section on link:#udfs[user-defined functions] for more
+information.
+
+[[dropFunctionStmt]]
+==== DROP FUNCTION
+
+_Syntax:_
+
+bc(syntax).. +
+<drop-function-stmt> ::= DROP FUNCTION ( IF EXISTS )? +
+( <keyspace> '.' )? <function-name> +
+( '(' <arg-type> ( ',' <arg-type> )* ')' )?
+
+_Sample:_
+
+bc(sample). +
+DROP FUNCTION myfunction; +
+DROP FUNCTION mykeyspace.afunction; +
+DROP FUNCTION afunction ( int ); +
+DROP FUNCTION afunction ( text );
+
+`DROP FUNCTION` statement removes a function created using
+`CREATE FUNCTION`. +
+You must specify the argument types (link:#functionSignature[signature]
+) of the function to drop if there are multiple functions with the same
+name but a different signature (overloaded functions).
+
+`DROP FUNCTION` with the optional `IF EXISTS` keywords drops a function
+if it exists.
+
+[[createAggregateStmt]]
+==== CREATE AGGREGATE
+
+_Syntax:_
+
+bc(syntax).. +
+<create-aggregate-stmt> ::= CREATE ( OR REPLACE )? +
+AGGREGATE ( IF NOT EXISTS )? +
+( <keyspace> '.' )? <aggregate-name> +
+'(' <arg-type> ( ',' <arg-type> )* ')' +
+SFUNC <state-functionname> +
+STYPE <state-type> +
+( FINALFUNC <final-functionname> )? +
+( INITCOND <init-cond> )? +
+p. +
+_Sample:_
+
+bc(sample). +
+CREATE AGGREGATE myaggregate ( val text ) +
+SFUNC myaggregate_state +
+STYPE text +
+FINALFUNC myaggregate_final +
+INITCOND 'foo';
+
+See the section on link:#udas[user-defined aggregates] for a complete
+example.
+
+`CREATE AGGREGATE` creates or replaces a user-defined aggregate.
+
+`CREATE AGGREGATE` with the optional `OR REPLACE` keywords either
+creates an aggregate or replaces an existing one with the same
+signature. A `CREATE AGGREGATE` without `OR REPLACE` fails if an
+aggregate with the same signature already exists.
+
+`CREATE AGGREGATE` with the optional `IF NOT EXISTS` keywords creates
+an aggregate only if it does not already exist.
+
+`OR REPLACE` and `IF NOT EXISTS` cannot be used together.
+
+Aggregates belong to a keyspace. If no keyspace is specified in
+``, the current keyspace is used (i.e. the keyspace
+specified using the link:#useStmt[`USE`] statement). It is not possible
+to create a user-defined aggregate in one of the system keyspaces.
+
+Signatures for user-defined aggregates follow the
+link:#functionSignature[same rules] as for user-defined functions.
+
+`STYPE` defines the type of the state value and must be specified.
+
+The optional `INITCOND` defines the initial state value for the
+aggregate. It defaults to `null`. A non-`null` `INITCOND` must be
+specified for state functions that are declared with
+`RETURNS NULL ON NULL INPUT`.
+
+`SFUNC` references an existing function to be used as the state
+modifying function. The type of first argument of the state function
+must match `STYPE`. The remaining argument types of the state function
+must match the argument types of the aggregate function. State is not
+updated for state functions declared with `RETURNS NULL ON NULL INPUT`
+and called with `null`.
+
+The optional `FINALFUNC` is called just before the aggregate result is
+returned. It must take only one argument with type `STYPE`. The return
+type of the `FINALFUNC` may be a different type. A final function
+declared with `RETURNS NULL ON NULL INPUT` means that the aggregate’s
+return value will be `null`, if the last state is `null`.
+
+If no `FINALFUNC` is defined, the overall return type of the aggregate
+function is `STYPE`. If a `FINALFUNC` is defined, it is the return type
+of that function.
+
+See the section on link:#udas[user-defined aggregates] for more
+information.
+
+[[dropAggregateStmt]]
+==== DROP AGGREGATE
+
+_Syntax:_
+
+bc(syntax).. +
+<drop-aggregate-stmt> ::= DROP AGGREGATE ( IF EXISTS )? +
+( <keyspace> '.' )? <aggregate-name> +
+( '(' <arg-type> ( ',' <arg-type> )* ')' )? +
+p.
+
+_Sample:_
+
+bc(sample). +
+DROP AGGREGATE myAggregate; +
+DROP AGGREGATE myKeyspace.anAggregate; +
+DROP AGGREGATE someAggregate ( int ); +
+DROP AGGREGATE someAggregate ( text );
+
+The `DROP AGGREGATE` statement removes an aggregate created using
+`CREATE AGGREGATE`. You must specify the argument types of the aggregate
+to drop if there are multiple aggregates with the same name but a
+different signature (overloaded aggregates).
+
+`DROP AGGREGATE` with the optional `IF EXISTS` keywords drops an
+aggregate if it exists, and does nothing if an aggregate with the
+signature does not exist.
+
+Signatures for user-defined aggregates follow the
+link:#functionSignature[same rules] as for user-defined functions.
+
+[[dataManipulation]]
+=== Data Manipulation
+
+[[insertStmt]]
+==== INSERT
+
+_Syntax:_
+
+bc(syntax).. +
+<insert-stmt> ::= INSERT INTO <tablename> +
+( ( <names-list> VALUES <values-list> ) +
+| ( JSON <string> )) +
+( IF NOT EXISTS )? +
+( USING <option> ( AND <option> )* )?
+
+<names-list> ::= '(' <identifier> ( ',' <identifier> )* ')'
+
+<values-list> ::= '(' <term> ( ',' <term> )* ')'
+
+<option> ::= TIMESTAMP <integer> +
+| TTL <integer> +
+p. +
+_Sample:_
+
+bc(sample).. +
+INSERT INTO NerdMovies (movie, director, main_actor, year) +
+VALUES ('Serenity', 'Joss Whedon', 'Nathan Fillion', 2005) +
+USING TTL 86400;
+
+INSERT INTO NerdMovies JSON '\{"movie": "Serenity", "director": "Joss
+Whedon", "year": 2005}' +
+p. +
+The `INSERT` statement writes one or more columns for a given row in a
+table. Note that since a row is identified by its `PRIMARY KEY`, at
+least the columns composing it must be specified. The list of columns to
+insert must be supplied when using the `VALUES` syntax. When using
+the `JSON` syntax, they are optional. See the section on
+link:#insertJson[`INSERT JSON`] for more details.
+
+Note that unlike in SQL, `INSERT` does not check the prior existence of
+the row by default: the row is created if none existed before, and
+updated otherwise. Furthermore, there is no means of knowing which of
+creation or update happened.
+
+It is however possible to use the `IF NOT EXISTS` condition to only
+insert if the row does not exist prior to the insertion. But please note
+that using `IF NOT EXISTS` will incur a non-negligible performance cost
+(internally, Paxos will be used), so this should be used sparingly.
+
+All updates for an `INSERT` are applied atomically and in isolation.
+
+Please refer to the link:#updateOptions[`UPDATE`] section for
+information on the `<option>`s available and to the
+link:#collections[collections] section for use of
+`<collection-literal>`. Also note that `INSERT` does not support
+counters, while `UPDATE` does.
+
+[[updateStmt]]
+==== UPDATE
+
+_Syntax:_
+
+bc(syntax).. +
+::= UPDATE +
+( USING ( AND )* )? +
+SET ( `,' )* +
+WHERE +
+( IF ( AND condition )* )?
+
+::= `=' +
+| `=' (`+' | `-') ( | | ) +
+| `=' `+' +
+| `[' `]' `=' +
+| `.' `='
+
+::= +
+| IN +
+| `[' `]' +
+| `[' `]' IN +
+| `.' +
+| `.' IN
+
+::= `<' | `<=' | `=' | `!=' | `>=' | `>' +
+::= ( | `(' ( ( `,' )* )? `)')
+
+::= ( AND )*
+
+::= `=' +
+| `(' (`,' )* `)' `=' +
+| IN `(' ( ( `,' )* )? `)' +
+| IN +
+| `(' (`,' )* `)' IN `(' ( ( `,' )* )? `)' +
+| `(' (`,' )* `)' IN
+
+::= TIMESTAMP +
+| TTL +
+p. +
+_Sample:_
+
+bc(sample).. +
+UPDATE NerdMovies USING TTL 400 +
+SET director = 'Joss Whedon', +
+main_actor = 'Nathan Fillion', +
+year = 2005 +
+WHERE movie = 'Serenity';
+
+UPDATE UserActions SET total = total + 2 WHERE user =
+B70DE1D0-9908-4AE3-BE34-5573E5B09F14 AND action = 'click'; +
+p. +
+The `UPDATE` statement writes one or more columns for a given row in a
+table. The `<where-clause>` is used to select the row to update and must
+include all columns composing the `PRIMARY KEY`. Other column values
+are specified through `<assignment>` after the `SET` keyword.
+
+Note that unlike in SQL, `UPDATE` does not check the prior existence of
+the row by default (except through the use of `<condition>`s, see
+below): the row is created if none existed before, and updated
+otherwise. Furthermore, there is no means of knowing whether a creation
+or update occurred.
+
+It is however possible to set conditions on some columns through `IF`,
+in which case the row will not be updated unless the conditions are met.
+But please note that using `IF` conditions will incur a
+non-negligible performance cost (internally, Paxos will be used), so
+this should be used sparingly.
+
+In an `UPDATE` statement, all updates within the same partition key are
+applied atomically and in isolation.
+
+The `c = c + 3` form of `<assignment>` is used to increment/decrement
+counters. The identifier after the `=` sign *must* be the same as the
+one before the `=` sign (only increment/decrement is supported on
+counters, not the assignment of a specific value).
+
+The `id = id + <collection-literal>` and `id[value1] = value2` forms of
+`<assignment>` are for collections. Please refer to the
+link:#collections[relevant section] for more details.
+
+The `id.field = <term>` form of `<assignment>` is for setting the value
+of a single field on a non-frozen user-defined type.
+
+[[updateOptions]]
+===== `<option>`s
+
+The `UPDATE` and `INSERT` statements support the following options:
+
+* `TIMESTAMP`: sets the timestamp for the operation. If not specified,
+the coordinator will use the current time (in microseconds) at the start
+of statement execution as the timestamp. This is usually a suitable
+default.
+* `TTL`: specifies an optional Time To Live (in seconds) for the
+inserted values. If set, the inserted values are automatically removed
+from the database after the specified time. Note that the TTL concerns
+the inserted values, not the columns themselves. This means that any
+subsequent update of the column will also reset the TTL (to whatever TTL
+is specified in that update). By default, values never expire. A TTL of
+0 is equivalent to no TTL. If the table has a `default_time_to_live`, a
+TTL of 0 will remove the TTL for the inserted or updated values (see the
+sketch after this list).
+
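+A minimal sketch of the TTL behavior described above, reusing the
+`NerdMovies` columns from the earlier samples:
+
+bc(sample). +
+UPDATE NerdMovies USING TTL 86400 SET main_actor = 'Nathan Fillion' +
+WHERE movie = 'Serenity'; +
+-- A later update with TTL 0 makes the value permanent again +
+UPDATE NerdMovies USING TTL 0 SET main_actor = 'Nathan Fillion' +
+WHERE movie = 'Serenity';
+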
+[[deleteStmt]]
+==== DELETE
+
+_Syntax:_
+
+bc(syntax).. +
+::= DELETE ( ( `,' )* )? +
+FROM +
+( USING TIMESTAMP )? +
+WHERE +
+( IF ( EXISTS | ( ( AND )*) ) )?
+
+::= +
+| `[' `]' +
+| `.'
+
+::= ( AND )*
+
+::= +
+| `(' (`,' )* `)' +
+| IN `(' ( ( `,' )* )? `)' +
+| IN +
+| `(' (`,' )* `)' IN `(' ( ( `,' )* )? `)' +
+| `(' (`,' )* `)' IN
+
+::= `=' | `<' | `>' | `<=' | `>=' +
+::= ( | `(' ( ( `,' )* )? `)')
+
+::= ( | `!=') +
+| IN +
+| `[' `]' ( | `!=') +
+| `[' `]' IN +
+| `.' ( | `!=') +
+| `.' IN
+
+_Sample:_
+
+bc(sample).. +
+DELETE FROM NerdMovies USING TIMESTAMP 1240003134 WHERE movie =
+'Serenity';
+
+DELETE phone FROM Users WHERE userid IN
+(C73DE1D3-AF08-40F3-B124-3FF3E5109F22,
+B70DE1D0-9908-4AE3-BE34-5573E5B09F14); +
+p. +
+The `DELETE` statement deletes columns and rows. If column names are
+provided directly after the `DELETE` keyword, only those columns are
+deleted from the row indicated by the `<where-clause>`. The `id[value]`
+syntax in `<selection>` is for non-frozen collections (please refer to
+the link:#collections[collection section] for more details). The
+`id.field` syntax is for the deletion of non-frozen user-defined types.
+Otherwise, whole rows are removed. The `<where-clause>` specifies which
+rows are to be deleted. Multiple rows may be deleted with one statement
+by using an `IN` clause. A range of rows may be deleted using an
+inequality operator (such as `>=`).
+
+`DELETE` supports the `TIMESTAMP` option with the same semantics as the
+link:#updateStmt[`UPDATE`] statement.
+
+In a `DELETE` statement, all deletions within the same partition key are
+applied atomically and in isolation.
+
+A `DELETE` operation can be conditional through the use of an `IF`
+clause, similar to `UPDATE` and `INSERT` statements. However, as with
+`INSERT` and `UPDATE` statements, this will incur a non-negligible
+performance cost (internally, Paxos will be used) and so should be used
+sparingly.
+
+[[batchStmt]]
+==== BATCH
+
+_Syntax:_
+
+bc(syntax).. +
+<batch-stmt> ::= BEGIN ( UNLOGGED | COUNTER )? BATCH +
+( USING <option> ( AND <option> )* )? +
+<modification-stmt> ( ';' <modification-stmt> )* +
+APPLY BATCH
+
+<modification-stmt> ::= <insert-stmt> +
+| <update-stmt> +
+| <delete-stmt>
+
+<option> ::= TIMESTAMP <integer> +
+p. +
+_Sample:_
+
+bc(sample). +
+BEGIN BATCH +
+INSERT INTO users (userid, password, name) VALUES ('user2', 'ch@ngem3b',
+'second user'); +
+UPDATE users SET password = 'ps22dhds' WHERE userid = 'user3'; +
+INSERT INTO users (userid, password) VALUES ('user4', 'ch@ngem3c'); +
+DELETE name FROM users WHERE userid = 'user1'; +
+APPLY BATCH;
+
+The `BATCH` statement groups multiple modification statements
+(insertions/updates and deletions) into a single statement. It serves
+several purposes:
+
+. It saves network round-trips between the client and the server (and
+sometimes between the server coordinator and the replicas) when batching
+multiple updates.
+. All updates in a `BATCH` belonging to a given partition key are
+performed in isolation.
+. By default, all operations in the batch are performed as `LOGGED`, to
+ensure all mutations eventually complete (or none will). See the notes
+on link:#unloggedBatch[`UNLOGGED`] for more details.
+
+Note that:
+
+* `BATCH` statements may only contain `UPDATE`, `INSERT` and `DELETE`
+statements.
+* Batches are _not_ a full analogue for SQL transactions.
+* If a timestamp is not specified for each operation, then all
+operations will be applied with the same timestamp. Due to Cassandra’s
+conflict resolution procedure in the case of
+http://wiki.apache.org/cassandra/FAQ#clocktie[timestamp ties],
+operations may be applied in an order that is different from the order
+they are listed in the `BATCH` statement. To force a particular
+operation ordering, you must specify per-operation timestamps.
+
+[[unloggedBatch]]
+===== `UNLOGGED`
+
+By default, Cassandra uses a batch log to ensure all operations in a
+batch eventually complete or none will (note however that operations are
+only isolated within a single partition).
+
+There is a performance penalty for batch atomicity when a batch spans
+multiple partitions. If you do not want to incur this penalty, you can
+tell Cassandra to skip the batchlog with the `UNLOGGED` option. If the
+`UNLOGGED` option is used, a failed batch might leave the batch only
+partly applied.
+
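+A minimal sketch, reusing the `users` table from the sample above:
+
+bc(sample). +
+BEGIN UNLOGGED BATCH +
+INSERT INTO users (userid, password) VALUES ('user5', 'ch@ngem3d'); +
+UPDATE users SET password = 'ps33dhds' WHERE userid = 'user4'; +
+APPLY BATCH;
+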
+[[counterBatch]]
+===== `COUNTER`
+
+Use the `COUNTER` option for batched counter updates. Unlike other
+updates in Cassandra, counter updates are not idempotent.
+
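+A minimal sketch, reusing the counter table from the `UPDATE` sample:
+
+bc(sample). +
+BEGIN COUNTER BATCH +
+UPDATE UserActions SET total = total + 1 WHERE user =
+B70DE1D0-9908-4AE3-BE34-5573E5B09F14 AND action = 'click'; +
+UPDATE UserActions SET total = total + 1 WHERE user =
+B70DE1D0-9908-4AE3-BE34-5573E5B09F14 AND action = 'view'; +
+APPLY BATCH;
+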
+[[batchOptions]]
+===== ``
+
+`BATCH` supports both the `TIMESTAMP` option, with similar semantic to
+the one described in the link:#updateOptions[`UPDATE`] statement (the
+timestamp applies to all the statement inside the batch). However, if
+used, `TIMESTAMP` *must not* be used in the statements within the batch.
+
+=== Queries
+
+[[selectStmt]]
+==== SELECT
+
+_Syntax:_
+
+bc(syntax).. +
+::= SELECT ( JSON )? +
+FROM +
+( WHERE )? +
+( GROUP BY )? +
+( ORDER BY )? +
+( PER PARTITION LIMIT )? +
+( LIMIT )? +
+( ALLOW FILTERING )?
+
+::= DISTINCT?
+
+::= (AS )? ( `,' (AS )? )* +
+| `*'
+
+::= +
+| +
+| WRITETIME `(' `)' +
+| COUNT `(' `*' `)' +
+| TTL `(' `)' +
+| CAST `(' AS `)' +
+| `(' ( (`,' )*)? `)' +
+| `.' +
+| `[' `]' +
+| `[' ? .. ? `]'
+
+::= ( AND )*
+
+::= +
+| `(' (`,' )* `)' +
+| IN `(' ( ( `,' )* )? `)' +
+| `(' (`,' )* `)' IN `(' ( ( `,' )* )? `)' +
+| TOKEN `(' ( `,' )* `)'
+
+::= `=' | `<' | `>' | `<=' | `>=' | CONTAINS | CONTAINS KEY +
+::= (`,' )* +
+::= ( `,' )* +
+::= ( ASC | DESC )? +
+::= `(' (`,' )* `)' +
+p. +
+_Sample:_
+
+bc(sample).. +
+SELECT name, occupation FROM users WHERE userid IN (199, 200, 207);
+
+SELECT JSON name, occupation FROM users WHERE userid = 199;
+
+SELECT name AS user_name, occupation AS user_occupation FROM users;
+
+SELECT time, value +
+FROM events +
+WHERE event_type = 'myEvent' +
+AND time > '2011-02-03' +
+AND time <= '2012-01-01'
+
+SELECT COUNT (*) FROM users;
+
+SELECT COUNT (*) AS user_count FROM users;
+
+The `SELECT` statement reads one or more columns for one or more rows
+in a table. It returns a result-set of rows, where each row contains the
+collection of columns corresponding to the query. If the `JSON` keyword
+is used, the results for each row will contain only a single column
+named ``json''. See the section on link:#selectJson[`SELECT JSON`] for
+more details.
+
+[[selectSelection]]
+===== `<select-clause>`
+
+The `<select-clause>` determines which columns need to be queried and
+returned in the result-set. It consists of either the comma-separated
+list of `<selector>`s or the wildcard character (`*`) to select all the
+columns defined for the table. Please note that for wildcard `SELECT`
+queries the order of columns returned is not specified and is not
+guaranteed to be stable between Cassandra versions.
+
+A `<selector>` is either a column name to retrieve or a `<function>` of
+one or more `<term>`s. The functions allowed are the same as for
+`<term>` and are described in the link:#functions[function section]. In
+addition to these generic functions, the `WRITETIME` (resp. `TTL`)
+function allows selecting the timestamp of when the column was inserted
+(resp. the time to live (in seconds) for the column, or null if the
+column has no expiration set), and the link:#castFun[`CAST`] function
+can be used to convert one data type to another.
+
+Additionally, individual values of maps and sets can be selected using
+`[<term>]`. For maps, this will return the value corresponding to the
+key, if such an entry exists. For sets, this will return the key that is
+selected if it exists and is thus mainly a way to check element
+existence. It is also possible to select a slice of a set or map with
+`[<term> ... <term>]`, where both bounds can be omitted.
+
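+For instance, assuming the hypothetical `favs` map column from the index
+samples above, a sketch of selecting a single map entry:
+
+bc(sample). +
+SELECT favs['author'] FROM users;
+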
+Any `<selector>` can be aliased using the `AS` keyword (see examples).
+Please note that `<where-clause>` and `<order-by>` clauses should refer
+to the columns by their original names and not by their aliases.
+
+The `COUNT` keyword can be used with parentheses enclosing `*`. If so,
+the query will return a single result: the number of rows matching the
+query. Note that `COUNT(1)` is supported as an alias.
+
+[[selectWhere]]
+===== `<where-clause>`
+
+The `<where-clause>` specifies which rows must be queried. It is
+composed of relations on the columns that are part of the `PRIMARY KEY`
+and/or have a link:#createIndexStmt[secondary index] defined on them.
+
+Not all relations are allowed in a query. For instance, non-equality
+relations (where `IN` is considered an equality relation) on a partition
+key are not supported (but see the use of the `TOKEN` method below to do
+non-equality queries on the partition key). Moreover, for a given
+partition key, the clustering columns induce an ordering of rows, and
+relations on them are restricted to those that allow selecting a
+*contiguous* (for the ordering) set of rows. For instance, given
+
+bc(sample). +
+CREATE TABLE posts ( +
+userid text, +
+blog_title text, +
+posted_at timestamp, +
+entry_title text, +
+content text, +
+category int, +
+PRIMARY KEY (userid, blog_title, posted_at) +
+)
+
+The following query is allowed:
+
+bc(sample). +
+SELECT entry_title, content FROM posts WHERE userid='john doe' AND
+blog_title='John''s Blog' AND posted_at >= '2012-01-01' AND posted_at <
+'2012-01-31'
+
+But the following one is not, as it does not select a contiguous set of
+rows (and we suppose no secondary indexes are set):
+
+bc(sample). +
+// Needs a blog_title to be set to select ranges of posted_at +
+SELECT entry_title, content FROM posts WHERE userid='john doe' AND
+posted_at >= '2012-01-01' AND posted_at < '2012-01-31'
+
+When specifying relations, the `TOKEN` function can be used on the
+`PARTITION KEY` column to query. In that case, rows will be selected
+based on the token of their `PARTITION_KEY` rather than on the value.
+Note that the token of a key depends on the partitioner in use, and that
+in particular the RandomPartitioner won’t yield a meaningful order. Also
+note that ordering partitioners always order token values by bytes (so
+even if the partition key is of type int, `token(-1) > token(0)` in
+particular). Example:
+
+bc(sample). +
+SELECT * FROM posts WHERE token(userid) > token('tom') AND token(userid)
+< token('bob')
+
+Moreover, the `IN` relation is only allowed on the last column of the
+partition key and on the last column of the full primary key.
+
+It is also possible to ``group'' `CLUSTERING COLUMNS` together in a
+relation using the tuple notation. For instance:
+
+bc(sample). +
+SELECT * FROM posts WHERE userid='john doe' AND (blog_title, posted_at)
+> ('John''s Blog', '2012-01-01')
+
+will request all rows that sort after the one having ``John's Blog'' as
+`blog_title` and '2012-01-01' for `posted_at` in the clustering order.
+In particular, rows having a `posted_at <= '2012-01-01'` will be
+returned as long as their `blog_title > 'John''s Blog'`, which wouldn't
+be the case for:
+
+bc(sample). +
+SELECT * FROM posts WHERE userid='john doe' AND blog_title > 'John''s
+Blog' AND posted_at > '2012-01-01'
+
+The tuple notation may also be used for `IN` clauses on
+`CLUSTERING COLUMNS`:
+
+bc(sample). +
+SELECT * FROM posts WHERE userid='john doe' AND (blog_title, posted_at)
+IN (('John''s Blog', '2012-01-01'), ('Extreme Chess', '2014-06-01'))
+
+The `CONTAINS` operator may only be used on collection columns (lists,
+sets, and maps). In the case of maps, `CONTAINS` applies to the map
+values. The `CONTAINS KEY` operator may only be used on map columns and
+applies to the map keys.
+
+[[selectOrderBy]]
+===== ``
+
+The `ORDER BY` option allows selecting the order of the returned
+results. It takes as argument a list of column names along with the
+order for each column (`ASC` for ascending and `DESC` for descending,
+omitting the order being equivalent to `ASC`). Currently the possible
+orderings are limited (and depend on the table
+link:#createTableOptions[`CLUSTERING ORDER`]):
+
+* if the table has been defined without any specific `CLUSTERING ORDER`,
+then the allowed orderings are the order induced by the clustering
+columns and the reverse of that one.
+* otherwise, the orderings allowed are the order of the
+`CLUSTERING ORDER` option and the reversed one.
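+
+For instance, with the `posts` table above (which uses the default
+clustering order), the following is a valid ordering sketch:
+
+bc(sample). +
+SELECT entry_title FROM posts WHERE userid='john doe' ORDER BY
+blog_title DESC, posted_at DESC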
+
+[[selectGroupBy]]
+===== `<group-by>`
+
+The `GROUP BY` option allows condensing into a single row all selected
+rows that share the same values for a set of columns.
+
+Using the `GROUP BY` option, it is only possible to group rows at the
+partition key level or at a clustering column level. Consequently, the
+`GROUP BY` option only accepts primary key column names as arguments,
+in the primary key order. If a primary key column is restricted by an
+equality restriction, it is not required to be present in the
+`GROUP BY` clause.
+
+Aggregate functions will produce a separate value for each group. If no
+`GROUP BY` clause is specified, aggregate functions will produce a
+single value for all the rows.
+
+If a column is selected without an aggregate function in a statement
+with a `GROUP BY`, the first value encountered in each group will be
+returned.
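+
+For example, with the `posts` table above, rows can be grouped at the
+partition key level (a sketch; any aggregate could be used):
+
+bc(sample). +
+SELECT userid, count(*) FROM posts GROUP BY userid;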
+
+[[selectLimit]]
+===== `LIMIT` and `PER PARTITION LIMIT`
+
+The `LIMIT` option to a `SELECT` statement limits the number of rows
+returned by a query, while the `PER PARTITION LIMIT` option limits the
+number of rows returned for a given partition by the query. Note that
+both types of limit can be used in the same statement.
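+
+For example, the following sketch returns at most 2 posts per user and
+at most 10 rows overall:
+
+bc(sample). +
+SELECT * FROM posts PER PARTITION LIMIT 2 LIMIT 10;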
+
+[[selectAllowFiltering]]
+===== `ALLOW FILTERING`
+
+By default, CQL only allows select queries that don’t involve
+``filtering'' server side, i.e. queries where we know that all (live)
+records read will be returned (maybe partly) in the result set. The
+reasoning is that those ``non-filtering'' queries have predictable
+performance in the sense that they will execute in a time that is
+proportional to the amount of data *returned* by the query (which can be
+controlled through `LIMIT`).
+
+The `ALLOW FILTERING` option explicitly allows (some) queries that
+require filtering. Please note that a query using `ALLOW FILTERING`
+may thus have unpredictable performance (by the definition above), i.e.
+even a query that selects a handful of records *may* exhibit performance
+that depends on the total amount of data stored in the cluster.
+
+For instance, considering the following table holding user profiles with
+their year of birth (with a secondary index on it) and country of
+residence:
+
+bc(sample).. +
+CREATE TABLE users ( +
+username text PRIMARY KEY, +
+firstname text, +
+lastname text, +
+birth_year int, +
+country text +
+)
+
+CREATE INDEX ON users(birth_year); +
+p.
+
+Then the following queries are valid:
+
+bc(sample). +
+SELECT * FROM users; +
+SELECT firstname, lastname FROM users WHERE birth_year = 1981;
+
+because in both cases, Cassandra guarantees that the performance of
+these queries will be proportional to the amount of data returned. In
+particular, if no users are born in 1981, then the second query’s
+performance will not depend on the number of user profiles stored in the
+database (not directly at least: due to secondary index implementation
+considerations, this query may still depend on the number of nodes in
+the cluster, which indirectly depends on the amount of data stored;
+nevertheless, the number of nodes will always be many orders of
+magnitude lower than the number of user profiles stored). Of course,
+both queries may return very large result sets in practice, but the
+amount of data returned can always be controlled by adding a `LIMIT`.
+
+However, the following query will be rejected:
+
+bc(sample). +
+SELECT firstname, lastname FROM users WHERE birth_year = 1981 AND
+country = 'FR';
+
+because Cassandra cannot guarantee that it won’t have to scan a large
+amount of data even if the result of the query is small. Typically, it
+will scan all the index entries for users born in 1981 even if only a
+handful are actually from France. However, if you ``know what you are
+doing'', you can force the execution of this query by using
+`ALLOW FILTERING`, so the following query is valid:
+
+bc(sample). +
+SELECT firstname, lastname FROM users WHERE birth_year = 1981 AND
+country = 'FR' ALLOW FILTERING;
+
+[[databaseRoles]]
+=== Database Roles
+
+[[createRoleStmt]]
+==== CREATE ROLE
+
+_Syntax:_
+
+bc(syntax).. +
+::= CREATE ROLE ( IF NOT EXISTS )? ( WITH ( AND )* )?
+
+::= PASSWORD = +
+| LOGIN = +
+| SUPERUSER = +
+| OPTIONS = +
+p.
+
+_Sample:_
+
+bc(sample). +
+CREATE ROLE new_role; +
+CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true; +
+CREATE ROLE bob WITH PASSWORD = 'password_b' AND LOGIN = true AND
+SUPERUSER = true; +
+CREATE ROLE carlos WITH OPTIONS = \{ 'custom_option1' : 'option1_value',
+'custom_option2' : 99 };
+
+By default roles do not possess `LOGIN` privileges or `SUPERUSER`
+status.
+
+link:#permissions[Permissions] on database resources are granted to
+roles; types of resources include keyspaces, tables, functions and roles
+themselves. Roles may be granted to other roles to create hierarchical
+permissions structures; in these hierarchies, permissions and
+`SUPERUSER` status are inherited, but the `LOGIN` privilege is not.
+
+If a role has the `LOGIN` privilege, clients may identify as that role
+when connecting. For the duration of that connection, the client will
+acquire any roles and privileges granted to that role.
+
+Only a client with the `CREATE` permission on the database roles
+resource may issue `CREATE ROLE` requests (see the
+link:#permissions[relevant section] below), unless the client is a
+`SUPERUSER`. Role management in Cassandra is pluggable and custom
+implementations may support only a subset of the listed options.
+
+Role names should be quoted if they contain non-alphanumeric characters.
+
+[[createRolePwd]]
+===== Setting credentials for internal authentication
+
+Use the `WITH PASSWORD` clause to set a password for internal
+authentication, enclosing the password in single quotation marks. +
+If internal authentication has not been set up or the role does not have
+`LOGIN` privileges, the `WITH PASSWORD` clause is not necessary.
+
+[[createRoleConditional]]
+===== Creating a role conditionally
+
+Attempting to create an existing role results in an invalid query
+condition unless the `IF NOT EXISTS` option is used. If the option is
+used and the role exists, the statement is a no-op.
+
+bc(sample). +
+CREATE ROLE other_role; +
+CREATE ROLE IF NOT EXISTS other_role;
+
+[[alterRoleStmt]]
+==== ALTER ROLE
+
+_Syntax:_
+
+bc(syntax).. +
+::= ALTER ROLE ( WITH ( AND )* )?
+
+::= PASSWORD = +
+| LOGIN = +
+| SUPERUSER = +
+| OPTIONS = +
+p.
+
+_Sample:_
+
+bc(sample). +
+ALTER ROLE bob WITH PASSWORD = 'PASSWORD_B' AND SUPERUSER = false;
+
+Conditions on executing `ALTER ROLE` statements:
+
+* A client must have `SUPERUSER` status to alter the `SUPERUSER` status
+of another role
+* A client cannot alter the `SUPERUSER` status of any role it currently
+holds
+* A client can only modify certain properties of the role with which it
+identified at login (e.g. `PASSWORD`)
+* To modify properties of a role, the client must be granted `ALTER`
+link:#permissions[permission] on that role
+
+[[dropRoleStmt]]
+==== DROP ROLE
+
+_Syntax:_
+
+bc(syntax).. +
+::= DROP ROLE ( IF EXISTS )? +
+p.
+
+_Sample:_
+
+bc(sample). +
+DROP ROLE alice; +
+DROP ROLE IF EXISTS bob;
+
+`DROP ROLE` requires the client to have `DROP`
+link:#permissions[permission] on the role in question. In addition, a
+client may not `DROP` the role with which it identified at login.
+Finally, only a client with `SUPERUSER` status may `DROP` another
+`SUPERUSER` role. +
+Attempting to drop a role which does not exist results in an invalid
+query condition unless the `IF EXISTS` option is used. If the option is
+used and the role does not exist the statement is a no-op.
+
+[[grantRoleStmt]]
+==== GRANT ROLE
+
+_Syntax:_
+
+bc(syntax). +
+::= GRANT TO
+
+_Sample:_
+
+bc(sample). +
+GRANT report_writer TO alice;
+
+This statement grants the `report_writer` role to `alice`. Any
+permissions granted to `report_writer` are also acquired by `alice`. +
+Roles are modelled as a directed acyclic graph, so circular grants are
+not permitted. The following examples result in error conditions:
+
+bc(sample). +
+GRANT role_a TO role_b; +
+GRANT role_b TO role_a;
+
+bc(sample). +
+GRANT role_a TO role_b; +
+GRANT role_b TO role_c; +
+GRANT role_c TO role_a;
+
+[[revokeRoleStmt]]
+==== REVOKE ROLE
+
+_Syntax:_
+
+bc(syntax). +
+::= REVOKE FROM
+
+_Sample:_
+
+bc(sample). +
+REVOKE report_writer FROM alice;
+
+This statement revokes the `report_writer` role from `alice`. Any
+permissions that `alice` has acquired via the `report_writer` role are
+also revoked.
+
+[[listRolesStmt]]
+==== LIST ROLES
+
+_Syntax:_
+
+bc(syntax). +
+::= LIST ROLES ( OF )? ( NORECURSIVE )?
+
+_Sample:_
+
+bc(sample). +
+LIST ROLES;
+
+Returns all known roles in the system; this requires `DESCRIBE`
+permission on the database roles resource.
+
+bc(sample). +
+LIST ROLES OF `alice`;
+
+Enumerate all roles granted to `alice`, including those transitively
+acquired.
+
+bc(sample). +
+LIST ROLES OF `bob` NORECURSIVE;
+
+List all roles directly granted to `bob`.
+
+[[createUserStmt]]
+==== CREATE USER
+
+Prior to the introduction of roles in Cassandra 2.2, authentication and
+authorization were based around the concept of a `USER`. For backward
+compatibility, the legacy syntax has been preserved with `USER` centric
+statements becoming synonyms for the `ROLE` based equivalents.
+
+_Syntax:_
+
+bc(syntax).. +
+::= CREATE USER ( IF NOT EXISTS )? ( WITH PASSWORD )? ()?
+
+::= SUPERUSER +
+| NOSUPERUSER +
+p.
+
+_Sample:_
+
+bc(sample). +
+CREATE USER alice WITH PASSWORD 'password_a' SUPERUSER; +
+CREATE USER bob WITH PASSWORD 'password_b' NOSUPERUSER;
+
+`CREATE USER` is equivalent to `CREATE ROLE` where the `LOGIN` option is
+`true`. So, the following pairs of statements are equivalent:
+
+bc(sample).. +
+CREATE USER alice WITH PASSWORD 'password_a' SUPERUSER; +
+CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true AND
+SUPERUSER = true;
+
+CREATE USER IF NOT EXISTS alice WITH PASSWORD 'password_a' SUPERUSER; +
+CREATE ROLE IF NOT EXISTS alice WITH PASSWORD = 'password_a' AND LOGIN =
+true AND SUPERUSER = true;
+
+CREATE USER alice WITH PASSWORD 'password_a' NOSUPERUSER; +
+CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true AND
+SUPERUSER = false;
+
+CREATE USER alice WITH PASSWORD 'password_a' NOSUPERUSER; +
+CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true;
+
+CREATE USER alice WITH PASSWORD 'password_a'; +
+CREATE ROLE alice WITH PASSWORD = 'password_a' AND LOGIN = true; +
+p.
+
+[[alterUserStmt]]
+==== ALTER USER
+
+_Syntax:_
+
+bc(syntax).. +
+::= ALTER USER ( WITH PASSWORD )? ( )?
+
+::= SUPERUSER +
+| NOSUPERUSER +
+p.
+
+bc(sample). +
+ALTER USER alice WITH PASSWORD 'PASSWORD_A'; +
+ALTER USER bob SUPERUSER;
+
+[[dropUserStmt]]
+==== DROP USER
+
+_Syntax:_
+
+bc(syntax).. +
+::= DROP USER ( IF EXISTS )? +
+p.
+
+_Sample:_
+
+bc(sample). +
+DROP USER alice; +
+DROP USER IF EXISTS bob;
+
+[[listUsersStmt]]
+==== LIST USERS
+
+_Syntax:_
+
+bc(syntax). +
+::= LIST USERS;
+
+_Sample:_
+
+bc(sample). +
+LIST USERS;
+
+This statement is equivalent to
+
+bc(sample). +
+LIST ROLES;
+
+but only roles with the `LOGIN` privilege are included in the output.
+
+[[dataControl]]
+=== Data Control
+
+==== Permissions
+
+Permissions on resources are granted to roles; there are several
+different types of resources in Cassandra and each type is modelled
+hierarchically:
+
+* The hierarchy of Data resources, Keyspaces and Tables has the
+structure `ALL KEYSPACES` -> `KEYSPACE` -> `TABLE`
+* Function resources have the structure `ALL FUNCTIONS` -> `KEYSPACE` ->
+`FUNCTION`
+* Resources representing roles have the structure `ALL ROLES` -> `ROLE`
+* Resources representing JMX ObjectNames, which map to sets of
+MBeans/MXBeans, have the structure `ALL MBEANS` -> `MBEAN`
+
+Permissions can be granted at any level of these hierarchies and they
+flow downwards. So granting a permission on a resource higher up the
+chain automatically grants that same permission on all resources lower
+down. For example, granting `SELECT` on a `KEYSPACE` automatically
+grants it on all `TABLES` in that `KEYSPACE`. Likewise, granting a
+permission on `ALL FUNCTIONS` grants it on every defined function,
+regardless of which keyspace it is scoped in. It is also possible to
+grant permissions on all functions scoped to a particular keyspace.
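+
+For example (a sketch assuming a role `ks_admin` and a keyspace
+`keyspace1` exist), a single grant at the keyspace level covers every
+table in it:
+
+bc(sample). +
+GRANT SELECT ON KEYSPACE keyspace1 TO ks_admin;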
+
+Modifications to permissions are visible to existing client sessions;
+that is, connections need not be re-established following permissions
+changes.
+
+The full set of available permissions is:
+
+* `CREATE`
+* `ALTER`
+* `DROP`
+* `SELECT`
+* `MODIFY`
+* `AUTHORIZE`
+* `DESCRIBE`
+* `EXECUTE`
+
+Not all permissions are applicable to every type of resource. For
+instance, `EXECUTE` is only relevant in the context of functions or
+mbeans; granting `EXECUTE` on a resource representing a table is
+nonsensical. Attempting to `GRANT` a permission on a resource to which
+it
+cannot be applied results in an error response. The following
+illustrates which permissions can be granted on which types of resource,
+and which statements are enabled by that permission.
+
+[cols=",,,,,",options="header",]
+|===
+|permission |resource |operations | | |
+|`CREATE` |`ALL KEYSPACES` |`CREATE KEYSPACE` `CREATE TABLE` in any
+keyspace | | |
+
+|`CREATE` |`KEYSPACE` |`CREATE TABLE` in specified keyspace | | |
+
+|`CREATE` |`ALL FUNCTIONS` |`CREATE FUNCTION` in any keyspace
+`CREATE AGGREGATE` in any keyspace | | |
+
+|`CREATE` |`ALL FUNCTIONS IN KEYSPACE` |`CREATE FUNCTION` in keyspace
+ `CREATE AGGREGATE` in keyspace | | |
+
+|`CREATE` |`ALL ROLES` |`CREATE ROLE` | | |
+
+|`ALTER` |`ALL KEYSPACES` |`ALTER KEYSPACE` `ALTER TABLE` in any
+keyspace | | |
+
+|`ALTER` |`KEYSPACE` |`ALTER KEYSPACE` `ALTER TABLE` in keyspace |
+| |
+
+|`ALTER` |`TABLE` |`ALTER TABLE` | | |
+
+|`ALTER` |`ALL FUNCTIONS` |`CREATE FUNCTION` replacing any existing
+`CREATE AGGREGATE` replacing any existing | | |
+
+|`ALTER` |`ALL FUNCTIONS IN KEYSPACE` |`CREATE FUNCTION` replacing
+existing in keyspace `CREATE AGGREGATE` replacing any existing in
+keyspace | | |
+
+|`ALTER` |`FUNCTION` |`CREATE FUNCTION` replacing existing
+`CREATE AGGREGATE` replacing existing | | |
+
+|`ALTER` |`ALL ROLES` |`ALTER ROLE` on any role | | |
+
+|`ALTER` |`ROLE` |`ALTER ROLE` | | |
+
+|`DROP` |`ALL KEYSPACES` |`DROP KEYSPACE` `DROP TABLE` in any
+keyspace | | |
+
+|`DROP` |`KEYSPACE` |`DROP TABLE` in specified keyspace | | |
+
+|`DROP` |`TABLE` |`DROP TABLE` | | |
+
+|`DROP` |`ALL FUNCTIONS` |`DROP FUNCTION` in any keyspace
+`DROP AGGREGATE` in any existing | | |
+
+|`DROP` |`ALL FUNCTIONS IN KEYSPACE` |`DROP FUNCTION` in keyspace
+`DROP AGGREGATE` in existing | | |
+
+|`DROP` |`FUNCTION` |`DROP FUNCTION` | | |
+
+|`DROP` |`ALL ROLES` |`DROP ROLE` on any role | | |
+
+|`DROP` |`ROLE` |`DROP ROLE` | | |
+
+|`SELECT` |`ALL KEYSPACES` |`SELECT` on any table | | |
+
+|`SELECT` |`KEYSPACE` |`SELECT` on any table in keyspace | | |
+
+|`SELECT` |`TABLE` |`SELECT` on specified table | | |
+
+|`SELECT` |`ALL MBEANS` |Call getter methods on any mbean | | |
+
+|`SELECT` |`MBEANS` |Call getter methods on any mbean matching a
+wildcard pattern | | |
+
+|`SELECT` |`MBEAN` |Call getter methods on named mbean | | |
+
+|`MODIFY` |`ALL KEYSPACES` |`INSERT` on any table `UPDATE` on any
+table `DELETE` on any table `TRUNCATE` on any table | | |
+
+|`MODIFY` |`KEYSPACE` |`INSERT` on any table in keyspace `UPDATE`
+on any table in keyspace `DELETE` on any table in keyspace
+`TRUNCATE` on any table in keyspace |`MODIFY` |`TABLE` |`INSERT`
+`UPDATE` `DELETE` `TRUNCATE`
+
+|`MODIFY` |`ALL MBEANS` |Call setter methods on any mbean | | |
+
+|`MODIFY` |`MBEANS` |Call setter methods on any mbean matching a
+wildcard pattern | | |
+
+|`MODIFY` |`MBEAN` |Call setter methods on named mbean | | |
+
+|`AUTHORIZE` |`ALL KEYSPACES` |`GRANT PERMISSION` on any table
+`REVOKE PERMISSION` on any table | | |
+
+|`AUTHORIZE` |`KEYSPACE` |`GRANT PERMISSION` on table in keyspace
+`REVOKE PERMISSION` on table in keyspace | | |
+
+|`AUTHORIZE` |`TABLE` |`GRANT PERMISSION` `REVOKE PERMISSION` | | |
+
+|`AUTHORIZE` |`ALL FUNCTIONS` |`GRANT PERMISSION` on any function
+`REVOKE PERMISSION` on any function | | |
+
+|`AUTHORIZE` |`ALL FUNCTIONS IN KEYSPACE` |`GRANT PERMISSION` in
+keyspace `REVOKE PERMISSION` in keyspace | | |
+
+|`AUTHORIZE` |`ALL FUNCTIONS IN KEYSPACE` |`GRANT PERMISSION` in
+keyspace `REVOKE PERMISSION` in keyspace | | |
+
+|`AUTHORIZE` |`FUNCTION` |`GRANT PERMISSION` `REVOKE PERMISSION` |
+| |
+
+|`AUTHORIZE` |`ALL MBEANS` |`GRANT PERMISSION` on any mbean
+`REVOKE PERMISSION` on any mbean | | |
+
+|`AUTHORIZE` |`MBEANS` |`GRANT PERMISSION` on any mbean matching a
+wildcard pattern `REVOKE PERMISSION` on any mbean matching a
+wildcard pattern | | |
+
+|`AUTHORIZE` |`MBEAN` |`GRANT PERMISSION` on named mbean
+`REVOKE PERMISSION` on named mbean | | |
+
+|`AUTHORIZE` |`ALL ROLES` |`GRANT ROLE` grant any role
+`REVOKE ROLE` revoke any role | | |
+
+|`AUTHORIZE` |`ROLES` |`GRANT ROLE` grant role `REVOKE ROLE` revoke
+role | | |
+
+|`DESCRIBE` |`ALL ROLES` |`LIST ROLES` all roles or only roles granted
+to another, specified role | | |
+
+|`DESCRIBE` |@ALL MBEANS |Retrieve metadata about any mbean from the
+platform’s MBeanServer | | |
+
+|`DESCRIBE` |@MBEANS |Retrieve metadata about any mbean matching a
+wildcard patter from the platform’s MBeanServer | | |
+
+|`DESCRIBE` |@MBEAN |Retrieve metadata about a named mbean from the
+platform’s MBeanServer | | |
+
+|`EXECUTE` |`ALL FUNCTIONS` |`SELECT`, `INSERT`, `UPDATE` using any
+function use of any function in `CREATE AGGREGATE` | | |
+
+|`EXECUTE` |`ALL FUNCTIONS IN KEYSPACE` |`SELECT`, `INSERT`, `UPDATE`
+using any function in keyspace use of any function in keyspace in
+`CREATE AGGREGATE` | | |
+
+|`EXECUTE` |`FUNCTION` |`SELECT`, `INSERT`, `UPDATE` using function
+use of function in `CREATE AGGREGATE` | | |
+
+|`EXECUTE` |`ALL MBEANS` |Execute operations on any mbean | | |
+
+|`EXECUTE` |`MBEANS` |Execute operations on any mbean matching a
+wildcard pattern | | |
+
+|`EXECUTE` |`MBEAN` |Execute operations on named mbean | | |
+|===
+
+[[grantPermissionsStmt]]
+==== GRANT PERMISSION
+
+_Syntax:_
+
+bc(syntax).. +
+::= GRANT ( ALL ( PERMISSIONS )? | ( PERMISSION )? ) ON TO
+
+::= CREATE | ALTER | DROP | SELECT | MODIFY | AUTHORIZE | DESCRIBE |
+EXECUTE
+
+::= ALL KEYSPACES +
+| KEYSPACE +
+| ( TABLE )? +
+| ALL ROLES +
+| ROLE +
+| ALL FUNCTIONS ( IN KEYSPACE )? +
+| FUNCTION +
+| ALL MBEANS +
+| ( MBEAN | MBEANS ) +
+p.
+
+_Sample:_
+
+bc(sample). +
+GRANT SELECT ON ALL KEYSPACES TO data_reader;
+
+This gives any user with the role `data_reader` permission to execute
+`SELECT` statements on any table across all keyspaces.
+
+bc(sample). +
+GRANT MODIFY ON KEYSPACE keyspace1 TO data_writer;
+
+This gives any user with the role `data_writer` permission to perform
+`INSERT`, `UPDATE`, `DELETE` and `TRUNCATE` queries on all tables in
+the `keyspace1` keyspace.
+
+bc(sample). +
+GRANT DROP ON keyspace1.table1 TO schema_owner;
+
+This gives any user with the `schema_owner` role permissions to `DROP`
+`keyspace1.table1`.
+
+bc(sample). +
+GRANT EXECUTE ON FUNCTION keyspace1.user_function( int ) TO
+report_writer;
+
+This grants any user with the `report_writer` role permission to execute
+`SELECT`, `INSERT` and `UPDATE` queries which use the function
+`keyspace1.user_function( int )`.
+
+bc(sample). +
+GRANT DESCRIBE ON ALL ROLES TO role_admin;
+
+This grants any user with the `role_admin` role permission to view any
+and all roles in the system with a `LIST ROLES` statement.
+
+[[grantAll]]
+===== GRANT ALL
+
+When the `GRANT ALL` form is used, the appropriate set of permissions is
+determined automatically based on the target resource.
+
+[[autoGrantPermissions]]
+===== Automatic Granting
+
+When a resource is created, via a `CREATE KEYSPACE`, `CREATE TABLE`,
+`CREATE FUNCTION`, `CREATE AGGREGATE` or `CREATE ROLE` statement, the
+creator (the role the database user who issues the statement is
+identified as) is automatically granted all applicable permissions on
+the new resource.
+
+[[revokePermissionsStmt]]
+==== REVOKE PERMISSION
+
+_Syntax:_
+
+bc(syntax).. +
+::= REVOKE ( ALL ( PERMISSIONS )? | ( PERMISSION )? ) ON FROM
+
+::= CREATE | ALTER | DROP | SELECT | MODIFY | AUTHORIZE | DESCRIBE |
+EXECUTE
+
+::= ALL KEYSPACES +
+| KEYSPACE +
+| ( TABLE )? +
+| ALL ROLES +
+| ROLE +
+| ALL FUNCTIONS ( IN KEYSPACE )? +
+| FUNCTION +
+| ALL MBEANS +
+| ( MBEAN | MBEANS ) +
+p.
+
+_Sample:_
+
+bc(sample).. +
+REVOKE SELECT ON ALL KEYSPACES FROM data_reader; +
+REVOKE MODIFY ON KEYSPACE keyspace1 FROM data_writer; +
+REVOKE DROP ON keyspace1.table1 FROM schema_owner; +
+REVOKE EXECUTE ON FUNCTION keyspace1.user_function( int ) FROM
+report_writer; +
+REVOKE DESCRIBE ON ALL ROLES FROM role_admin; +
+p.
+
+[[listPermissionsStmt]]
+==== LIST PERMISSIONS
+
+_Syntax:_
+
+bc(syntax).. +
+::= LIST ( ALL ( PERMISSIONS )? | ) +
+( ON )? +
+( OF ( NORECURSIVE )? )?
+
+::= ALL KEYSPACES +
+| KEYSPACE +
+| ( TABLE )? +
+| ALL ROLES +
+| ROLE +
+| ALL FUNCTIONS ( IN KEYSPACE )? +
+| FUNCTION +
+| ALL MBEANS +
+| ( MBEAN | MBEANS ) +
+p.
+
+_Sample:_
+
+bc(sample). +
+LIST ALL PERMISSIONS OF alice;
+
+Show all permissions granted to `alice`, including those acquired
+transitively from any other roles.
+
+bc(sample). +
+LIST ALL PERMISSIONS ON keyspace1.table1 OF bob;
+
+Show all permissions on `keyspace1.table1` granted to `bob`, including
+those acquired transitively from any other roles. This also includes any
+permissions higher up the resource hierarchy which can be applied to
+`keyspace1.table1`. For example, should `bob` have `ALTER` permission on
+`keyspace1`, that would be included in the results of this query. Adding
+the `NORECURSIVE` switch restricts the results to only those permissions
+which were directly granted to `bob` or one of `bob`’s roles.
+
+bc(sample). +
+LIST SELECT PERMISSIONS OF carlos;
+
+Show any permissions granted to `carlos` or any of `carlos`’s roles,
+limited to `SELECT` permissions on any resource.
+
+[[types]]
+=== Data Types
+
+CQL supports a rich set of data types for columns defined in a table,
+including collection types. On top of those native and collection
+types, users can also provide custom types (through a Java class
+extending `AbstractType` loadable by Cassandra). The syntax of types is
+thus:
+
+bc(syntax).. +
+::= +
+| +
+| +
+| // Used for custom types. The fully-qualified name of a Java class
+
+::= ascii +
+| bigint +
+| blob +
+| boolean +
+| counter +
+| date +
+| decimal +
+| double +
+| float +
+| inet +
+| int +
+| smallint +
+| text +
+| time +
+| timestamp +
+| timeuuid +
+| tinyint +
+| uuid +
+| varchar +
+| varint
+
+::= list '<' '>' +
+| set '<' '>' +
+| map '<' ',' '>' +
+::= tuple '<' (',' )* '>' +
+p.
+
+Note that the native types are keywords and as such are
+case-insensitive. They are however not reserved ones.
+
+The following table gives additional information on the native data
+types, and on which kinds of link:#constants[constants] each type
+supports:
+
+[cols=",,",options="header",]
+|===
+|type |constants supported |description
+|`ascii` |strings |ASCII character string
+
+|`bigint` |integers |64-bit signed long
+
+|`blob` |blobs |Arbitrary bytes (no validation)
+
+|`boolean` |booleans |true or false
+
+|`counter` |integers |Counter column (64-bit signed value). See
+link:#counters[Counters] for details
+
+|`date` |integers, strings |A date (with no corresponding time value).
+See link:#usingdates[Working with dates] below for more information.
+
+|`decimal` |integers, floats |Variable-precision decimal
+
+|`double` |integers, floats |64-bit IEEE-754 floating point
+
+|`float` |integers, floats |32-bit IEEE-754 floating point
+
+|`inet` |strings |An IP address, either 4 bytes long (IPv4) or
+16 bytes long (IPv6). There is no `inet` constant; IP addresses should
+be input as strings
+
+|`int` |integers |32-bit signed int
+
+|`smallint` |integers |16-bit signed int
+
+|`text` |strings |UTF8 encoded string
+
+|`time` |integers, strings |A time with nanosecond precision. See
+link:#usingtime[Working with time] below for more information.
+
+|`timestamp` |integers, strings |A timestamp. String constants allow
+inputting timestamps as dates; see link:#usingtimestamps[Working with
+timestamps] below for more information.
+
+|`timeuuid` |uuids |Type 1 UUID. This is generally used as a
+``conflict-free'' timestamp. Also see the link:#timeuuidFun[functions on
+Timeuuid]
+
+|`tinyint` |integers |8-bit signed int
+
+|`uuid` |uuids |Type 1 or type 4 UUID
+
+|`varchar` |strings |UTF8 encoded string
+
+|`varint` |integers |Arbitrary-precision integer
+|===
+
+For more information on how to use the collection types, see the
+link:#collections[Working with collections] section below.
+
+[[usingtimestamps]]
+==== Working with timestamps
+
+Values of the `timestamp` type are encoded as 64-bit signed integers
+representing a number of milliseconds since the standard base time known
+as ``the epoch'': January 1 1970 at 00:00:00 GMT.
+
+Timestamps can be input in CQL as simple long integers, giving the
+number of milliseconds since the epoch, as defined above.
+
+They can also be input as string literals in any of the following ISO
+8601 formats, each representing the date and time Feb 3, 2011, at
+04:05:00 AM, GMT:
+
+* `2011-02-03 04:05+0000`
+* `2011-02-03 04:05:00+0000`
+* `2011-02-03 04:05:00.000+0000`
+* `2011-02-03T04:05+0000`
+* `2011-02-03T04:05:00+0000`
+* `2011-02-03T04:05:00.000+0000`
+
+The `+0000` above is an RFC 822 4-digit time zone specification; `+0000`
+refers to GMT. US Pacific Standard Time is `-0800`. The time zone may be
+omitted if desired; the date will be interpreted as being in the time
+zone under which the coordinating Cassandra node is configured.
+
+* `2011-02-03 04:05`
+* `2011-02-03 04:05:00`
+* `2011-02-03 04:05:00.000`
+* `2011-02-03T04:05`
+* `2011-02-03T04:05:00`
+* `2011-02-03T04:05:00.000`
+
+There are clear difficulties inherent in relying on the time zone
+configuration being as expected, though, so it is recommended that the
+time zone always be specified for timestamps when feasible.
+
+The time of day may also be omitted, if the date is the only piece that
+matters:
+
+* `2011-02-03`
+* `2011-02-03+0000`
+
+In that case, the time of day will default to 00:00:00, in the specified
+or default time zone.
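+
+For example (a sketch assuming a hypothetical table `events` with an
+`int` key `id` and a `timestamp` column `ts`), the integer and string
+forms below denote the same instant:
+
+bc(sample). +
+INSERT INTO events (id, ts) VALUES (1, 1296705900000); +
+INSERT INTO events (id, ts) VALUES (2, '2011-02-03 04:05+0000');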
+
+[[usingdates]]
+==== Working with dates
+
+Values of the `date` type are encoded as 32-bit unsigned integers
+representing a number of days with ``the epoch'' at the center of the
+range (2^31^). The epoch is January 1st, 1970.
+
+A date can be input in CQL as an unsigned integer as defined above.
+
+Dates can also be input as string literals in the following format:
+
+* `2014-01-01`
+
+[[usingtime]]
+==== Working with time
+
+Values of the `time` type are encoded as 64-bit signed integers
+representing the number of nanoseconds since midnight.
+
+A time can be input in CQL as a simple long integer, giving the number
+of nanoseconds since midnight.
+
+It can also be input as a string literal in any of the following
+formats:
+
+* `08:12:54`
+* `08:12:54.123`
+* `08:12:54.123456`
+* `08:12:54.123456789`
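+
+For example (a sketch assuming a hypothetical table `samples` with an
+`int` key `id` and a `time` column `t`), both forms below denote the
+same time of day:
+
+bc(sample). +
+INSERT INTO samples (id, t) VALUES (1, 29574000000000); +
+INSERT INTO samples (id, t) VALUES (2, '08:12:54');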
+
+==== Counters
+
+The `counter` type is used to define _counter columns_. A counter
+column is a column whose value is a 64-bit signed integer and on which
+two operations are supported: incrementing and decrementing (see
+link:#updateStmt[`UPDATE`] for syntax). Note that the value of a counter
+cannot be set directly. A counter doesn’t exist until first
+incremented/decremented, and the first increment/decrement is
+made as if the previous value was 0. Deletion of counter columns is
+supported but has some limitations (see the
+http://wiki.apache.org/cassandra/Counters[Cassandra Wiki] for more
+information).
+
+The use of the counter type is limited in the following ways:
+
+* It cannot be used for a column that is part of the `PRIMARY KEY` of a
+table.
+* A table that contains a counter can only contain counters. In other
+words, either all the columns of a table outside the `PRIMARY KEY` have
+the counter type, or none of them do.
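+
+As a short sketch (assuming a hypothetical `page_views` table), a
+counter is updated with the `UPDATE` syntax referenced above:
+
+bc(sample). +
+CREATE TABLE page_views ( +
+page text PRIMARY KEY, +
+views counter +
+); +
+UPDATE page_views SET views = views + 1 WHERE page = '/home';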
+
+[[collections]]
+==== Working with collections
+
+===== Noteworthy characteristics
+
+Collections are meant for storing/denormalizing relatively small
+amounts of data. They work well for things like ``the phone numbers of
+a given user'', ``labels applied to an email'', etc. But when items are
+expected to grow unbounded (``all the messages sent by a given user'',
+``events registered by a sensor'', …), then collections are not
+appropriate anymore and a specific table (with clustering columns)
+should be used. Concretely, collections have the following limitations:
+
+* Collections are always read in their entirety (and reading one is not
+paged internally).
+* Collections cannot have more than 65535 elements. More precisely,
+while it may be possible to insert more than 65535 elements, it is not
+possible to read more than the first 65535 elements (see
+https://issues.apache.org/jira/browse/CASSANDRA-5428[CASSANDRA-5428] for
+details).
+* While insertion operations on sets and maps never incur a
+read-before-write internally, some operations on lists do (see the
+section on lists below for details). It is thus advised to prefer sets
+over lists when possible.
+
+Please note that while some of those limitations may be loosened in
+the future, the general rule that collections are for denormalizing
+small amounts of data is meant to stay.
+
+[[map]]
+===== Maps
+
+A `map` is a link:#types[typed] set of key-value pairs, where keys are
+unique. Furthermore, note that maps are internally sorted by their
+keys and will thus always be returned in that order. To create a column
+of type `map`, use the `map` keyword suffixed with comma-separated key
+and value types, enclosed in angle brackets. For example:
+
+bc(sample). +
+CREATE TABLE users ( +
+id text PRIMARY KEY, +
+given text, +
+surname text, +
+favs map<text, text> // A map of text keys, and text values +
+)
+
+Writing `map` data is accomplished with a JSON-inspired syntax. To write
+a record using `INSERT`, specify the entire map as a JSON-style
+associative array. _Note: This form will always replace the entire map._
+
+bc(sample). +
+// Inserting (or Updating) +
+INSERT INTO users (id, given, surname, favs) +
+VALUES ('jsmith', 'John', 'Smith', \{ 'fruit' : 'apple', 'band' :
+'Beatles' })
+
+Adding or updating key-values of a (potentially) existing map can be
+accomplished either by subscripting the map column in an `UPDATE`
+statement or by adding a new map literal:
+
+bc(sample). +
+// Updating (or inserting) +
+UPDATE users SET favs['author'] = 'Ed Poe' WHERE id = 'jsmith' +
+UPDATE users SET favs = favs + \{ 'movie' : 'Casablanca' } WHERE id =
+'jsmith'
+
+Note that TTLs are allowed for both `INSERT` and `UPDATE`, but in both
+cases the TTL set only applies to the newly inserted/updated _values_.
+In other words,
+
+bc(sample). +
+// Updating (or inserting) +
+UPDATE users USING TTL 10 SET favs['color'] = 'green' WHERE id =
+'jsmith'
+
+will only apply the TTL to the `{ 'color' : 'green' }` record, the rest
+of the map remaining unaffected.
+
+Deleting a map record is done with:
+
+bc(sample). +
+DELETE favs['author'] FROM users WHERE id = 'jsmith'
+
+[[set]]
+===== Sets
+
+A `set` is a link:#types[typed] collection of unique values. Sets are
+ordered by their values. To create a column of type `set`, use the `set`
+keyword suffixed with the value type enclosed in angle brackets. For
+example:
+
+bc(sample). +
+CREATE TABLE images ( +
+name text PRIMARY KEY, +
+owner text, +
+date timestamp, +
+tags set<text> +
+);
+
+Writing a `set` is accomplished by comma separating the set values, and
+enclosing them in curly braces. _Note: An `INSERT` will always replace
+the entire set._
+
+bc(sample). +
+INSERT INTO images (name, owner, date, tags) +
+VALUES ('cat.jpg', 'jsmith', 'now', \{ 'kitten', 'cat', 'pet' });
+
+Adding and removing values of a set can be accomplished with an `UPDATE`
+by adding/removing new set values to an existing `set` column.
+
+bc(sample). +
+UPDATE images SET tags = tags + \{ 'cute', 'cuddly' } WHERE name =
+'cat.jpg'; +
+UPDATE images SET tags = tags - \{ 'lame' } WHERE name = 'cat.jpg';
+
+As with link:#map[maps], TTLs, if used, only apply to the newly
+inserted/updated _values_.
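+
+For example (a sketch), a TTL applies only to the newly added set
+elements:
+
+bc(sample). +
+UPDATE images USING TTL 86400 SET tags = tags + \{ 'new' } WHERE name =
+'cat.jpg';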
+
+[[list]]
+===== Lists
+
+A `list` is a link:#types[typed] collection of non-unique values where
+elements are ordered by their position in the list. To create a column
+of type `list`, use the `list` keyword suffixed with the value type
+enclosed in angle brackets. For example:
+
+bc(sample). +
+CREATE TABLE plays ( +
+id text PRIMARY KEY, +
+game text, +
+players int, +
+scores list<int> +
+)
+
+Do note that as explained below, lists have some limitations and
+performance considerations to take into account, and it is advised to
+prefer link:#set[sets] over lists when this is possible.
+
+Writing `list` data is accomplished with a JSON-style syntax. To write a
+record using `INSERT`, specify the entire list as a JSON array. _Note:
+An `INSERT` will always replace the entire list._
+
+bc(sample). +
+INSERT INTO plays (id, game, players, scores) +
+VALUES ('123-afde', 'quake', 3, [17, 4, 2]);
+
+Adding (appending or prepending) values to a list can be accomplished by
+adding a new JSON-style array to an existing `list` column.
+
+bc(sample). +
+UPDATE plays SET players = 5, scores = scores + [ 14, 21 ] WHERE id =
+'123-afde'; +
+UPDATE plays SET players = 5, scores = [ 12 ] + scores WHERE id =
+'123-afde';
+
+It should be noted that append and prepend are not idempotent
+operations. This means that if an append or a prepend operation times
+out, it is not always safe to retry the operation (as this could result
+in the record being appended or prepended twice).
+
+Lists also provide the following operations: setting an element by its
+position in the list, removing an element by its position in the list,
+and removing all occurrences of a given value in the list. _However,
+and contrary to all the other collection operations, these three
+operations induce an internal read before the update, and will thus
+typically have slower performance characteristics_. Those operations
+have the following syntax:
+
+bc(sample). +
+UPDATE plays SET scores[1] = 7 WHERE id = '123-afde'; // sets the 2nd
+element of scores to 7 (raises an error if scores has fewer than 2
+elements) +
+DELETE scores[1] FROM plays WHERE id = '123-afde'; // deletes the 2nd
+element of scores (raises an error if scores has fewer than 2 elements) +
+UPDATE plays SET scores = scores - [ 12, 21 ] WHERE id = '123-afde'; //
+removes all occurrences of 12 and 21 from scores
+
+As with link:#map[maps], TTLs, if used, only apply to the newly
+inserted/updated _values_.
+
+=== Functions
+
+CQL3 distinguishes between built-in functions (so-called ``native
+functions'') and link:#udfs[user-defined functions]. CQL3 includes
+several native functions, described below:
+
+[[castFun]]
+==== Cast
+
+The `cast` function can be used to convert one native datatype to
+another.
+
+The following table describes the conversions supported by the `cast`
+function. Cassandra will silently ignore any cast converting a datatype
+into its own datatype.
+
+[cols=",",options="header",]
+|===
+|from |to
+|`ascii` |`text`, `varchar`
+
+|`bigint` |`tinyint`, `smallint`, `int`, `float`, `double`, `decimal`,
+`varint`, `text`, `varchar`
+
+|`boolean` |`text`, `varchar`
+
+|`counter` |`tinyint`, `smallint`, `int`, `bigint`, `float`, `double`,
+`decimal`, `varint`, `text`, `varchar`
+
+|`date` |`timestamp`
+
+|`decimal` |`tinyint`, `smallint`, `int`, `bigint`, `float`, `double`,
+`varint`, `text`, `varchar`
+
+|`double` |`tinyint`, `smallint`, `int`, `bigint`, `float`, `decimal`,
+`varint`, `text`, `varchar`
+
+|`float` |`tinyint`, `smallint`, `int`, `bigint`, `double`, `decimal`,
+`varint`, `text`, `varchar`
+
+|`inet` |`text`, `varchar`
+
+|`int` |`tinyint`, `smallint`, `bigint`, `float`, `double`, `decimal`,
+`varint`, `text`, `varchar`
+
+|`smallint` |`tinyint`, `int`, `bigint`, `float`, `double`, `decimal`,
+`varint`, `text`, `varchar`
+
+|`time` |`text`, `varchar`
+
+|`timestamp` |`date`, `text`, `varchar`
+
+|`timeuuid` |`timestamp`, `date`, `text`, `varchar`
+
+|`tinyint` |`tinyint`, `smallint`, `int`, `bigint`, `float`, `double`,
+`decimal`, `varint`, `text`, `varchar`
+
+|`uuid` |`text`, `varchar`
+
+|`varint` |`tinyint`, `smallint`, `int`, `bigint`, `float`, `double`,
+`decimal`, `text`, `varchar`
+|===
+
+The conversions rely strictly on Java’s semantics. For example, the
+double value 1 will be converted to the text value '1.0'.
+
+bc(sample). +
+SELECT avg(cast(count as double)) FROM myTable
+
+[[tokenFun]]
+==== Token
+
+The `token` function computes the token for a given partition key. The
+exact signature of the token function depends on the table concerned
+and on the partitioner used by the cluster.
+
+The types of the arguments of the `token` function depend on the types
+of the partition key columns. The return type depends on the
+partitioner in use:
+
+* For Murmur3Partitioner, the return type is `bigint`.
+* For RandomPartitioner, the return type is `varint`.
+* For ByteOrderedPartitioner, the return type is `blob`.
+
+For instance, in a cluster using the default Murmur3Partitioner, if a
+table is defined by
+
+bc(sample). +
+CREATE TABLE users ( +
+userid text PRIMARY KEY, +
+username text, +
+… +
+)
+
+then the `token` function will take a single argument of type `text` (in
+that case, the partition key is `userid`; there are no clustering
+columns, so the partition key is the same as the primary key), and the
+return type will be `bigint`.
+
+[[uuidFun]]
+==== Uuid
+
+The `uuid` function takes no parameters and generates a random type 4
+uuid suitable for use in `INSERT` or `SET` statements.
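+
+For example (a sketch assuming a hypothetical table with an `id` column
+of type `uuid`):
+
+bc(sample). +
+INSERT INTO myTable (id, value) VALUES (uuid(), 'some value');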
+
+[[timeuuidFun]]
+==== Timeuuid functions
+
+===== `now`
+
+The `now` function takes no arguments and generates, on the coordinator
+node, a new unique timeuuid (at the time the statement using it is
+executed). Note that this method is useful for insertion but is largely
+nonsensical in `WHERE` clauses. For instance, a query of the form
+
+bc(sample). +
+SELECT * FROM myTable WHERE t = now()
+
+will never return any result by design, since the value returned by
+`now()` is guaranteed to be unique.
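+
+For insertion, on the other hand, `now()` is useful (a sketch assuming
+a hypothetical table with a `timeuuid` column `t`):
+
+bc(sample). +
+INSERT INTO myTable (t, value) VALUES (now(), 'some value');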
+
+===== `minTimeuuid` and `maxTimeuuid`
+
+The `minTimeuuid` (resp. `maxTimeuuid`) function takes a `timestamp`
+value `t` (which can be link:#usingtimestamps[either a timestamp or a
+date string]) and returns a _fake_ `timeuuid` corresponding to the
+_smallest_ (resp. _biggest_) possible `timeuuid` having `t` as its
+timestamp. So for instance:
+
+bc(sample). +
+SELECT * FROM myTable WHERE t > maxTimeuuid('2013-01-01 00:05+0000') AND
+t < minTimeuuid('2013-02-02 10:00+0000')
+
+will select all rows where the `timeuuid` column `t` is strictly older
+than '2013-01-01 00:05+0000' but strictly younger than '2013-02-02
+10:00+0000'. Please note that
+`t >= maxTimeuuid('2013-01-01 00:05+0000')` would still _not_ select a
+`timeuuid` generated exactly at '2013-01-01 00:05+0000' and is
+essentially equivalent to `t > maxTimeuuid('2013-01-01 00:05+0000')`.
+
+_Warning_: We call the values generated by `minTimeuuid` and
+`maxTimeuuid` _fake_ UUIDs because they do not respect the Time-Based
+UUID
+generation process specified by the
+http://www.ietf.org/rfc/rfc4122.txt[RFC 4122]. In particular, the value
+returned by these 2 methods will not be unique. This means you should
+only use those methods for querying (as in the example above). Inserting
+the result of those methods is almost certainly _a bad idea_.
+
+[[timeFun]]
+==== Time conversion functions
+
+A number of functions are provided to ``convert'' a `timeuuid`, a
+`timestamp` or a `date` into another `native` type.
+
+[cols=",,",options="header",]
+|===
+|function name |input type |description
+|`toDate` |`timeuuid` |Converts the `timeuuid` argument into a `date`
+type
+
+|`toDate` |`timestamp` |Converts the `timestamp` argument into a `date`
+type
+
+|`toTimestamp` |`timeuuid` |Converts the `timeuuid` argument into a
+`timestamp` type
+
+|`toTimestamp` |`date` |Converts the `date` argument into a `timestamp`
+type
+
+|`toUnixTimestamp` |`timeuuid` |Converts the `timeuuid` argument into a
+`bigInt` raw value
+
+|`toUnixTimestamp` |`timestamp` |Converts the `timestamp` argument into
+a `bigInt` raw value
+
+|`toUnixTimestamp` |`date` |Converts the `date` argument into a `bigInt`
+raw value
+
+|`dateOf` |`timeuuid` |Similar to `toTimestamp(timeuuid)` (DEPRECATED)
+
+|`unixTimestampOf` |`timeuuid` |Similar to `toUnixTimestamp(timeuuid)`
+(DEPRECATED)
+|===
+
+[[blobFun]]
+==== Blob conversion functions
+
+A number of functions are provided to ``convert'' the native types into
+binary data (`blob`). For every `type` supported by CQL3
+(the notable exception being `blob`, for obvious reasons), the function
+`typeAsBlob` takes an argument of type `type` and returns it as a
+`blob`. Conversely, the function `blobAsType` takes a 64-bit `blob`
+argument and converts it to a `bigint` value. So, for instance,
+`bigintAsBlob(3)` is `0x0000000000000003` and
+`blobAsBigint(0x0000000000000003)` is `3`.
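+
+For example (a sketch assuming a hypothetical table with a `blob`
+column `b`):
+
+bc(sample). +
+INSERT INTO myTable (id, b) VALUES (1, bigintAsBlob(3)); +
+SELECT blobAsBigint(b) FROM myTable WHERE id = 1;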
+
+=== Aggregates
+
+Aggregate functions work on a set of rows. They receive values for each
+row and return one value for the whole set. +
+If `normal` columns, `scalar functions`, `UDT` fields, `writetime` or
+`ttl` are selected together with aggregate functions, the values
+returned for them will be the ones of the first row matching the query.
+
+CQL3 distinguishes between built-in aggregates (so-called ``native
+aggregates'') and link:#udas[user-defined aggregates]. CQL3 includes
+several native aggregates, described below:
+
+[[countFct]]
+==== Count
+
+The `count` function can be used to count the rows returned by a query.
+Example:
+
+bc(sample). +
+SELECT COUNT (*) FROM plays; +
+SELECT COUNT (1) FROM plays;
+
+It can also be used to count the non-null values of a given column.
+Example:
+
+bc(sample). +
+SELECT COUNT (scores) FROM plays;
+
+[[maxMinFcts]]
+==== Max and Min
+
+The `max` and `min` functions can be used to compute the maximum and the
+minimum value returned by a query for a given column.
+
+bc(sample). +
+SELECT MIN (players), MAX (players) FROM plays WHERE game = `quake';
+
+[[sumFct]]
+==== Sum
+
+The `sum` function can be used to sum up all the values returned by a
+query for a given column.
+
+bc(sample). +
+SELECT SUM (players) FROM plays;
+
+[[avgFct]]
+==== Avg
+
+The `avg` function can be used to compute the average of all the values
+returned by a query for a given column.
+
+bc(sample). +
+SELECT AVG (players) FROM plays;
+
+[[udfs]]
+=== User-Defined Functions
+
+User-defined functions allow execution of user-provided code in
+Cassandra. By default, Cassandra supports defining functions in _Java_
+and _JavaScript_. Support for other JSR 223 compliant scripting
+languages (such as Python, Ruby, and Scala) has been removed in 3.0.11.
+
+UDFs are part of the Cassandra schema. As such, they are automatically
+propagated to all nodes in the cluster.
+
+UDFs can be _overloaded_ - i.e. multiple UDFs with different argument
+types but the same function name. Example:
+
+bc(sample). +
+CREATE FUNCTION sample ( arg int ) …; +
+CREATE FUNCTION sample ( arg text ) …;
+
+User-defined functions are susceptible to all of the normal problems
+with the chosen programming language. Accordingly, implementations
+should be safe against null pointer exceptions, illegal arguments, or
+any other potential source of exceptions. An exception during function
+execution will result in the entire statement failing.
+
+It is valid to use _complex_ types like collections, tuple types and
+user-defined types as argument and return types. Tuple types and
+user-defined types are handled by the conversion functions of the
+DataStax Java Driver. Please see the documentation of the Java Driver
+for details on handling tuple types and user-defined types.
+
+Arguments for functions can be literals or terms. Prepared statement
+placeholders can be used, too.
+
+Note that you can use the double dollar-sign syntax to enclose the UDF
+source code. For example:
+
+bc(sample).. +
+CREATE FUNCTION some_function ( arg int ) +
+RETURNS NULL ON NULL INPUT +
+RETURNS int +
+LANGUAGE java +
+AS $$ return arg; $$;
+
+SELECT some_function(column) FROM atable …; +
+UPDATE atable SET col = some_function(?) …; +
+p.
+
+bc(sample). +
+CREATE TYPE custom_type (txt text, i int); +
+CREATE FUNCTION fct_using_udt ( udtarg frozen<custom_type> ) +
+RETURNS NULL ON NULL INPUT +
+RETURNS text +
+LANGUAGE java +
+AS $$ return udtarg.getString("txt"); $$;
+
+User-defined functions can be used in link:#selectStmt[`SELECT`],
+link:#insertStmt[`INSERT`] and link:#updateStmt[`UPDATE`] statements.
+
+The implicitly available `udfContext` field (or binding for script UDFs)
+provides the necessary functionality to create new UDT and tuple
+values.
+
+bc(sample). +
+CREATE TYPE custom_type (txt text, i int); +
+CREATE FUNCTION fct_using_udt ( somearg int ) +
+RETURNS NULL ON NULL INPUT +
+RETURNS custom_type +
+LANGUAGE java +
+AS $$ +
+UDTValue udt = udfContext.newReturnUDTValue(); +
+udt.setString("txt", "some string"); +
+udt.setInt("i", 42); +
+return udt; +
+$$;
+
+The definition of the `UDFContext` interface can be found in the Apache
+Cassandra source code for
+`org.apache.cassandra.cql3.functions.UDFContext`.
+
+bc(sample). +
+public interface UDFContext +
+\{ +
+UDTValue newArgUDTValue(String argName); +
+UDTValue newArgUDTValue(int argNum); +
+UDTValue newReturnUDTValue(); +
+UDTValue newUDTValue(String udtName); +
+TupleValue newArgTupleValue(String argName); +
+TupleValue newArgTupleValue(int argNum); +
+TupleValue newReturnTupleValue(); +
+TupleValue newTupleValue(String cqlDefinition); +
+}
+
+Java UDFs already have some imports for common interfaces and classes
+defined; these imports are listed below. +
+Please note that these convenience imports are not available for script
+UDFs.
+
+bc(sample). +
+import java.nio.ByteBuffer; +
+import java.util.List; +
+import java.util.Map; +
+import java.util.Set; +
+import org.apache.cassandra.cql3.functions.UDFContext; +
+import com.datastax.driver.core.TypeCodec; +
+import com.datastax.driver.core.TupleValue; +
+import com.datastax.driver.core.UDTValue;
+
+See link:#createFunctionStmt[`CREATE FUNCTION`] and
+link:#dropFunctionStmt[`DROP FUNCTION`].
+
+[[udas]]
+=== User-Defined Aggregates
+
+User-defined aggregates allow creation of custom aggregate functions
+using link:#udfs[UDFs]. Common examples of aggregate functions are
+_count_, _min_, and _max_.
+
+Each aggregate requires an _initial state_ (`INITCOND`, which defaults
+to `null`) of type `STYPE`. The first argument of the state function
+must have type `STYPE`. The remaining arguments of the state function
+must match the types of the user-defined aggregate arguments. The state
+function is called once for each row, and the value returned by the
+state function becomes the new state. After all rows are processed, the
+optional `FINALFUNC` is executed with the last state value as its
+argument.
+
+`STYPE` is mandatory in order to be able to distinguish possibly
+overloaded versions of the state and/or final function (since the
+overload can appear after creation of the aggregate).
+
+User-defined aggregates can be used in link:#selectStmt[`SELECT`]
+statements.
+
+A complete working example for user-defined aggregates (assuming that a
+keyspace has been selected using the link:#useStmt[`USE`] statement):
+
+bc(sample).. +
+CREATE OR REPLACE FUNCTION averageState ( state tuple<int, bigint>, val
+int ) +
+CALLED ON NULL INPUT +
+RETURNS tuple<int, bigint> +
+LANGUAGE java +
+AS ' +
+if (val != null) \{ +
+state.setInt(0, state.getInt(0)+1); +
+state.setLong(1, state.getLong(1)+val.intValue()); +
+} +
+return state; +
+';
+
+CREATE OR REPLACE FUNCTION averageFinal ( state tuple<int, bigint> ) +
+CALLED ON NULL INPUT +
+RETURNS double +
+LANGUAGE java +
+AS ' +
+double r = 0; +
+if (state.getInt(0) == 0) return null; +
+r = state.getLong(1); +
+r /= state.getInt(0); +
+return Double.valueOf(r); +
+';
+
+CREATE OR REPLACE AGGREGATE average ( int ) +
+SFUNC averageState +
+STYPE tuple<int, bigint> +
+FINALFUNC averageFinal +
+INITCOND (0, 0);
+
+CREATE TABLE atable ( +
+pk int PRIMARY KEY, +
+val int); +
+INSERT INTO atable (pk, val) VALUES (1,1); +
+INSERT INTO atable (pk, val) VALUES (2,2); +
+INSERT INTO atable (pk, val) VALUES (3,3); +
+INSERT INTO atable (pk, val) VALUES (4,4); +
+SELECT average(val) FROM atable; +
+p.
+
+See link:#createAggregateStmt[`CREATE AGGREGATE`] and
+link:#dropAggregateStmt[`DROP AGGREGATE`].
+
+[[json]]
+=== JSON Support
+
+Cassandra 2.2 introduces JSON support to link:#selectStmt[`SELECT`] and
+link:#insertStmt[`INSERT`] statements. This support does not
+fundamentally alter the CQL API (for example, the schema is still
+enforced); it simply provides a convenient way to work with JSON
+documents.
+
+[[selectJson]]
+==== SELECT JSON
+
+With `SELECT` statements, the new `JSON` keyword can be used to return
+each row as a single `JSON` encoded map. The remainder of the `SELECT`
+statement behavior is the same.
+
+The result map keys are the same as the column names in a normal result
+set. For example, a statement like `SELECT JSON a, ttl(b) FROM ...`
+would result in a map with keys `"a"` and `"ttl(b)"`. However, there is
+one notable exception: for symmetry with `INSERT JSON` behavior,
+case-sensitive column names with upper-case letters will be surrounded
+with double quotes. For example, `SELECT JSON myColumn FROM ...`
+would result in a map key `"\"myColumn\""` (note the escaped quotes).
+
+The map values will be `JSON`-encoded representations (as described
+below)
+of the result set values.
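+
+For example (a sketch using the `users` table from the collections
+section):
+
+bc(sample). +
+SELECT JSON id, favs FROM users WHERE id = 'jsmith';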
+
+[[insertJson]]
+==== INSERT JSON
+
+With `INSERT` statements, the new `JSON` keyword can be used to enable
+inserting a `JSON` encoded map as a single row. The format of the `JSON`
+map should generally match that returned by a `SELECT JSON` statement on
+the same table. In particular, case-sensitive column names should be
+surrounded with double quotes. For example, to insert into a table with
+two columns named ``myKey'' and ``value'', you would do the following:
+
+bc(sample). +
+INSERT INTO mytable JSON '\{ "\"myKey\"": 0, "value": 0 }'
+
+Any columns which are omitted from the `JSON` map will default to a
+`NULL` value (which will result in a tombstone being created).
+
+[[jsonEncoding]]
+==== JSON Encoding of Cassandra Data Types
+
+Where possible, Cassandra will represent and accept data types in their
+native `JSON` representation. Cassandra will also accept string
+representations matching the CQL literal format for all single-field
+types. For example, floats, ints, UUIDs, and dates can be represented by
+CQL literal strings. However, compound types, such as collections,
+tuples, and user-defined types must be represented by native `JSON`
+collections (maps and lists) or a JSON-encoded string representation of
+the collection.
+
+The following table describes the encodings that Cassandra will accept
+in `INSERT JSON` values (and `fromJson()` arguments) as well as the
+format Cassandra will use when returning data for `SELECT JSON`
+statements (and `toJson()`):
+
+[cols=",,,",options="header",]
+|===
+|type |formats accepted |return format |notes
+|`ascii` |string |string |Uses JSON’s `\u` character escape
+
+|`bigint` |integer, string |integer |String must be valid 64 bit integer
+
+|`blob` |string |string |String should be 0x followed by an even number
+of hex digits
+
+|`boolean` |boolean, string |boolean |String must be ``true'' or
+``false''
+
+|`date` |string |string |Date in format `YYYY-MM-DD`, timezone UTC
+
+|`decimal` |integer, float, string |float |May exceed 32 or 64-bit
+IEEE-754 floating point precision in client-side decoder
+
+|`double` |integer, float, string |float |String must be valid integer
+or float
+
+|`float` |integer, float, string |float |String must be valid integer or
+float
+
+|`inet` |string |string |IPv4 or IPv6 address
+
+|`int` |integer, string |integer |String must be valid 32 bit integer
+
+|`list` |list, string |list |Uses JSON’s native list representation
+
+|`map` |map, string |map |Uses JSON’s native map representation
+
+|`smallint` |integer, string |integer |String must be valid 16 bit
+integer
+
+|`set` |list, string |list |Uses JSON’s native list representation
+
+|`text` |string |string |Uses JSON’s `\u` character escape
+
+|`time` |string |string |Time of day in format `HH:MM:SS[.fffffffff]`
+
+|`timestamp` |integer, string |string |A timestamp. String constants
+allow inputting timestamps as dates; see link:#usingdates[Working with
+dates] for more information. Timestamps with the format
+`YYYY-MM-DD HH:MM:SS.SSS` are returned.
+
+|`timeuuid` |string |string |Type 1 UUID. See link:#constants[Constants]
+for the UUID format
+
+|`tinyint` |integer, string |integer |String must be valid 8 bit integer
+
+|`tuple` |list, string |list |Uses JSON’s native list representation
+
+|`UDT` |map, string |map |Uses JSON’s native map representation with
+field names as keys
+
+|`uuid` |string |string |See link:#constants[Constants] for the UUID
+format
+
+|`varchar` |string |string |Uses JSON’s `\u` character escape
+
+|`varint` |integer, string |integer |Variable length; may overflow 32 or
+64 bit integers in client-side decoder
+|===
+
+[[fromJson]]
+==== The fromJson() Function
+
+The `fromJson()` function may be used similarly to `INSERT JSON`, but
+for a single column value. It may only be used in the `VALUES` clause of
+an `INSERT` statement or as one of the column values in an `UPDATE`,
+`DELETE`, or `SELECT` statement. For example, it cannot be used in the
+selection clause of a `SELECT` statement.
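+
+For example (a sketch using the `users` table from the collections
+section), `fromJson()` can supply a single column value:
+
+bc(sample). +
+UPDATE users SET favs = fromJson('\{ "band": "Beatles" }') WHERE id =
+'jsmith';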
+
+[[toJson]]
+==== The toJson() Function
+
+The `toJson()` function may be used similarly to `SELECT JSON`, but for
+a single column value. It may only be used in the selection clause of a
+`SELECT` statement.
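+
+For example (same sketch as above):
+
+bc(sample). +
+SELECT id, toJson(favs) FROM users WHERE id = 'jsmith';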
+
+[[appendixA]]
+=== Appendix A: CQL Keywords
+
+CQL distinguishes between _reserved_ and _non-reserved_ keywords.
+Reserved keywords cannot be used as identifiers; they are truly reserved
+for the language (but one can enclose a reserved keyword in
+double-quotes to use it as an identifier). Non-reserved keywords however
+only have a specific meaning in certain contexts but can be used as
+identifiers otherwise. The only _raison d’être_ of these non-reserved
+keywords is convenience: some keywords are non-reserved because it was
+always easy for the parser to decide whether they were used as keywords
+or not.
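+
+For example (a sketch), a reserved keyword such as `SELECT` can still
+be used as a column name by double-quoting it:
+
+bc(sample). +
+CREATE TABLE kw ( +
+id int PRIMARY KEY, +
+"select" text +
+);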
+
+[cols=",",options="header",]
+|===
+|Keyword |Reserved?
+|`ADD` |yes
+|`AGGREGATE` |no
+|`ALL` |no
+|`ALLOW` |yes
+|`ALTER` |yes
+|`AND` |yes
+|`APPLY` |yes
+|`AS` |no
+|`ASC` |yes
+|`ASCII` |no
+|`AUTHORIZE` |yes
+|`BATCH` |yes
+|`BEGIN` |yes
+|`BIGINT` |no
+|`BLOB` |no
+|`BOOLEAN` |no
+|`BY` |yes
+|`CALLED` |no
+|`CAST` |no
+|`CLUSTERING` |no
+|`COLUMNFAMILY` |yes
+|`COMPACT` |no
+|`CONTAINS` |no
+|`COUNT` |no
+|`COUNTER` |no
+|`CREATE` |yes
+|`CUSTOM` |no
+|`DATE` |no
+|`DECIMAL` |no
+|`DEFAULT` |yes
+|`DELETE` |yes
+|`DESC` |yes
+|`DESCRIBE` |yes
+|`DISTINCT` |no
+|`DOUBLE` |no
+|`DROP` |yes
+|`DURATION` |no
+|`ENTRIES` |yes
+|`EXECUTE` |yes
+|`EXISTS` |no
+|`FILTERING` |no
+|`FINALFUNC` |no
+|`FLOAT` |no
+|`FROM` |yes
+|`FROZEN` |no
+|`FULL` |yes
+|`FUNCTION` |no
+|`FUNCTIONS` |no
+|`GRANT` |yes
+|`GROUP` |no
+|`IF` |yes
+|`IN` |yes
+|`INDEX` |yes
+|`INET` |no
+|`INFINITY` |yes
+|`INITCOND` |no
+|`INPUT` |no
+|`INSERT` |yes
+|`INT` |no
+|`INTO` |yes
+|`IS` |yes
+|`JSON` |no
+|`KEY` |no
+|`KEYS` |no
+|`KEYSPACE` |yes
+|`KEYSPACES` |no
+|`LANGUAGE` |no
+|`LIKE` |no
+|`LIMIT` |yes
+|`LIST` |no
+|`LOGIN` |no
+|`MAP` |no
+|`MATERIALIZED` |yes
+|`MBEAN` |yes
+|`MBEANS` |yes
+|`MODIFY` |yes
+|`NAN` |yes
+|`NOLOGIN` |no
+|`NORECURSIVE` |yes
+|`NOSUPERUSER` |no
+|`NOT` |yes
+|`NULL` |yes
+|`OF` |yes
+|`ON` |yes
+|`OPTIONS` |no
+|`OR` |yes
+|`ORDER` |yes
+|`PARTITION` |no
+|`PASSWORD` |no
+|`PER` |no
+|`PERMISSION` |no
+|`PERMISSIONS` |no
+|`PRIMARY` |yes
+|`RENAME` |yes
+|`REPLACE` |yes
+|`RETURNS` |no
+|`REVOKE` |yes
+|`ROLE` |no
+|`ROLES` |no
+|`SCHEMA` |yes
+|`SELECT` |yes
+|`SET` |yes
+|`SFUNC` |no
+|`SMALLINT` |no
+|`STATIC` |no
+|`STORAGE` |no
+|`STYPE` |no
+|`SUPERUSER` |no
+|`TABLE` |yes
+|`TEXT` |no
+|`TIME` |no
+|`TIMESTAMP` |no
+|`TIMEUUID` |no
+|`TINYINT` |no
+|`TO` |yes
+|`TOKEN` |yes
+|`TRIGGER` |no
+|`TRUNCATE` |yes
+|`TTL` |no
+|`TUPLE` |no
+|`TYPE` |no
+|`UNLOGGED` |yes
+|`UNSET` |yes
+|`UPDATE` |yes
+|`USE` |yes
+|`USER` |no
+|`USERS` |no
+|`USING` |yes
+|`UUID` |no
+|`VALUES` |no
+|`VARCHAR` |no
+|`VARINT` |no
+|`VIEW` |yes
+|`WHERE` |yes
+|`WITH` |yes
+|`WRITETIME` |no
+|===
+
+[[appendixB]]
+=== Appendix B: CQL Reserved Types
+
+The following type names are not currently used by CQL, but are reserved
+for potential future use. User-defined types may not use reserved type
+names as their name.
+
+[cols="",options="header",]
+|===
+|type
+|`bitstring`
+|`byte`
+|`complex`
+|`date`
+|`enum`
+|`interval`
+|`macaddr`
+|===
+
+=== Changes
+
+The following describes the changes in each version of CQL.
+
+==== 3.4.3
+
+* Support for `GROUP BY`. See link:#selectGroupBy[`GROUP BY`] (see
+https://issues.apache.org/jira/browse/CASSANDRA-10707[CASSANDRA-10707]).
+* Support for selecting elements and slices of a collection
+(https://issues.apache.org/jira/browse/CASSANDRA-7396[CASSANDRA-7396]).
+
+==== 3.4.2
+
+* link:#updateOptions[`INSERT/UPDATE options`]: for tables with a
+`default_time_to_live`, specifying a TTL of 0 will remove the TTL from
+the inserted or updated values.
+* link:#alterTableStmt[`ALTER TABLE`] `ADD` and `DROP` now allow multiple
+columns to be added/removed.
+* New link:#selectLimit[`PER PARTITION LIMIT`] option (see
+https://issues.apache.org/jira/browse/CASSANDRA-7017[CASSANDRA-7017]).
+* link:#udfs[User-defined functions] can now instantiate `UDTValue` and
+`TupleValue` instances via the new `UDFContext` interface (see
+https://issues.apache.org/jira/browse/CASSANDRA-10818[CASSANDRA-10818]).
+* link:#createTypeStmt[User-defined types] may now be stored in a
+non-frozen form, allowing individual fields to be updated and deleted in
+link:#updateStmt[`UPDATE` statements] and link:#deleteStmt[`DELETE`
+statements], respectively
+(https://issues.apache.org/jira/browse/CASSANDRA-7423[CASSANDRA-7423]).
+
+==== 3.4.1
+
+* Adds `CAST` functions. See link:#castFun[`Cast`].
+
+==== 3.4.0
+
+* Support for link:#createMVStmt[materialized views]
+* link:#deleteStmt[`DELETE`] support for inequality expressions and `IN`
+restrictions on any primary key columns
+* link:#updateStmt[`UPDATE`] support for `IN` restrictions on any
+primary key columns
+
+==== 3.3.1
+
+* The syntax `TRUNCATE TABLE X` is now accepted as an alias for
+`TRUNCATE X`
+
+==== 3.3.0
+
+* Adds new link:#aggregates[aggregates]
+* User-defined functions are now supported through
+link:#createFunctionStmt[`CREATE FUNCTION`] and
+link:#dropFunctionStmt[`DROP FUNCTION`].
+* User-defined aggregates are now supported through
+link:#createAggregateStmt[`CREATE AGGREGATE`] and
+link:#dropAggregateStmt[`DROP AGGREGATE`].
+* Allows double-dollar enclosed string literals as an alternative to
+single-quote enclosed strings.
+* Introduces Roles to supersede user-based authentication and access
+control
+* link:#usingdates[`Date`] and link:#usingtime[`Time`] data types have
+been added
+* link:#json[`JSON`] support has been added
+* `Tinyint` and `Smallint` data types have been added
+* Adds new time conversion functions and deprecates `dateOf` and
+`unixTimestampOf`. See link:#timeFun[`Time conversion functions`]
+
+==== 3.2.0
+
+* User-defined types are now supported through
+link:#createTypeStmt[`CREATE TYPE`], link:#alterTypeStmt[`ALTER TYPE`],
+and link:#dropTypeStmt[`DROP TYPE`]
+* link:#createIndexStmt[`CREATE INDEX`] now supports indexing collection
+columns, including indexing the keys of map collections through the
+`keys()` function
+* Indexes on collections may be queried using the new `CONTAINS` and
+`CONTAINS KEY` operators
+* Tuple types were added to hold fixed-length sets of typed positional
+fields (see the section on link:#types[types])
+* link:#dropIndexStmt[`DROP INDEX`] now supports optionally specifying a
+keyspace
+
+==== 3.1.7
+
+* `SELECT` statements now support selecting multiple rows in a single
+partition using an `IN` clause on combinations of clustering columns.
+See link:#selectWhere[SELECT WHERE] clauses.
+* `IF NOT EXISTS` and `IF EXISTS` syntax is now supported by
+`CREATE USER` and `DROP USER` statements, respectively.
+
+==== 3.1.6
+
+* A new link:#uuidFun[`uuid` method] has been added.
+* Support for `DELETE ... IF EXISTS` syntax.
+
+==== 3.1.5
+
+* It is now possible to group clustering columns in a relation, see
+link:#selectWhere[SELECT WHERE] clauses.
+* Added support for `STATIC` columns, see link:#createTableStatic[static
+in CREATE TABLE].
+
+==== 3.1.4
+
+* `CREATE INDEX` now allows specifying options when creating CUSTOM
+indexes (see link:#createIndexStmt[CREATE INDEX reference]).
+
+==== 3.1.3
+
+* Millisecond precision formats have been added to the timestamp parser
+(see link:#usingtimestamps[working with dates]).
+
+==== 3.1.2
+
+* `NaN` and `Infinity` have been added as valid float constants. They are
+now reserved keywords. In the unlikely case you were using them as a
+column identifier (or keyspace/table one), you will now need to double
+quote them (see link:#identifiers[quoted identifiers]).
+
+==== 3.1.1
+
+* `SELECT` statement now allows listing the partition keys (using the
+`DISTINCT` modifier). See
+https://issues.apache.org/jira/browse/CASSANDRA-4536[CASSANDRA-4536].
+* The syntax `c IN ?` is now supported in `WHERE` clauses. In that case,
+the value expected for the bind variable will be a list of whatever type
+`c` is.
+* It is now possible to use named bind variables (using `:name` instead
+of `?`).
+
+==== 3.1.0
+
+* link:#alterTableStmt[ALTER TABLE] `DROP` option has been re-enabled for
+CQL3 tables and has new semantics now: the space formerly used by
+dropped columns will now be eventually reclaimed (post-compaction). You
+should not re-add previously dropped columns unless you use timestamps
+with microsecond precision (see
+https://issues.apache.org/jira/browse/CASSANDRA-3919[CASSANDRA-3919] for
+more details).
+* `SELECT` statement now supports aliases in the select clause. Aliases in
+WHERE and ORDER BY clauses are not supported. See the
+link:#selectStmt[section on select] for details.
+* `CREATE` statements for `KEYSPACE`, `TABLE` and `INDEX` now support
+an `IF NOT EXISTS` condition. Similarly, `DROP` statements support an
+`IF EXISTS` condition.
+* `INSERT` statements optionally support an `IF NOT EXISTS` condition
+and `UPDATE` supports `IF` conditions.
+
+==== 3.0.5
+
+* `SELECT`, `UPDATE`, and `DELETE` statements now allow empty `IN`
+relations (see
+https://issues.apache.org/jira/browse/CASSANDRA-5626[CASSANDRA-5626]).
+
+==== 3.0.4
+
+* Updated the syntax for custom link:#createIndexStmt[secondary
+indexes].
+* Non-equal conditions on the partition key are now never supported, even
+for ordering partitioners, as this was not correct (the order was *not*
+the one of the type of the partition key). Instead, the `token` method
+should always be used for range queries on the partition key (see
+link:#selectWhere[WHERE clauses]).
+
+==== 3.0.3
+
+* Support for custom link:#createIndexStmt[secondary indexes] has been
+added.
+
+==== 3.0.2
+
+* Type validation for the link:#constants[constants] has been fixed. For
+instance, the implementation used to allow `'2'` as a valid value for an
+`int` column (interpreting it as the equivalent of `2`), or `42` as a
+valid `blob` value (in which case `42` was interpreted as a hexadecimal
+representation of the blob). This is no longer the case; type validation
+of constants is now stricter. See the link:#types[data types] section
+for details on which constant is allowed for which type.
+* The type validation fix of the previous point has led to the
+introduction of link:#constants[blob constants] to allow inputting
+blobs. Do note that while inputting blobs as string constants is still
+supported by this version (to allow a smoother transition to blob
+constants), it is now deprecated (in particular, the link:#types[data
+types] section does not list string constants as valid blobs) and will
+be removed by a future version. If you were using strings as blobs, you
+should thus update your client code ASAP to switch to blob constants.
+* A number of functions to convert native types to blobs have also been
+introduced. Furthermore, the `token` function is now also allowed in select
+clauses. See the link:#functions[section on functions] for details.
+
+==== 3.0.1
+
+* link:#usingtimestamps[Date strings] (and timestamps) are no longer
+accepted as valid `timeuuid` values. Doing so was a bug in the sense
+that date strings are not valid `timeuuid` values, and it was thus resulting in
+https://issues.apache.org/jira/browse/CASSANDRA-4936[confusing
+behaviors]. However, the following new methods have been added to help
+working with `timeuuid`: `now`, `minTimeuuid`, `maxTimeuuid`, `dateOf`
+and `unixTimestampOf`. See the link:#timeuuidFun[section dedicated to
+these methods] for more detail.
+* link:#constants[Float constants] now support the exponent notation. In
+other words, `4.2E10` is now a valid floating point value.
+
+=== Versioning
+
+Versioning of the CQL language adheres to the http://semver.org[Semantic
+Versioning] guidelines. Versions take the form X.Y.Z where X, Y, and Z
+are integer values representing major, minor, and patch level
+respectively. There is no correlation between Cassandra release versions
+and the CQL language version.
+
+[cols=",",options="header",]
+|===
+|version |description
+|Major |The major version _must_ be bumped when backward incompatible
+changes are introduced. This should rarely occur.
+
+|Minor |Minor version increments occur when new, but backward
+compatible, functionality is introduced.
+
+|Patch |The patch version is incremented when bugs are fixed.
+|===
diff --git a/doc/modules/cassandra/pages/cql/ddl.adoc b/doc/modules/cassandra/pages/cql/ddl.adoc
new file mode 100644
index 00000000000..be93bc211eb
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/ddl.adoc
@@ -0,0 +1,799 @@
+= Data Definition
+:tabs:
+
+CQL stores data in _tables_, whose schema defines the layout of the
+data in the table. Tables are located in _keyspaces_.
+A keyspace defines options that apply to all the keyspace's tables.
+The xref:cql/ddl.adoc#replication-strategy[replication strategy] is an important keyspace option, as is the replication factor.
+A good general rule is one keyspace per application.
+It is common for a cluster to define only one keyspace for an active application.
+
+This section describes the statements used to create, modify, and remove
+those keyspaces and tables.
+
+== Common definitions
+
+The names of the keyspaces and tables are defined by the following
+grammar:
+
+[source,bnf]
+----
+include::example$BNF/ks_table.bnf[]
+----
+
+Both keyspace and table names should be comprised of only alphanumeric
+characters, cannot be empty, and are limited in size to 48 characters.
+That limit exists mostly to keep filenames (which may include the
+keyspace and table name) within the limits of certain file systems.
+By default, keyspace and table names are case-insensitive (`myTable` is
+equivalent to `mytable`) but case sensitivity can be forced by using
+double-quotes (`"myTable"` is different from `mytable`).
+
+Further, a table is always part of a keyspace and a table name can be
+provided fully-qualified by the keyspace it is part of. If it is not
+fully-qualified, the table is assumed to be in the _current_ keyspace
+(see the xref:cql/ddl.adoc#use-statement[USE] statement).
+
+Further, the valid names for columns are defined as:
+
+[source,bnf]
+----
+include::example$BNF/column.bnf[]
+----
+
+We also define the notion of statement options for use in the following
+section:
+
+[source,bnf]
+----
+include::example$BNF/options.bnf[]
+----
+
+[[create-keyspace-statement]]
+== CREATE KEYSPACE
+
+A keyspace is created with a `CREATE KEYSPACE` statement:
+
+[source,bnf]
+----
+include::example$BNF/create_ks.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/create_ks.cql[]
+----
+
+Attempting to create a keyspace that already exists will return an error
+unless the `IF NOT EXISTS` option is used. If it is used, the statement
+will be a no-op if the keyspace already exists.
+
+The supported `options` are:
+
+[cols=",,,,",options="header",]
+|===
+|name | kind | mandatory | default | description
+|`replication` | _map_ | yes | n/a | The replication strategy and options to use for the keyspace (see
+details below).
+|`durable_writes` | _simple_ | no | true | Whether to use the commit log for updates on this keyspace (disable this
+option at your own risk!).
+|===
+
+The `replication` property is mandatory and must contain the `'class'` sub-option that defines the desired
+xref:cql/ddl.adoc#replication-strategy[replication strategy] class.
+The rest of the sub-options depend on which replication strategy is used.
+By default, Cassandra supports the following `'class'` values:
+
+[[replication-strategy]]
+=== `SimpleStrategy`
+
+A simple strategy that defines a replication factor for data to be
+spread across the entire cluster. This is generally not a wise choice
+for production, as it does not respect datacenter layouts and can
+lead to wildly varying query latency. For production, use
+`NetworkTopologyStrategy`. `SimpleStrategy` supports a single
+mandatory argument:
+
+[cols=",,,",options="header",]
+|===
+|sub-option |type |since |description
+|`'replication_factor'` | int | all | The number of replicas to store per range
+|===
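+
+For illustration, a keyspace using `SimpleStrategy` might be created as
+follows (the keyspace name is arbitrary):
+
+[source,cql]
+----
+-- every range is replicated to 3 nodes, regardless of datacenter layout
+CREATE KEYSPACE test_ks
+    WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};
+----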
+
+=== `NetworkTopologyStrategy`
+
+A production-ready replication strategy that sets the
+replication factor independently for each data-center. The rest of the
+sub-options are key-value pairs, with a key set to a data-center name and
+its value set to the associated replication factor. Options:
+
+[cols=",,,",options="header",]
+|===
+|sub-option |type |description
+|`'<datacenter>'` | int | The number of replicas to store per range in the provided datacenter.
+|`'replication_factor'` | int | The number of replicas to use as a default per datacenter if not
+specifically provided. Note that this always defers to existing
+definitions or explicit datacenter settings. For example, to have three
+replicas per datacenter, set a value of 3.
+|===
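+
+For illustration, assuming two datacenters named `DC1` and `DC2` (the
+keyspace name is arbitrary):
+
+[source,cql]
+----
+-- 3 replicas per range in DC1, 2 replicas per range in DC2
+CREATE KEYSPACE prod_ks
+    WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': 3, 'DC2': 2};
+----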
+
+When later altering keyspaces and changing the `replication_factor`,
+auto-expansion will only _add_ new datacenters for safety, it will not
+alter existing datacenters or remove any, even if they are no longer in
+the cluster. If you want to remove datacenters while setting the
+`replication_factor`, explicitly zero out the datacenter you want to
+have zero replicas.
+
+An example of auto-expanding datacenters with two datacenters: `DC1` and
+`DC2`:
+
+[source,cql]
+----
+include::example$CQL/autoexpand_ks.cql[]
+----
+will result in:
+[source,plaintext]
+----
+include::example$RESULTS/autoexpand_ks.result[]
+----
+
+An example of auto-expanding and overriding a datacenter:
+
+[source,cql]
+----
+include::example$CQL/autoexpand_ks_override.cql[]
+----
+will result in:
+[source,plaintext]
+----
+include::example$RESULTS/autoexpand_ks_override.result[]
+----
+
+An example that excludes a datacenter while using `replication_factor`:
+
+[source,cql]
+----
+include::example$CQL/autoexpand_exclude_dc.cql[]
+----
+will result in:
+[source,plaintext]
+----
+include::example$RESULTS/autoexpand_exclude_dc.result[]
+----
+
+If xref:new/transientreplication.adoc[transient replication] has been enabled, transient replicas can be
+configured for both `SimpleStrategy` and `NetworkTopologyStrategy` by
+defining replication factors in the format
+`'<total_replicas>/<transient_replicas>'`.
+
+For instance, this keyspace will have 3 replicas in DC1, 1 of which is
+transient, and 5 replicas in DC2, 2 of which are transient:
+
+[source,cql]
+----
+include::example$CQL/create_ks_trans_repl.cql[]
+----
+
+[[use-statement]]
+== USE
+
+The `USE` statement changes the _current_ keyspace to the specified keyspace.
+A number of objects in CQL are bound to a keyspace (tables, user-defined types, functions, etc.) and the
+current keyspace is the default keyspace used when those objects are
+referred to in a query without a fully-qualified name (without a prefixed keyspace name).
+A `USE` statement specifies the keyspace to use as an argument:
+
+[source,bnf]
+----
+include::example$BNF/use_ks.bnf[]
+----
+Using CQL:
+[source,cql]
+----
+include::example$CQL/use_ks.cql[]
+----
+
+[[alter-keyspace-statement]]
+== ALTER KEYSPACE
+
+An `ALTER KEYSPACE` statement modifies the options of a keyspace:
+
+[source,bnf]
+----
+include::example$BNF/alter_ks.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/alter_ks.cql[]
+----
+
+The supported options are the same as for xref:cql/ddl.adoc#create-keyspace-statement[creating a keyspace].
+
+[[drop-keyspace-statement]]
+== DROP KEYSPACE
+
+Dropping a keyspace is done with the `DROP KEYSPACE` statement:
+
+[source,bnf]
+----
+include::example$BNF/drop_ks.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/drop_ks.cql[]
+----
+
+Dropping a keyspace results in the immediate, irreversible removal of
+that keyspace, including all the tables, user-defined types, user-defined functions, and
+all the data contained in those tables.
+
+If the keyspace does not exist, the statement will return an error,
+unless `IF EXISTS` is used, in which case the operation is a no-op.
+
+[[create-table-statement]]
+== CREATE TABLE
+
+Creating a new table uses the `CREATE TABLE` statement:
+
+[source,bnf]
+----
+include::example$BNF/create_table.bnf[]
+----
+
+For example, here are some CQL statements to create tables:
+
+[source,cql]
+----
+include::example$CQL/create_table.cql[]
+----
+
+A CQL table has a name and is composed of a set of _rows_.
+Creating a table amounts to defining which xref:cql/ddl.adoc#column-definition[columns] each row will have,
+which of those columns comprise the xref:cql/ddl.adoc#primary-key[primary key], as well as the
+xref:cql/ddl.adoc#create-table-options[options] defined for the table.
+
+Attempting to create an already existing table will return an error
+unless the `IF NOT EXISTS` directive is used. If it is used, the
+statement will be a no-op if the table already exists.
+
+[[column-definition]]
+=== Column definitions
+
+Every row in a CQL table will have the predefined columns defined at table creation.
+Columns can be added later using an xref:cql/ddl.adoc#alter-table-statement[alter statement].
+
+A `column_definition` is comprised of the name of the column and its xref:cql/ddl.adoc#data-type[type],
+restricting the values that are accepted for that column. Additionally, a column definition can have the
+following modifiers:
+
+* `STATIC`: declares the column as a xref:cql/ddl.adoc#static-column[static column]
+* `PRIMARY KEY`: declares the column as the sole component of the xref:cql/ddl.adoc#primary-key[primary key] of the table
+
+[[static-column]]
+==== Static columns
+
+Some columns can be declared as `STATIC` in a table definition. A column
+that is static will be “shared” by all the rows belonging to the same
+partition (having the same xref:cql/ddl.adoc#partition-key[partition key]).
+
+For example:
+
+[{tabs}]
+====
+Code::
++
+--
+[source,cql]
+----
+include::example$CQL/create_static_column.cql[]
+include::example$CQL/insert_static_data.cql[]
+include::example$CQL/select_static_data.cql[]
+----
+--
+
+Results::
++
+--
+[source,cql]
+----
+include::example$RESULTS/select_static_data.result[]
+----
+--
+====
+
+As can be seen, the `s` value is the same (`static1`) for both of the
+rows in the partition (the partition key being `pk`, and both
+rows are in the same partition): the second insertion overrides the
+value for `s`.
+
+The use of static columns has the following restrictions:
+
+* A table without clustering columns cannot have static columns.
+(In a table without clustering columns, every partition has only one row,
+so every column is inherently static.)
+* Only non-primary key columns can be static.
+
+[[primary-key]]
+=== The Primary key
+
+Within a table, a row is uniquely identified by its `PRIMARY KEY`, and
+hence all tables *must* define a single PRIMARY KEY.
+A `PRIMARY KEY` is composed of one or more of the defined columns in the table.
+Syntactically, the primary key is defined with the phrase `PRIMARY KEY`
+followed by a comma-separated list of the column names within parentheses.
+If the primary key has only one column, you can alternatively add the `PRIMARY KEY` phrase to
+that column in the table definition.
+The order of the columns in the primary key definition defines the partition key and
+clustering columns.
+
+A CQL primary key is composed of two parts:
+
+xref:cql/ddl.adoc#partition-key[partition key]::
+* It is the first component of the primary key definition.
+It can be a single column or, using an additional set of parentheses, can be multiple columns.
+A table must have at least one partition key; the smallest possible table definition is:
++
+[source,cql]
+----
+include::example$CQL/create_table_single_pk.cql[]
+----
+xref:cql/ddl.adoc#clustering-columns[clustering columns]::
+* The clustering columns are the columns that follow the partition key in the primary key definition.
+The order of those columns defines the _clustering order_.
+
+Some examples of primary key definition are:
+
+* `PRIMARY KEY (a)`: `a` is the single partition key and there are no clustering columns
+* `PRIMARY KEY (a, b, c)` : `a` is the single partition key and `b` and `c` are the clustering columns
+* `PRIMARY KEY ((a, b), c)` : `a` and `b` compose the _composite_ partition key and `c` is the clustering column
+
+[IMPORTANT]
+====
+The primary key uniquely identifies a row in the table, as described above.
+A consequence of this uniqueness is that if another row is inserted using the same primary key,
+then an `UPSERT` occurs and an existing row with the same primary key is replaced.
+Columns that are not part of the primary key cannot define uniqueness.
+====
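+
+A short sketch of this upsert behavior (the table and values are
+hypothetical):
+
+[source,cql]
+----
+CREATE TABLE t (pk int, c int, v text, PRIMARY KEY (pk, c));
+INSERT INTO t (pk, c, v) VALUES (0, 0, 'first');
+INSERT INTO t (pk, c, v) VALUES (0, 0, 'second'); -- same primary key: replaces the row
+SELECT v FROM t WHERE pk = 0 AND c = 0;           -- returns 'second'
+----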
+
+[[partition-key]]
+==== Partition key
+
+Within a table, CQL defines the notion of a _partition_, which determines the location of data within a Cassandra cluster.
+A partition is the set of rows that share the same value for their partition key.
+
+Note that if the partition key is composed of multiple columns, then rows belong to the same partition
+when they have the same values for all those partition key columns.
+A hash is computed from the partition key columns and that hash value defines the partition location.
+So, for instance, given the following table definition and content:
+
+[source,cql]
+----
+include::example$CQL/create_table_compound_pk.cql[]
+include::example$CQL/insert_table_compound_pk.cql[]
+include::example$CQL/select_table_compound_pk.cql[]
+----
+
+will result in
+[source,cql]
+----
+include::example$RESULTS/select_table_compound_pk.result[]
+----
+<1> Rows 1 and 2 are in the same partition, because both columns `a` and `b` are zero.
+<2> Rows 3 and 4 are in the same partition, but a different one, because column `a` is zero and column `b` is 1 in both rows.
+<3> Row 5 is in a third partition by itself, because both columns `a` and `b` are 1.
+
+Note that a table always has a partition key, and that if the table has
+no `clustering columns`, then every partition of that table has a single row,
+because the partition key, compound or otherwise, identifies a single location.
+
+The most important property of a partition is that all the rows belonging
+to the same partition are guaranteed to be stored on the same set of
+replica nodes.
+In other words, the partition key of a table defines which rows will be localized on the same
+node in the cluster.
+The localization of data is important to the efficient retrieval of data, requiring the Cassandra coordinator
+to contact as few nodes as possible.
+However, there is a flip-side to this guarantee: because all rows sharing a partition key are stored on the same
+node, a heavily used partition can become a hotspot for both reading and writing.
+While selecting a primary key that groups table rows assists batch updates and can ensure that the updates are
+_atomic_ and done in _isolation_, the partitions must be sized "just right, not too big nor too small".
+
+Data modeling that considers the querying patterns and assigns primary keys based on the queries will have the lowest
+latency in fetching data.
+
+[[clustering-columns]]
+==== Clustering columns
+
+The clustering columns of a table define the clustering order for the partition of that table.
+For a given `partition`, all rows are ordered by that clustering order. Clustering columns also add uniqueness to
+a row in a table.
+
+For instance, given:
+
+[source,cql]
+----
+include::example$CQL/create_table_clustercolumn.cql[]
+include::example$CQL/insert_table_clustercolumn.cql[]
+include::example$CQL/select_table_clustercolumn.cql[]
+----
+
+will result in
+[source,cql]
+----
+include::example$RESULTS/select_table_clustercolumn.result[]
+----
+<1> Row 1 is in one partition, and Rows 2-5 are in a different one. The display order is also different.
+
+Looking more closely at the four rows in the same partition, the `b` clustering column defines the order in which those rows
+are displayed.
+Whereas the partition key of the table groups rows on the same node, the clustering columns control
+how those rows are stored on the node.
+
+That sorting allows the very efficient retrieval of a range of rows within a partition:
+
+[source,cql]
+----
+include::example$CQL/select_range.cql[]
+----
+
+will result in
+[source,cql]
+----
+include::example$RESULTS/select_range.result[]
+----
+
+[[create-table-options]]
+=== Table options
+
+A CQL table has a number of options that can be set at creation (and,
+for most of them, altered later). These options are specified after the
+`WITH` keyword.
+
+One important option that cannot be changed after creation, `CLUSTERING ORDER BY`, influences how queries can be done against the table. It is worth discussing in more detail here.
+
+[[clustering-order]]
+==== Clustering order
+
+The clustering order of a table is defined by the clustering columns.
+By default, the clustering order is ascending for the clustering column's data types.
+For example, integers order from 1, 2, ... n, while text orders from A to Z.
+
+The `CLUSTERING ORDER BY` table option uses a comma-separated list of the
+clustering columns, each set for either `ASC` (for _ascending_ order) or `DESC` (for _descending_ order).
+The default is ascending for all clustering columns if the `CLUSTERING ORDER BY` option is not set.
+
+This option is basically a hint for the storage engine that changes the order in which it stores rows.
+Beware of the consequences of setting this option:
+
+* It changes the default ascending order of results when queried with a `SELECT` statement with no `ORDER BY` clause.
+
+* It limits how the `ORDER BY` clause is used in `SELECT` statements on that table.
+Results can only be ordered with either the original clustering order or the reverse clustering order.
+Suppose you create a table with two clustering columns `a` and `b`, defined `WITH CLUSTERING ORDER BY (a DESC, b ASC)`.
+Queries on the table can use `ORDER BY (a DESC, b ASC)` or `ORDER BY (a ASC, b DESC)`.
+Mixed orderings, such as `ORDER BY (a ASC, b ASC)` or `ORDER BY (a DESC, b DESC)`, will not return the expected order.
+
+* It has a performance impact on queries. Queries in reverse clustering order are slower than the default ascending order.
+If you plan to query mostly in descending order, declare the clustering order in the table schema using `WITH CLUSTERING ORDER BY (<column> DESC)`.
+This optimization is common for time series, to retrieve the data from newest to oldest.
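+
+A sketch of the time-series case (the `events` table is hypothetical):
+
+[source,cql]
+----
+-- rows within a partition are stored newest-first, so "latest N readings"
+-- queries read in the table's natural order
+CREATE TABLE events (
+    sensor_id uuid,
+    recorded_at timestamp,
+    value double,
+    PRIMARY KEY (sensor_id, recorded_at)
+) WITH CLUSTERING ORDER BY (recorded_at DESC);
+----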
+
+[[create-table-general-options]]
+==== Other table options
+
+A table supports the following options:
+
+[width="100%",cols="30%,9%,11%,50%",options="header",]
+|===
+|option | kind | default | description
+
+| `comment` | _simple_ | none | A free-form, human-readable comment
+| xref:cql/ddl.adoc#spec_retry[`speculative_retry`] | _simple_ | 99PERCENTILE | Speculative retry options
+| `cdc` |_boolean_ |false |Create a Change Data Capture (CDC) log on the table
+| `additional_write_policy` |_simple_ |99PERCENTILE | Same as `speculative_retry`
+| `gc_grace_seconds` |_simple_ |864000 |Time to wait before garbage collecting tombstones (deletion markers)
+| `bloom_filter_fp_chance` |_simple_ |0.00075 |The target probability of
+a false positive for the sstable bloom filters. The bloom filters will be
+sized to provide the requested probability; thus, lowering this value
+impacts the size of bloom filters in-memory and on-disk.
+| `default_time_to_live` |_simple_ |0 |Default expiration time (“TTL”) in seconds for a table
+| `compaction` |_map_ |_see below_ | xref:operating/compaction/index.adoc#cql-compaction-options[Compaction options]
+| `compression` |_map_ |_see below_ | xref:operating/compression/index.adoc#cql-compression-options[Compression options]
+| `caching` |_map_ |_see below_ |Caching options
+| `memtable_flush_period_in_ms` |_simple_ |0 |Time (in ms) before Cassandra flushes memtables to disk
+| `read_repair` |_simple_ |BLOCKING |Sets read repair behavior (see below)
+|===
+
+[[spec_retry]]
+===== Speculative retry options
+
+By default, Cassandra read coordinators only query as many replicas as
+necessary to satisfy consistency levels: one for consistency level
+`ONE`, a quorum for `QUORUM`, and so on. `speculative_retry` determines
+when coordinators may query additional replicas, a useful action when
+replicas are slow or unresponsive. Speculative retries reduce latency.
+The `speculative_retry` option configures rapid read protection, where a coordinator sends more
+requests than needed to satisfy the consistency level.
+
+[IMPORTANT]
+====
+Frequently reading from additional replicas can hurt cluster
+performance. When in doubt, keep the default `99PERCENTILE`.
+====
+
+Prior to Cassandra 4.0, the speculative retry policy takes a single string as a parameter:
+
+* `NONE`
+* `ALWAYS`
+* `99PERCENTILE` (PERCENTILE)
+* `50MS` (CUSTOM)
+
+This example sets speculative retry to a custom value:
+
+[source,cql]
+----
+include::example$CQL/alter_table_spec_retry.cql[]
+----
+
+This example uses a percentile for the setting:
+
+[source,cql]
+----
+include::example$CQL/alter_table_spec_retry_percent.cql[]
+----
+
+A percentile setting can backfire. If a single host becomes unavailable, it can
+force up the percentiles. A value of `p99` will not speculate as intended because the
+value at the specified percentile has increased too much. If the consistency level is set to `ALL`, all
+replicas are queried regardless of the speculative retry setting.
+
+Cassandra 4.0 supports case-insensitivity for speculative retry values (https://issues.apache.org/jira/browse/CASSANDRA-14293[CASSANDRA-14293]). For example, assigning the value as `none`, `None`, or `NONE` has the same effect.
+
+Additionally, the following values are added:
+
+[cols=",,",options="header",]
+|===
+|Format |Example |Description
+| `XPERCENTILE` | 90.5PERCENTILE | Coordinators record average per-table response times
+for all replicas. If a replica takes longer than `X` percent of this
+table's average response time, the coordinator queries an additional
+replica. `X` must be between 0 and 100.
+| `XP` | 90.5P | Same as `XPERCENTILE`
+| `Yms` | 25ms | If a replica takes more than `Y` milliseconds to respond, the
+coordinator queries an additional replica.
+| `MIN(XPERCENTILE,YMS)` | MIN(99PERCENTILE,35MS) | A hybrid policy that uses either the
+specified percentile or fixed milliseconds depending on which value is
+lower at the time of calculation. Parameters are `XPERCENTILE`, `XP`, or
+`Yms`. This setting helps protect against a single slow instance.
+
+| `MAX(XPERCENTILE,YMS)` | MAX(90.5P,25ms) | A hybrid policy that uses either the specified
+percentile or fixed milliseconds depending on which value is higher at
+the time of calculation.
+| `ALWAYS` | ALWAYS | Coordinators always send speculative read requests.
+| `NEVER` | NEVER | Coordinators never send speculative read requests.
+|===
+
+Cassandra 4.0 adds support for hybrid `MIN()` and `MAX()` speculative retry policies, with a mix and match of either `MIN(), MAX()`, `MIN(), MIN()`, or `MAX(), MAX()` (https://issues.apache.org/jira/browse/CASSANDRA-14293[CASSANDRA-14293]).
+For example, with `MIN(99PERCENTILE,50MS)` the hybrid mode still speculates at the table's normal `p99` while it is
+below 50ms, but once `p99` rises above that value, the fixed 50ms threshold is used instead.
+In a hybrid value, one value must be a fixed time (ms) value and the other a percentile value.
+
+To illustrate variations, the following examples are all valid:
+
+[source,cql]
+----
+include::example$CQL/spec_retry_values.cql[]
+----
+
+The `additional_write_policy` setting specifies the threshold at which a cheap
+quorum write will be upgraded to include transient replicas.
+
+[[cql-compaction-options]]
+===== Compaction options
+
+The `compaction` options must minimally define the `'class'` sub-option,
+to specify the compaction strategy class to use.
+The supported classes are:
+
+* `'SizeTieredCompactionStrategy'`, xref:operating/compaction/stcs.adoc#stcs[STCS] (Default)
+* `'LeveledCompactionStrategy'`, xref:operating/compaction/lcs.adoc#lcs[LCS]
+* `'TimeWindowCompactionStrategy'`, xref:operating/compaction/twcs.adoc#twcs[TWCS]
+
+The `'DateTieredCompactionStrategy'` is also supported but deprecated;
+`'TimeWindowCompactionStrategy'` should be used.
+If a custom strategy is required, specify the full class name as a xref:cql/definitions.adoc#constants[string constant].
+
+All default strategies support a number of xref:operating/compaction/index.adoc#compaction-options[common options], as well as options specific to the strategy chosen. See the section corresponding to your strategy for details: xref:operating/compaction/stcs.adoc#stcs_options[STCS], xref:operating/compaction/lcs.adoc#lcs_options[LCS], xref:operating/compaction/twcs.adoc#twcs_options[TWCS].
+
+[[cql-compression-options]]
+===== Compression options
+
+The `compression` options define if and how the SSTables of the table
+are compressed. Compression is configured on a per-table basis as an
+optional argument to `CREATE TABLE` or `ALTER TABLE`. The following
+sub-options are available:
+
+[cols=",,",options="header",]
+|===
+|Option |Default |Description
+| `class` | LZ4Compressor | The compression algorithm to use. The default compressors are: LZ4Compressor,
+SnappyCompressor, DeflateCompressor and ZstdCompressor.
+Use `'enabled' : false` to disable compression.
+A custom compressor can be provided by specifying its full class name as a xref:cql/definitions.adoc#constants[string constant].
+
+| `enabled` | true | Enable/disable sstable compression.
+If the `enabled` option is set to `false`, no other options must be specified.
+
+| `chunk_length_in_kb` | 64 | On disk, SSTables are compressed by block (to allow random reads).
+This option defines the size (in KB) of each block. See the xref:cql/ddl.adoc#chunk_note[note] for further information.
+
+| `crc_check_chance` | 1.0 | Determines how likely Cassandra is to verify the checksum on each
+compression chunk during reads.
+
+| `compression_level` | 3 | Compression level. Only applicable for `ZstdCompressor`.
+Accepts values between `-131072` and `22`.
+|===
+
+[[chunk_note]]
+[NOTE]
+====
+Bigger values may improve the compression rate, but will increase the minimum size of data to be read from
+disk for a read.
+The default value is an optimal value for compressing tables.
+The chunk length must be a power of 2 because the chunk number is computed from the uncompressed file offset.
+Block size may be adjusted based on read/write access patterns such as:
+
+* How much data is typically requested at once
+* Average size of rows in the table
+====
+
+For instance, to create a table with LZ4Compressor and a `chunk_length_in_kb` of 4 KB:
+
+[source,cql]
+----
+include::example$CQL/chunk_length.cql[]
+----
+
+[[cql-caching-options]]
+===== Caching options
+
+Caching optimizes the use of cache memory of a table. The cached data is
+weighed by size and access frequency.
+The `caching` options can configure both the `key cache` and the `row cache` for the table.
+The following sub-options are available:
+
+[cols=",,",options="header",]
+|===
+|Option |Default |Description
+| `keys` | ALL | Whether to cache keys (key cache) for this table. Valid values are: `ALL` and `NONE`.
+
+| `rows_per_partition` | NONE | The number of rows to cache per partition (row cache).
+If an integer `n` is specified, the first `n` queried rows of a partition will be cached.
+Valid values are: `ALL`, to cache all rows of a queried partition, or `NONE` to disable row caching.
+|===
+
+For instance, to create a table with both a key cache and 10 rows cached per partition:
+
+[source,cql]
+----
+include::example$CQL/caching_option.cql[]
+----
+
+[[read-repair-options]]
+===== Read Repair options
+
+The `read_repair` options configure the read repair behavior, tuning for various performance and consistency behaviors.
+
+The values are:
+[cols=",,",options="header",]
+|===
+|Option |Default |Description
+|`BLOCKING` | yes | If a read repair is triggered, the read blocks writes sent to other replicas until the consistency level is reached by the writes.
+
+|`NONE` | no | If set, the coordinator reconciles any differences between replicas, but doesn't attempt to repair them.
+|===
+
+Two consistency properties are affected by read repair behavior.
+
+* Monotonic quorum reads: Monotonic quorum reads
+prevent reads from appearing to go back in time in some circumstances.
+When monotonic quorum reads are not provided and a write fails to reach
+a quorum of replicas, the written values may be visible in one read, and then disappear
+in a subsequent read. `BLOCKING` provides this behavior.
+* Write atomicity: Write atomicity prevents reads
+from returning partially-applied writes. Cassandra attempts to provide
+partition-level write atomicity, but since only the data covered by a
+SELECT statement is repaired by a read repair, read repair can break
+write atomicity when data is read at a more granular level than it is
+written. For example, read repair can break write atomicity if you write
+multiple rows to a clustered partition in a batch, but then select a
+single row by specifying the clustering column in a SELECT statement.
+`NONE` provides this behavior.
+
+===== Other considerations
+
+* Adding new columns (see `ALTER TABLE` below) is a constant time
+operation. Thus, there is no need to anticipate future usage while initially creating a table.
+
+[[alter-table-statement]]
+== ALTER TABLE
+
+Altering an existing table uses the `ALTER TABLE` statement:
+
+[source,bnf]
+----
+include::example$BNF/alter_table.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/alter_table_add_column.cql[]
+include::example$CQL/alter_table_with_comment.cql[]
+----
+
+The `ALTER TABLE` statement can:
+
+* `ADD` a new column to a table. The primary key of a table cannot ever be altered.
+A new column, thus, cannot be part of the primary key.
+Adding a column is a constant-time operation, independent of the amount of data in the table.
+* `DROP` a column from a table. This command drops both the column and all
+its content. Be aware that, while the column becomes immediately
+unavailable, its content is removed lazily during compaction. Because of this lazy removal,
+the command is a constant-time operation, independent of the amount of data in the table.
+Also, it is important to know that once a column is dropped, a column with the same name can be re-added,
+unless the dropped column was a non-frozen column like a collection.
+
+[WARNING]
+.Warning
+====
+Dropping a column assumes that the timestamps used for the values of this
+column are "real" timestamps in microseconds. Using "real" timestamps in
+microseconds is the default and is *strongly* recommended, but as
+Cassandra allows the client to provide any timestamp on any table, it is
+theoretically possible to use another convention. Please be aware that
+if you do so, dropping a column will not correctly execute.
+====
+
+* Use `WITH` to change a table option. The xref:cql/ddl.adoc#create-table-options[supported options]
+are the same as those used when creating a table, with the exception of `CLUSTERING ORDER`.
+However, setting any `compaction` sub-options will erase *ALL* previous `compaction` options, so you need to re-specify
+all the sub-options you wish to keep. The same is true for `compression` sub-options.
+
+[[drop-table-statement]]
+== DROP TABLE
+
+Dropping a table uses the `DROP TABLE` statement:
+
+[source,bnf]
+----
+include::example$BNF/drop_table.bnf[]
+----
+
+Dropping a table results in the immediate, irreversible removal of the
+table, including all data it contains.
+
+If the table does not exist, the statement will return an error, unless
+`IF EXISTS` is used, in which case the operation is a no-op.
+
+[[truncate-statement]]
+== TRUNCATE
+
+A table can be truncated using the `TRUNCATE` statement:
+
+[source,bnf]
+----
+include::example$BNF/truncate_table.bnf[]
+----
+
+`TRUNCATE TABLE foo` is the preferred syntax for consistency with other DDL
+statements.
+However, tables are currently the only object that can be truncated, and the `TABLE` keyword can be omitted.
+
+Truncating a table permanently removes all existing data from the table, but without removing the table itself.
diff --git a/doc/modules/cassandra/pages/cql/definitions.adoc b/doc/modules/cassandra/pages/cql/definitions.adoc
new file mode 100644
index 00000000000..95be20ff1dc
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/definitions.adoc
@@ -0,0 +1,187 @@
+= Definitions
+
+== Conventions
+
+To aid in specifying the CQL syntax, we will use the following
+conventions in this document:
+
+* Language rules will be given in an informal
+http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form#Variants[BNF
+variant] notation. In particular, we'll use square brackets (`[ item ]`)
+for optional items, `*` and `+` for repeated items (where `+` implies at
+least one).
+* The grammar will also use the following convention for convenience:
+non-terminal terms will be lowercase (and link to their definition) while
+terminal keywords will be provided in "all caps". Note however that
+keywords are `identifiers` and are thus case insensitive in practice. We
+will also define some early constructions using regexps, which we'll
+indicate with `re()`.
+* The grammar is provided for documentation purposes and leaves some
+minor details out. For instance, the comma on the last column definition
+in a `CREATE TABLE` statement is optional but supported if present even
+though the grammar in this document suggests otherwise. Also, not
+everything accepted by the grammar is necessarily valid CQL.
+* References to keywords or pieces of CQL code in running text will be
+shown in a `fixed-width font`.
+
+[[identifiers]]
+== Identifiers and keywords
+
+The CQL language uses _identifiers_ (or _names_) to identify tables,
+columns and other objects. An identifier is a token matching the regular
+expression `[a-zA-Z][a-zA-Z0-9_]*`.
+
+A number of such identifiers, like `SELECT` or `WITH`, are _keywords_.
+They have a fixed meaning for the language and most are reserved. The
+list of those keywords can be found in xref:cql/appendices.adoc#appendix-A[Appendix A].
+
+Identifiers and (unquoted) keywords are case insensitive. Thus `SELECT`
+is the same as `select` or `sElEcT`, and `myId` is the same as
+`myid` or `MYID`. A convention often used (in particular by the samples
+of this documentation) is to use uppercase for keywords and lowercase
+for other identifiers.
+
+There is a second kind of identifier called a _quoted identifier_
+defined by enclosing an arbitrary sequence of characters (non-empty) in
+double-quotes (`"`). Quoted identifiers are never keywords. Thus
+`"select"` is not a reserved keyword and can be used to refer to a
+column (note that using this is particularly ill-advised), while `select`
+would raise a parsing error. Also, unlike unquoted identifiers
+and keywords, quoted identifiers are case sensitive (`"My Quoted Id"` is
+_different_ from `"my quoted id"`). A fully lowercase quoted identifier
+that matches `[a-zA-Z][a-zA-Z0-9_]*` is however _equivalent_ to the
+unquoted identifier obtained by removing the double-quote (so `"myid"`
+is equivalent to `myid` and to `myId` but different from `"myId"`).
+Inside a quoted identifier, the double-quote character can be repeated
+to escape it, so `"foo "" bar"` is a valid identifier.
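+
+A short illustration (the table and column names are hypothetical):
+
+[source,cql]
+----
+CREATE TABLE "MyTable" (id int PRIMARY KEY, "Value" text);
+SELECT "Value" FROM "MyTable";  -- matches the case-sensitive quoted names
+SELECT value FROM mytable;      -- error: unquoted names fold to lowercase
+----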
+
+[NOTE]
+.Note
+====
+The _quoted identifier_ can declare columns with arbitrary names, and
+these can sometimes clash with specific names used by the server. For
+instance, when using conditional update, the server will respond with a
+result set containing a special result named `"[applied]"`. If you’ve
+declared a column with such a name, this could potentially confuse some
+tools and should be avoided. In general, unquoted identifiers should be
+preferred but if you use quoted identifiers, it is strongly advised that you
+avoid any name enclosed by square brackets (like `"[applied]"`) and any
+name that looks like a function call (like `"f(x)"`).
+====
+
+More formally, we have:
+
+[source, bnf]
+----
+include::example$BNF/identifier.bnf[]
+----
+
+[[constants]]
+== Constants
+
+CQL defines the following _constants_:
+
+[source, bnf]
+----
+include::example$BNF/constant.bnf[]
+----
+
+In other words:
+
+* A string constant is an arbitrary sequence of characters enclosed by
+single-quotes (`'`). A single-quote can be included by repeating it, e.g.
+`'It''s raining today'`. Those are not to be confused with quoted
+`identifiers` that use double-quotes. Alternatively, a string can be
+defined by enclosing the arbitrary sequence of characters by two dollar
+characters, in which case single-quotes can be used without escaping
+(`$$It's raining today$$`). The latter form is often used when defining
+xref:cql/functions.adoc#udfs[user-defined functions] to avoid having to escape single-quote
+characters in function bodies (as they are more likely to occur than
+`$$`).
+* Integer, float and boolean constants are defined as expected. Note
+however that float allows the special `NaN` and `Infinity` constants.
+* CQL supports
+https://en.wikipedia.org/wiki/Universally_unique_identifier[UUID]
+constants.
+* Blob content is provided in hexadecimal and prefixed by `0x`.
+* The special `NULL` constant denotes the absence of value.
+
+For how these constants are typed, see the xref:cql/types.adoc[Data types] section.
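+
+A sketch showing several constant forms at once (the `quotes` table is
+hypothetical):
+
+[source,cql]
+----
+INSERT INTO quotes (id, body, data)
+VALUES (123e4567-e89b-12d3-a456-426614174000, -- uuid constant
+        $$It's raining today$$,               -- dollar-quoted string
+        0xcafebabe);                          -- blob constant
+----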
+
+== Terms
+
+CQL has the notion of a _term_, which denotes the kind of values that
+CQL supports. Terms are defined by:
+
+[source, bnf]
+----
+include::example$BNF/term.bnf[]
+----
+
+A term is thus one of:
+
+* A xref:cql/definitions.adoc#constants[constant]
+* A literal for either a xref:cql/types.adoc#collections[collection],
+a xref:cql/types.adoc#udts[user-defined type] or a xref:cql/types.adoc#tuples[tuple]
+* A xref:cql/functions.adoc#cql-functions[function] call, either a xref:cql/functions.adoc#scalar-native-functions[native function]
+or a xref:cql/functions.adoc#user-defined-scalar-functions[user-defined function]
+* An xref:cql/operators.adoc#arithmetic_operators[arithmetic operation] between terms
+* A type hint
+* A bind marker, which denotes a variable to be bound at execution time.
+See the section on `prepared-statements` for details. A bind marker can
+be either anonymous (`?`) or named (`:some_name`). The latter form
+provides a more convenient way to refer to the variable for binding it
+and should generally be preferred.
+
+== Comments
+
+A comment in CQL is a line beginning with either a double dash (`--`) or
+a double slash (`//`).
+
+Multi-line comments are also supported through enclosure within `/*` and
+`*/` (but nesting is not supported).
+
+[source,cql]
+----
+-- This is a comment
+// This is a comment too
+/* This is
+ a multi-line comment */
+----
+
+== Statements
+
+CQL consists of statements that can be divided into the following
+categories:
+
+* `data-definition` statements, to define and change how the data is
+stored (keyspaces and tables).
+* `data-manipulation` statements, for selecting, inserting and deleting
+data.
+* `secondary-indexes` statements.
+* `materialized-views` statements.
+* `cql-roles` statements.
+* `cql-permissions` statements.
+* `User-Defined Functions (UDFs)` statements.
+* `udts` statements.
+* `cql-triggers` statements.
+
+All the statements are listed below and are described in the rest of
+this documentation (see links above):
+
+[source, bnf]
+----
+include::example$BNF/cql_statement.bnf[]
+----
+
+== Prepared Statements
+
+CQL supports _prepared statements_. Prepared statements are an
+optimization that allows a query to be parsed only once and executed
+multiple times with different concrete values.
+
+Any statement that uses at least one bind marker (see `bind_marker`)
+will need to be _prepared_, after which the statement can be _executed_
+by providing concrete values for each of its markers. The exact details of
+how a statement is prepared and then executed depend on the CQL driver
+used and you should refer to your driver documentation.
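+
+For illustration, two statements a driver might prepare (the `users`
+table is hypothetical); the first uses an anonymous bind marker, the
+second a named one:
+
+[source,cql]
+----
+INSERT INTO users (id, name) VALUES (?, ?);
+SELECT name FROM users WHERE id = :user_id;
+----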
diff --git a/doc/modules/cassandra/pages/cql/dml.adoc b/doc/modules/cassandra/pages/cql/dml.adoc
new file mode 100644
index 00000000000..8a4df2fecb3
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/dml.adoc
@@ -0,0 +1,458 @@
+= Data Manipulation
+
+This section describes the statements supported by CQL to insert,
+update, delete and query data.
+
+[[select-statement]]
+== SELECT
+
+Querying data from tables is done using a `SELECT` statement:
+
+[source,bnf]
+----
+include::example$BNF/select_statement.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/select_statement.cql[]
+----
+
+The `SELECT` statement reads one or more columns for one or more rows
+in a table. It returns a result-set of the rows matching the request,
+where each row contains the values for the selection corresponding to
+the query. Additionally, xref:cql/functions.adoc#cql-functions[functions] including
+xref:cql/functions.adoc#aggregate-functions[aggregations] can be applied to the result.
+
+A `SELECT` statement contains at least a xref:cql/dml.adoc#selection-clause[selection clause] and the name of the table on which
+the selection is executed.
+CQL does *not* execute joins or sub-queries and a select statement only applies to a single table.
+A select statement can also have a xref:cql/dml.adoc#where-clause[where clause] that can further narrow the query results.
+Additional clauses can xref:cql/dml.adoc#ordering-clause[order] or xref:cql/dml.adoc#limit-clause[limit] the results.
+Lastly, xref:cql/dml.adoc#allow-filtering[queries that require full cluster filtering] can append `ALLOW FILTERING` to any query.
+
+[[selection-clause]]
+=== Selection clause
+
+The `select_clause` determines which columns will be queried and returned in the result set.
+This clause can also apply transformations to the result before returning it.
+The selection clause consists of a comma-separated list of specific _selectors_ or, alternatively, the wildcard character (`*`) to select all the columns defined in the table.
+
+==== Selectors
+
+A `selector` can be one of:
+
+* A column name of the table selected, to retrieve the values for that
+column.
+* A term, which is usually used nested inside other selectors like
+functions (if a term is selected directly, then the corresponding column
+of the result-set will simply have the value of this term for every row
+returned).
+* A casting, which allows converting a nested selector to a (compatible)
+type.
+* A function call, where the arguments are selectors themselves. See the
+section on xref:cql/functions.adoc#cql-functions[functions] for more details.
+* The special call `COUNT(*)` to the xref:cql/functions.adoc#count-function[COUNT function],
+which counts all non-null results.
+
+==== Aliases
+
+Every _top-level_ selector can also be aliased (using AS).
+If so, the name of the corresponding column in the result set will be
+that of the alias. For instance:
+
+[source,cql]
+----
+include::example$CQL/as.cql[]
+----
+
+[NOTE]
+====
+Currently, aliases aren't recognized in the `WHERE` or `ORDER BY` clauses in the statement.
+You must use the original column name instead.
+====
+
+[[writetime-and-ttl-function]]
+==== `WRITETIME` and `TTL` functions
+
+Selection supports two special functions that aren't allowed anywhere
+else: `WRITETIME` and `TTL`.
+Both functions take only one argument, a column name.
+These functions retrieve meta-information that is stored internally for each column:
+
+* `WRITETIME` stores the timestamp of the value of the column
+* `TTL` stores the remaining time to live (in seconds) for the value of the column if it is set to expire; otherwise the value is `null`.
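+
+A minimal sketch (the table, column, and key value are hypothetical):
+
+[source,cql]
+----
+-- returns the write timestamp and remaining TTL (in seconds) of "name"
+SELECT WRITETIME(name), TTL(name) FROM users WHERE id = 42;
+----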
+
+[[where-clause]]
+=== The `WHERE` clause
+
+The `WHERE` clause specifies which rows are queried. It specifies
+a relationship for `PRIMARY KEY` columns or a column that has
+a xref:cql/indexes.adoc#create-index-statement[secondary index] defined, along with a set value.
+
+Not all relationships are allowed in a query. For instance, only an equality
+is allowed on a partition key. The `IN` clause is considered an equality for one or more values.
+The `TOKEN` clause can be used to query for partition key non-equalities.
+A partition key must be specified before clustering columns in the `WHERE` clause. The relationship
+for clustering columns must specify a *contiguous* set of rows to order.
+
+For instance, given:
+
+[source,cql]
+----
+include::example$CQL/table_for_where.cql[]
+----
+
+The following query is allowed:
+
+[source,cql]
+----
+include::example$CQL/where.cql[]
+----
+
+But the following one is not, as it does not select a contiguous set of
+rows (and we suppose no secondary indexes are set):
+
+[source,cql]
+----
+include::example$CQL/where_fail.cql[]
+----
+
+When specifying relationships, the `TOKEN` function can be applied to the partition key column.
+Rows will be selected based on the token of the partition key rather than on its value.
+
+[IMPORTANT]
+====
+The token of a key depends on the partitioner in use; in particular,
+the `RandomPartitioner` won't yield a meaningful order.
+Also note that ordering partitioners always order token values by bytes (so
+even if the partition key is of type int, `token(-1) > token(0)` in
+particular).
+====
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/token.cql[]
+----
+
+The `IN` relationship is only allowed on the last column of the
+partition key or on the last column of the full primary key.
+
+It is also possible to “group” `CLUSTERING COLUMNS` together in a
+relation using the tuple notation.
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/where_group_cluster_columns.cql[]
+----
+
+This query will return all rows that sort after the one having “John's Blog” as
+`blog_title` and '2012-01-01' for `posted_at` in the clustering order. In
+particular, rows having a `posted_at <= '2012-01-01'` will be returned, as
+long as their `blog_title > 'John''s Blog'`.
+
+That would not be the case for this example:
+
+[source,cql]
+----
+include::example$CQL/where_no_group_cluster_columns.cql[]
+----
+
+The tuple notation may also be used for `IN` clauses on clustering columns:
+
+[source,cql]
+----
+include::example$CQL/where_in_tuple.cql[]
+----
+
+The `CONTAINS` operator may only be used for collection columns (lists,
+sets, and maps). In the case of maps, `CONTAINS` applies to the map
+values. The `CONTAINS KEY` operator may only be used on map columns and
+applies to the map keys.
+
+[[group-by-clause]]
+=== Grouping results
+
+The `GROUP BY` option can condense all selected
+rows that share the same values for a set of columns into a single row.
+
+Using the `GROUP BY` option, rows can be grouped at the partition key or clustering column level.
+Consequently, the `GROUP BY` option only accepts primary key columns in defined order as arguments.
+If a primary key column is restricted by an equality restriction, it is not required to be present in the `GROUP BY` clause.
+
+Aggregate functions will produce a separate value for each group.
+If no `GROUP BY` clause is specified, aggregate functions will produce a single value for all the rows.
+
+If a column is selected without an aggregate function, in a statement
+with a `GROUP BY`, the first value encounter in each group will be
+returned.
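+
+For instance, assuming a hypothetical table `t` with `PRIMARY KEY (pk, ck)`, a sketch of grouping at the partition level:
+
+[source,cql]
+----
+-- one result row per partition, with the number of rows in each
+SELECT pk, count(*) FROM t GROUP BY pk;
+----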
+
+[[ordering-clause]]
+=== Ordering results
+
+The `ORDER BY` clause selects the order of the returned results.
+The argument is a list of column names together with each column's order
+(`ASC` for ascending, `DESC` for descending).
+The possible orderings are limited by the xref:cql/ddl.adoc#clustering-order[clustering order] defined on the table:
+
+* if the table has been defined without any specific `CLUSTERING ORDER`, the order is the one defined by the clustering columns
+or its reverse
+* otherwise, the order is either the one defined by the `CLUSTERING ORDER` option or its reverse.
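+
+For instance, assuming a hypothetical `posts` table clustered by `posted_at`, a reversed-order query might look like:
+
+[source,cql]
+----
+SELECT content FROM posts
+WHERE userid = 'john doe'
+ORDER BY posted_at DESC;
+----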
+
+[[limit-clause]]
+=== Limiting results
+
+The `LIMIT` option to a `SELECT` statement limits the number of rows
+returned by a query. The `PER PARTITION LIMIT` option limits the
+number of rows returned for a given partition by the query. Both types of limits can be used in the same statement.
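+
+A hedged sketch combining both limits (the table name is hypothetical):
+
+[source,cql]
+----
+-- at most 3 rows per partition, and at most 10 rows overall
+SELECT * FROM posts PER PARTITION LIMIT 3 LIMIT 10;
+----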
+
+[[allow-filtering]]
+=== Allowing filtering
+
+By default, CQL only allows select queries that don't involve a full scan of all partitions.
+If all partitions are scanned, returning the results may incur significant latency, proportional to the
+amount of data in the table. The `ALLOW FILTERING` option explicitly allows such a full scan. As a result, the performance of
+the query can be unpredictable.
+
+For example, consider the following table of user profiles with birth year and country of residence.
+The birth year has a secondary index defined.
+
+[source,cql]
+----
+include::example$CQL/allow_filtering.cql[]
+----
+
+The following queries are valid:
+
+[source,cql]
+----
+include::example$CQL/query_allow_filtering.cql[]
+----
+
+In both cases, the query performance is proportional to the amount of data returned.
+The first query returns all rows, because all users are selected.
+The second query returns only the rows matched by the secondary index; since the index is implemented per node, the cost
+of this query depends on the number of nodes in the cluster rather than directly on the amount of data stored.
+The number of nodes will always be multiple orders of magnitude lower than the number of user profiles stored.
+Both queries may return very large result sets, but adding a `LIMIT` clause can reduce the latency.
+
+The following query will be rejected:
+
+[source,cql]
+----
+include::example$CQL/query_fail_allow_filtering.cql[]
+----
+
+Cassandra cannot guarantee that a large amount of data won't have to be scanned, even if the result set is small.
+If you know that the dataset is small and the performance will be reasonable, add `ALLOW FILTERING` to allow the query to
+execute:
+
+[source,cql]
+----
+include::example$CQL/query_nofail_allow_filtering.cql[]
+----
+
+[[insert-statement]]
+== INSERT
+
+Inserting data for a row is done using an `INSERT` statement:
+
+[source,bnf]
+----
+include::example$BNF/insert_statement.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/insert_statement.cql[]
+----
+
+The `INSERT` statement writes one or more columns for a given row in a
+table.
+Since a row is identified by its `PRIMARY KEY`, at least the columns composing it must be specified.
+The list of columns to insert must be supplied with the `VALUES` syntax.
+When using the `JSON` syntax, `VALUES` are optional.
+See the section on xref:cql/dml.adoc#cql-json[JSON support] for more detail.
+All updates for an `INSERT` are applied atomically and in isolation.
+
+Unlike in SQL, `INSERT` does not check the prior existence of the row by default.
+The row is created if none existed before, and updated otherwise.
+Furthermore, there is no means of knowing which action occurred.
+
+The `IF NOT EXISTS` condition can restrict the insertion to happen only if the row does not already exist.
+However, note that using `IF NOT EXISTS` will incur a non-negligible performance cost, because Paxos is used,
+so this should be used sparingly.
+
+Please refer to the xref:cql/dml.adoc#update-parameters[UPDATE] section for information on the `update_parameter`.
+Also note that `INSERT` does not support counters, while `UPDATE` does.
+
+[[update-statement]]
+== UPDATE
+
+Updating a row is done using an `UPDATE` statement:
+
+[source, bnf]
+----
+include::example$BNF/update_statement.bnf[]
+----
+
+For instance:
+
+[source,cql]
+----
+include::example$CQL/update_statement.cql[]
+----
+
+The `UPDATE` statement writes one or more columns for a given row in a
+table.
+The `WHERE` clause is used to select the row to update and must include all columns of the `PRIMARY KEY`.
+Non-primary key columns are set using the `SET` keyword.
+In an `UPDATE` statement, all updates within the same partition key are applied atomically and in isolation.
+
+Unlike in SQL, `UPDATE` does not check the prior existence of the row by default.
+The row is created if none existed before, and updated otherwise.
+Furthermore, there is no means of knowing which action occurred.
+
+The `IF` condition can be used to apply the update only if a particular condition is met.
+However, like the `IF NOT EXISTS` condition, a non-negligible performance cost can be incurred.
+
+Regarding the `SET` assignment:
+
+* `c = c + 3` will increment/decrement a counter, the only operation allowed on counters.
+The column name after the '=' sign *must* be the same as the one before the '=' sign.
+See the section on xref:cql/dml.adoc#counters[counters] for details.
+* `id = id + <collection>` and `id[value1] = value2` are for collections.
+See the section on xref:cql/types.adoc#collections[collections] for details.
+* `id.field = 3` is for setting the value of a field on a non-frozen user-defined type.
+See the xref:cql/types.adoc#udts[UDTs] for details.
+
+=== Update parameters
+
+`UPDATE` and `INSERT` statements support the following parameters:
+
+* `TTL`: specifies an optional Time To Live (in seconds) for the
+inserted values. If set, the inserted values are automatically removed
+from the database after the specified time. Note that the TTL concerns
+the inserted values, not the columns themselves. This means that any
+subsequent update of the column will also reset the TTL (to whatever TTL
+is specified in that update). By default, values never expire. A TTL of
+0 is equivalent to no TTL. If the table has a default_time_to_live, a
+TTL of 0 will remove the TTL for the inserted or updated values. A TTL
+of `null` is equivalent to inserting with a TTL of 0.
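+
+For instance, a sketch of a write that expires after one day (the table and columns are hypothetical):
+
+[source,cql]
+----
+INSERT INTO posts (userid, posted_at, content)
+VALUES ('john doe', '2023-01-01', 'hello')
+USING TTL 86400; -- in seconds: the inserted values expire after one day
+----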
+
+`UPDATE`, `INSERT`, `DELETE` and `BATCH` statements support the following parameters:
+
+* `TIMESTAMP`: sets the timestamp for the operation. If not specified,
+the coordinator will use the current time (in microseconds) at the start
+of statement execution as the timestamp. This is usually a suitable
+default.
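+
+For instance, a sketch of supplying an explicit write timestamp (the table and values are hypothetical):
+
+[source,cql]
+----
+-- microseconds since the epoch
+UPDATE posts USING TIMESTAMP 1610000000000000
+SET content = 'edited'
+WHERE userid = 'john doe' AND posted_at = '2023-01-01';
+----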
+
+[[delete_statement]]
+== DELETE
+
+Deleting rows or parts of rows uses the `DELETE` statement:
+
+[source,bnf]
+----
+include::example$BNF/delete_statement.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/delete_statement.cql[]
+----
+
+The `DELETE` statement deletes columns and rows. If column names are
+provided directly after the `DELETE` keyword, only those columns are
+deleted from the row indicated by the `WHERE` clause. Otherwise, whole
+rows are removed.
+
+The `WHERE` clause specifies which rows are to be deleted. Multiple rows
+may be deleted with one statement by using an `IN` operator. A range of
+rows may be deleted using an inequality operator (such as `>=`).
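+
+A couple of hedged sketches of these forms (the table and values are hypothetical):
+
+[source,cql]
+----
+-- delete a single column from one row
+DELETE content FROM posts WHERE userid = 'john doe' AND posted_at = '2023-01-01';
+
+-- delete whole rows from several partitions at once
+DELETE FROM posts WHERE userid IN ('john doe', 'jane doe');
+----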
+
+`DELETE` supports the `TIMESTAMP` option with the same semantics as in
+xref:cql/dml.adoc#update-parameters[updates].
+
+In a `DELETE` statement, all deletions within the same partition key are
+applied atomically and in isolation.
+
+A `DELETE` operation can be conditional through the use of an `IF`
+clause, similar to `UPDATE` and `INSERT` statements. However, as with
+`INSERT` and `UPDATE` statements, this will incur a non-negligible
+performance cost because Paxos is used, and should be used sparingly.
+
+[[batch_statement]]
+== BATCH
+
+Multiple `INSERT`, `UPDATE` and `DELETE` can be executed in a single
+statement by grouping them through a `BATCH` statement:
+
+[source, bnf]
+----
+include::example$BNF/batch_statement.bnf[]
+----
+
+For instance:
+
+[source,cql]
+----
+include::example$CQL/batch_statement.cql[]
+----
+
+The `BATCH` statement groups multiple modification statements
+(insertions/updates and deletions) into a single statement. It serves
+several purposes:
+
+* It saves network round-trips between the client and the server (and
+sometimes between the server coordinator and the replicas) when batching
+multiple updates.
+* All updates in a `BATCH` belonging to a given partition key are
+performed in isolation.
+* By default, all operations in the batch are performed as _logged_, to
+ensure all mutations eventually complete (or none will). See the notes
+on xref:cql/dml.adoc#unlogged-batches[UNLOGGED batches] for more details.
+
+Note that:
+
+* `BATCH` statements may only contain `UPDATE`, `INSERT` and `DELETE`
+statements (not other batches for instance).
+* Batches are _not_ a full analogue for SQL transactions.
+* If a timestamp is not specified for each operation, then all
+operations will be applied with the same timestamp (either one generated
+automatically, or the timestamp provided at the batch level). Due to
+Cassandra's conflict resolution procedure in the case of
+http://wiki.apache.org/cassandra/FAQ#clocktie[timestamp ties],
+operations may be applied in an order that is different from the order
+they are listed in the `BATCH` statement. To force a particular
+operation ordering, you must specify per-operation timestamps.
+* A LOGGED batch to a single partition will be converted to an UNLOGGED
+batch as an optimization.
+
+[[unlogged-batches]]
+=== `UNLOGGED` batches
+
+By default, Cassandra uses a batch log to ensure all operations in a
+batch eventually complete or none will (note however that operations are
+only isolated within a single partition).
+
+There is a performance penalty for batch atomicity when a batch spans
+multiple partitions. If you do not want to incur this penalty, you can
+tell Cassandra to skip the batchlog with the `UNLOGGED` option. If the
+`UNLOGGED` option is used, a failed batch might leave its changes only
+partly applied.
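+
+A minimal sketch of skipping the batchlog (the statements are hypothetical):
+
+[source,cql]
+----
+BEGIN UNLOGGED BATCH
+  INSERT INTO posts (userid, posted_at, content) VALUES ('john doe', '2023-01-01', 'a');
+  INSERT INTO posts (userid, posted_at, content) VALUES ('john doe', '2023-01-02', 'b');
+APPLY BATCH;
+----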
+
+=== `COUNTER` batches
+
+Use the `COUNTER` option for batched counter updates. Unlike other
+updates in Cassandra, counter updates are not idempotent.
diff --git a/doc/modules/cassandra/pages/cql/functions.adoc b/doc/modules/cassandra/pages/cql/functions.adoc
new file mode 100644
index 00000000000..157f46a6b31
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/functions.adoc
@@ -0,0 +1,504 @@
+// Need some intro for UDF and native functions in general and point those to it.
+// [[cql-functions]][[native-functions]]
+
+== Functions
+
+CQL supports 2 main categories of functions:
+
+* xref:cql/functions.adoc#scalar-functions[scalar functions] that take a number of values and produce an output
+* xref:cql/functions.adoc#aggregate-functions[aggregate functions] that aggregate multiple rows resulting from a `SELECT` statement
+
+In both cases, CQL provides a number of native "hard-coded" functions as
+well as the ability to create new user-defined functions.
+
+[NOTE]
+.Note
+====
+The use of user-defined functions is disabled by default for
+security concerns (even when enabled, the execution of user-defined
+functions is sandboxed and a "rogue" function should not be allowed to
+do evil, but no sandbox is perfect so using user-defined functions is
+opt-in). See the `enable_user_defined_functions` setting in `cassandra.yaml` to
+enable them.
+====
+
+A function is identified by its name:
+
+[source, bnf]
+----
+include::example$BNF/function.bnf[]
+----
+
+=== Scalar functions
+
+[[scalar-native-functions]]
+==== Native functions
+
+===== Cast
+
+The `cast` function can be used to convert one native datatype to
+another.
+
+The following table describes the conversions supported by the `cast`
+function. Cassandra will silently ignore any cast converting a datatype
+into its own datatype.
+
+[cols=",",options="header",]
+|===
+|From |To
+
+| `ascii` | `text`, `varchar`
+
+| `bigint` | `tinyint`, `smallint`, `int`, `float`, `double`, `decimal`, `varint`,
+`text`, `varchar`
+
+| `boolean` | `text`, `varchar`
+
+| `counter` | `tinyint`, `smallint`, `int`, `bigint`, `float`, `double`, `decimal`,
+`varint`, `text`, `varchar`
+
+| `date` | `timestamp`
+
+| `decimal` | `tinyint`, `smallint`, `int`, `bigint`, `float`, `double`, `varint`,
+`text`, `varchar`
+
+| `double` | `tinyint`, `smallint`, `int`, `bigint`, `float`, `decimal`, `varint`,
+`text`, `varchar`
+
+| `float` | `tinyint`, `smallint`, `int`, `bigint`, `double`, `decimal`, `varint`,
+`text`, `varchar`
+
+| `inet` | `text`, `varchar`
+
+| `int` | `tinyint`, `smallint`, `bigint`, `float`, `double`, `decimal`, `varint`,
+`text`, `varchar`
+
+| `smallint` | `tinyint`, `int`, `bigint`, `float`, `double`, `decimal`, `varint`,
+`text`, `varchar`
+
+| `time` | `text`, `varchar`
+
+| `timestamp` | `date`, `text`, `varchar`
+
+| `timeuuid` | `timestamp`, `date`, `text`, `varchar`
+
+| `tinyint` | `smallint`, `int`, `bigint`, `float`, `double`, `decimal`,
+`varint`, `text`, `varchar`
+
+| `uuid` | `text`, `varchar`
+
+| `varint` | `tinyint`, `smallint`, `int`, `bigint`, `float`, `double`, `decimal`,
+`text`, `varchar`
+|===
+
+The conversions rely strictly on Java's semantics; for example, the
+double value 1 will be converted to the text value '1.0'. For instance:
+
+[source,cql]
+----
+SELECT avg(cast(count as double)) FROM myTable
+----
+
+===== Token
+
+The `token` function computes the token for a given partition key.
+The exact signature of the token function depends on the table concerned and the partitioner used by the cluster.
+
+The types of the arguments of the `token` function depend on the partition key column types. The returned type depends on the defined partitioner:
+
+[cols=",",options="header",]
+|===
+|Partitioner | Returned type
+| Murmur3Partitioner | `bigint`
+| RandomPartitioner | `varint`
+| ByteOrderedPartitioner | `blob`
+|===
+
+For example, consider the following table:
+
+[source,cql]
+----
+include::example$CQL/create_table_simple.cql[]
+----
+
+The table uses the default Murmur3Partitioner.
+The `token` function takes a single argument of type `text`, because the partition key `userid` is of text type.
+The returned type will be `bigint`.
+
+===== Uuid
+
+The `uuid` function takes no parameters and generates a random type 4
+uuid suitable for use in `INSERT` or `UPDATE` statements.
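+
+For instance (the table is hypothetical, with `id` assumed to be of type `uuid`):
+
+[source,cql]
+----
+INSERT INTO users (id, name) VALUES (uuid(), 'john doe');
+----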
+
+===== Timeuuid functions
+
+====== `now`
+
+The `now` function takes no arguments and generates, on the coordinator
+node, a new unique timeuuid at the time the function is invoked. Note
+that this method is useful for insertion but is largely non-sensical in
+`WHERE` clauses.
+
+For example, a query of the form:
+
+[source,cql]
+----
+include::example$CQL/timeuuid_now.cql[]
+----
+
+will not return a result, by design, since the value returned by
+`now()` is guaranteed to be unique.
+
+`currentTimeUUID` is an alias of `now`.
+
+====== `minTimeuuid` and `maxTimeuuid`
+
+The `minTimeuuid` function takes a `timestamp` value `t`, either a timestamp or a date string.
+It returns a _fake_ `timeuuid` corresponding to the _smallest_ possible `timeuuid` for timestamp `t`.
+The `maxTimeuuid` function works similarly, but returns the _largest_ possible `timeuuid`.
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/timeuuid_min_max.cql[]
+----
+
+will select all rows where the `timeuuid` column `t` is later than `'2013-01-01 00:05+0000'` and earlier than `'2013-02-02 10:00+0000'`.
+The clause `t >= maxTimeuuid('2013-01-01 00:05+0000')` would still _not_ select a `timeuuid` generated exactly at '2013-01-01 00:05+0000', and is essentially equivalent to `t > maxTimeuuid('2013-01-01 00:05+0000')`.
+
+[NOTE]
+.Note
+====
+The values generated by `minTimeuuid` and `maxTimeuuid` are called _fake_ UUIDs because they do not respect the time-based UUID generation process
+specified by the http://www.ietf.org/rfc/rfc4122.txt[IETF RFC 4122].
+In particular, the value returned by these two methods will not be unique.
+Thus, only use these methods for *querying*, not for *insertion*, to prevent possible data overwriting.
+====
+
+===== Datetime functions
+
+====== Retrieving the current date/time
+
+The following functions can be used to retrieve the date/time at the
+time where the function is invoked:
+
+[cols=",",options="header",]
+|===
+|Function name |Output type
+
+| `currentTimestamp` | `timestamp`
+
+| `currentDate` | `date`
+
+| `currentTime` | `time`
+
+| `currentTimeUUID` | `timeUUID`
+|===
+
+For example, the last two days of data can be retrieved using:
+
+[source,cql]
+----
+include::example$CQL/currentdate.cql[]
+----
+
+====== Time conversion functions
+
+A number of functions are provided to convert a `timeuuid`, a `timestamp` or a `date` into another `native` type.
+
+[cols=",,",options="header",]
+|===
+|Function name |Input type |Description
+
+| `toDate` | `timeuuid` | Converts the `timeuuid` argument into a `date` type
+
+| `toDate` | `timestamp` | Converts the `timestamp` argument into a `date` type
+
+| `toTimestamp` | `timeuuid` | Converts the `timeuuid` argument into a `timestamp` type
+
+| `toTimestamp` | `date` | Converts the `date` argument into a `timestamp` type
+
+| `toUnixTimestamp` | `timeuuid` | Converts the `timeuuid` argument into a `bigint` raw value
+
+| `toUnixTimestamp` | `timestamp` | Converts the `timestamp` argument into a `bigint` raw value
+
+| `toUnixTimestamp` | `date` | Converts the `date` argument into a `bigint` raw value
+
+| `dateOf` | `timeuuid` | Similar to `toTimestamp(timeuuid)` (DEPRECATED)
+
+| `unixTimestampOf` | `timeuuid` | Similar to `toUnixTimestamp(timeuuid)` (DEPRECATED)
+|===
+
+===== Blob conversion functions
+
+A number of functions are provided to convert the native types into
+binary data, or a `blob`.
+For every xref:cql/types.adoc#native-types[type] supported by CQL, the function `typeAsBlob` takes an argument of type `type` and returns it as a `blob`.
+Conversely, the function `blobAsType` takes a `blob` argument and converts it back into a value of the corresponding type.
+For example, `bigintAsBlob(3)` returns `0x0000000000000003` and `blobAsBigint(0x0000000000000003)` returns `3`.
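+
+A sketch of that round-trip through a hypothetical `blobs` table:
+
+[source,cql]
+----
+CREATE TABLE blobs (id int PRIMARY KEY, b blob);
+INSERT INTO blobs (id, b) VALUES (1, bigintAsBlob(3)); -- stores 0x0000000000000003
+SELECT blobAsBigint(b) FROM blobs WHERE id = 1;        -- returns 3
+----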
+
+[[user-defined-scalar-functions]]
+==== User-defined functions
+
+User-defined functions (UDFs) execute user-provided code in Cassandra.
+By default, Cassandra supports defining functions in _Java_ and _JavaScript_.
+Support for other JSR 223 compliant scripting languages, such as Python, Ruby, and Scala, is possible by adding a JAR to the classpath.
+
+UDFs are part of the Cassandra schema, and are automatically propagated to all nodes in the cluster.
+UDFs can be _overloaded_, so that multiple UDFs with different argument types can have the same function name.
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/function_overload.cql[]
+----
+
+UDFs are susceptible to all of the normal problems with the chosen programming language.
+Accordingly, implementations should be safe against null pointer exceptions, illegal arguments, or any other potential source of exceptions.
+An exception during function execution will result in the entire statement failing.
+Valid queries for UDF use are `SELECT`, `INSERT` and `UPDATE` statements.
+
+_Complex_ types like collections, tuple types and user-defined types are valid argument and return types in UDFs.
+Tuple types and user-defined types use the DataStax Java Driver conversion functions.
+Please see the Java Driver documentation for details on handling tuple types and user-defined types.
+
+Arguments for functions can be literals or terms.
+Prepared statement placeholders can be used, too.
+
+Note the use of the double dollar-sign syntax to enclose the UDF source code.
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/function_dollarsign.cql[]
+----
+
+The implicitly available `udfContext` field (or binding for script UDFs) provides the necessary functionality to create new UDT and tuple values:
+
+[source,cql]
+----
+include::example$CQL/function_udfcontext.cql[]
+----
+
+The definition of the `UDFContext` interface can be found in the Apache Cassandra source code for `org.apache.cassandra.cql3.functions.UDFContext`.
+
+[source,java]
+----
+include::example$JAVA/udfcontext.java[]
+----
+
+Java UDFs already have some imports for common interfaces and classes defined. These imports are:
+
+[source,java]
+----
+include::example$JAVA/udf_imports.java[]
+----
+
+Please note that these convenience imports are not available for script UDFs.
+
+[[create-function-statement]]
+==== CREATE FUNCTION statement
+
+Creating a new user-defined function uses the `CREATE FUNCTION` statement:
+
+[source,bnf]
+----
+include::example$BNF/create_function_statement.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/create_function.cql[]
+----
+
+`CREATE FUNCTION` with the optional `OR REPLACE` keywords creates either a function or replaces an existing one with the same signature.
+A `CREATE FUNCTION` without `OR REPLACE` fails if a function with the same signature already exists.
+If the optional `IF NOT EXISTS` keywords are used, the function will only be created if another function with the same signature does not
+exist.
+`OR REPLACE` and `IF NOT EXISTS` cannot be used together.
+
+Behavior for `null` input values must be defined for each function:
+
+* `RETURNS NULL ON NULL INPUT` declares that the function will always return `null` if any of the input arguments is `null`.
+* `CALLED ON NULL INPUT` declares that the function will always be executed.
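+
+A minimal sketch of the second declaration, using a hypothetical Java UDF:
+
+[source,cql]
+----
+-- CALLED ON NULL INPUT: the body runs even for null input and must handle it
+CREATE OR REPLACE FUNCTION text_len (input text)
+    CALLED ON NULL INPUT
+    RETURNS int
+    LANGUAGE java
+    AS $$ return input == null ? 0 : input.length(); $$;
+----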
+
+===== Function Signature
+
+Signatures are used to distinguish individual functions. The signature consists of the fully-qualified function name (`keyspace.function_name`) and a concatenated list of all the argument types.
+
+Note that keyspace names, function names and argument types are subject to the default naming conventions and case-sensitivity rules.
+
+Functions belong to a keyspace; if no keyspace is specified, the current keyspace is used.
+User-defined functions are not allowed in the system keyspaces.
+
+[[drop-function-statement]]
+==== DROP FUNCTION statement
+
+Dropping a function uses the `DROP FUNCTION` statement:
+
+[source, bnf]
+----
+include::example$BNF/drop_function_statement.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/drop_function.cql[]
+----
+
+You must specify the argument types (the `arguments_signature`) of the function in the drop command if there are multiple overloaded functions with the same name but different signatures.
+`DROP FUNCTION` with the optional `IF EXISTS` keywords drops a function if it exists, but does not throw an error if it doesn't.
+
+[[aggregate-functions]]
+=== Aggregate functions
+
+Aggregate functions work on a set of rows.
+Values for each row are input to the aggregate, which returns a single value for the whole set of rows.
+
+If `normal` columns, `scalar functions`, `UDT` fields, `writetime`, or `ttl` are selected together with aggregate functions, the values
+returned for them will be the ones of the first row matching the query.
+
+==== Native aggregates
+
+[[count-function]]
+===== Count
+
+The `count` function can be used to count the rows returned by a query.
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/count.cql[]
+----
+
+It also can count the non-null values of a given column:
+
+[source,cql]
+----
+include::example$CQL/count_nonnull.cql[]
+----
+
+===== Max and Min
+
+The `max` and `min` functions compute the maximum and the minimum value returned by a query for a given column.
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/min_max.cql[]
+----
+
+===== Sum
+
+The `sum` function sums up all the values returned by a query for a given column.
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/sum.cql[]
+----
+
+===== Avg
+
+The `avg` function computes the average of all the values returned by a query for a given column.
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/avg.cql[]
+----
+
+[[user-defined-aggregates-functions]]
+==== User-Defined Aggregates (UDAs)
+
+User-defined aggregates allow the creation of custom aggregate functions.
+User-defined aggregates can be used in `SELECT` statements.
+
+Each aggregate requires an _initial state_ of type `STYPE` defined with the `INITCOND` value (default value: `null`).
+The first argument of the state function must have type `STYPE`.
+The remaining arguments of the state function must match the types of the user-defined aggregate arguments.
+The state function is called once for each row, and the value returned by the state function becomes the new state.
+After all rows are processed, the optional `FINALFUNC` is executed with last state value as its argument.
+
+The `STYPE` value is mandatory in order to distinguish possibly overloaded versions of the state and/or final function, since the
+overload can appear after creation of the aggregate.
+
+
+A complete working example for user-defined aggregates (assuming that a
+keyspace has been selected using the `USE` statement):
+
+[source,cql]
+----
+include::example$CQL/uda.cql[]
+----
+
+[[create-aggregate-statement]]
+==== CREATE AGGREGATE statement
+
+Creating (or replacing) a user-defined aggregate function uses the
+`CREATE AGGREGATE` statement:
+
+[source, bnf]
+----
+include::example$BNF/create_aggregate_statement.bnf[]
+----
+
+See above for a complete example.
+
+The `CREATE AGGREGATE` command with the optional `OR REPLACE` keywords creates either an aggregate or replaces an existing one with the same
+signature.
+A `CREATE AGGREGATE` without `OR REPLACE` fails if an aggregate with the same signature already exists.
+The `CREATE AGGREGATE` command with the optional `IF NOT EXISTS` keywords creates an aggregate if it does not already exist.
+The `OR REPLACE` and `IF NOT EXISTS` phrases cannot be used together.
+
+The `STYPE` value defines the type of the state value and must be specified.
+The optional `INITCOND` defines the initial state value for the aggregate; the default value is `null`.
+A non-null `INITCOND` must be specified for state functions that are declared with `RETURNS NULL ON NULL INPUT`.
+
+The `SFUNC` value references an existing function to use as the state-modifying function.
+The first argument of the state function must have type `STYPE`.
+The remaining arguments of the state function must match the types of the user-defined aggregate arguments.
+The state function is called once for each row, and the value returned by the state function becomes the new state.
+State is not updated for state functions declared with `RETURNS NULL ON NULL INPUT` and called with `null`.
+After all rows are processed, the optional `FINALFUNC` is executed with last state value as its argument.
+It must take only one argument with type `STYPE`, but the return type of the `FINALFUNC` may be a different type.
+A final function declared with `RETURNS NULL ON NULL INPUT` means that the aggregate's return value will be `null`, if the last state is `null`.
+
+If no `FINALFUNC` is defined, the overall return type of the aggregate function is `STYPE`.
+If a `FINALFUNC` is defined, it is the return type of that function.
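+
+As a compact sketch of how these clauses fit together (the state and final functions named here are assumed to exist already):
+
+[source,cql]
+----
+CREATE OR REPLACE AGGREGATE average(int)
+    SFUNC avg_state          -- assumed: (tuple<int, bigint>, int) -> tuple<int, bigint>
+    STYPE tuple<int, bigint>
+    FINALFUNC avg_final      -- assumed: (tuple<int, bigint>) -> double
+    INITCOND (0, 0);
+----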
+
+[[drop-aggregate-statement]]
+==== DROP AGGREGATE statement
+
+Dropping a user-defined aggregate function uses the `DROP AGGREGATE`
+statement:
+
+[source, bnf]
+----
+include::example$BNF/drop_aggregate_statement.bnf[]
+----
+
+For instance:
+
+[source,cql]
+----
+include::example$CQL/drop_aggregate.cql[]
+----
+
+The `DROP AGGREGATE` statement removes an aggregate created using `CREATE AGGREGATE`.
+You must specify the argument types of the aggregate to drop if there are multiple overloaded aggregates with the same name but a
+different signature.
+
+The `DROP AGGREGATE` command with the optional `IF EXISTS` keywords drops an aggregate if it exists, and does nothing if a function with the
+signature does not exist.
diff --git a/doc/modules/cassandra/pages/cql/index.adoc b/doc/modules/cassandra/pages/cql/index.adoc
new file mode 100644
index 00000000000..4b43be369c5
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/index.adoc
@@ -0,0 +1,24 @@
+= The Cassandra Query Language (CQL)
+
+This document describes the Cassandra Query Language
+(CQL) version 3.
+Note that this document describes the last version of the language.
+However, the xref:cql/changes.adoc[changes] section provides the differences between the versions of CQL since version 3.0.
+
+CQL offers a model similar to SQL.
+The data is stored in *tables* containing *rows* of *columns*.
+For that reason, when used in this document, these terms (tables, rows and columns) have the same definition that they have in SQL.
+
+* xref:cql/definitions.adoc[Definitions]
+* xref:cql/types.adoc[Data types]
+* xref:cql/ddl.adoc[Data definition language]
+* xref:cql/dml.adoc[Data manipulation language]
+* xref:cql/operators.adoc[Operators]
+* xref:cql/indexes.adoc[Secondary indexes]
+* xref:cql/mvs.adoc[Materialized views]
+* xref:cql/functions.adoc[Functions]
+* xref:cql/json.adoc[JSON]
+* xref:cql/security.adoc[CQL security]
+* xref:cql/triggers.adoc[Triggers]
+* xref:cql/appendices.adoc[Appendices]
+* xref:cql/changes.adoc[Changes]
diff --git a/doc/modules/cassandra/pages/cql/indexes.adoc b/doc/modules/cassandra/pages/cql/indexes.adoc
new file mode 100644
index 00000000000..32b45b59f59
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/indexes.adoc
@@ -0,0 +1,63 @@
+= Secondary Indexes
+
+CQL supports creating secondary indexes on tables, allowing queries on
+the table to use those indexes. A secondary index is identified by a
+name defined by:
+
+[source,bnf]
+----
+include::example$BNF/index_name.bnf[]
+----
+
+[[create-index-statement]]
+== CREATE INDEX
+
+Creating a secondary index on a table uses the `CREATE INDEX` statement:
+
+[source,bnf]
+----
+include::example$BNF/create_index_statement.bnf[]
+----
+
+For instance:
+
+[source,cql]
+----
+include::example$CQL/create_index.cql[]
+----
+
+The `CREATE INDEX` statement is used to create a new (automatic)
+secondary index for a given (existing) column in a given table. A name
+for the index itself can be specified before the `ON` keyword, if
+desired. If data already exists for the column, it will be indexed
+asynchronously. After the index is created, new data for the column is
+indexed automatically at insertion time.
+
+Attempting to create an already existing index will return an error
+unless the `IF NOT EXISTS` option is used. If it is used, the statement
+will be a no-op if the index already exists.
+
+=== Indexes on Map Keys
+
+When creating an index on a map column, you may index either the keys
+or the values. If the column identifier is placed within the `keys()`
+function, the index will be on the map keys, allowing you to use
+`CONTAINS KEY` in `WHERE` clauses. Otherwise, the index will be on the
+map values.
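+
+A hedged sketch (the table is hypothetical):
+
+[source,cql]
+----
+CREATE TABLE users (id int PRIMARY KEY, favs map<text, text>);
+CREATE INDEX ON users (KEYS(favs));
+
+-- enabled by the keys() index
+SELECT id FROM users WHERE favs CONTAINS KEY 'color';
+----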
+
+[[drop-index-statement]]
+== DROP INDEX
+
+Dropping a secondary index uses the `DROP INDEX` statement:
+
+[source,bnf]
+----
+include::example$BNF/drop_index_statement.bnf[]
+----
+
+The `DROP INDEX` statement is used to drop an existing secondary index.
+The argument of the statement is the index name, which may optionally
+specify the keyspace of the index.
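+
+For instance (the index name is hypothetical):
+
+[source,cql]
+----
+DROP INDEX IF EXISTS userkeyspace.users_favs_idx;
+----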
+
+If the index does not exist, the statement will return an error, unless
+`IF EXISTS` is used, in which case the operation is a no-op.
diff --git a/doc/modules/cassandra/pages/cql/json.adoc b/doc/modules/cassandra/pages/cql/json.adoc
new file mode 100644
index 00000000000..7d0aa268b0d
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/json.adoc
@@ -0,0 +1,125 @@
+= JSON Support
+
+Cassandra 2.2 introduces JSON support to `SELECT` and `INSERT` statements.
+This support does not fundamentally alter the CQL API (for example, the schema is still
+enforced).
+It simply provides a convenient way to work with JSON documents.
+
+== SELECT JSON
+
+With `SELECT` statements, the `JSON` keyword is used to return each row as a single `JSON` encoded map.
+The remainder of the `SELECT` statement behavior is the same.
+
+The result map keys match the column names in a normal result set.
+For example, a statement like `SELECT JSON a, ttl(b) FROM ...` would result in a map with keys `"a"` and `"ttl(b)"`.
+However, there is one notable exception: for symmetry with `INSERT JSON` behavior, case-sensitive column names with upper-case letters will be surrounded with double quotes.
+For example, `SELECT JSON myColumn FROM ...` would result in a map key `"\"myColumn\""` (with escaped quotes).
+
+The map values will be JSON-encoded representations (as described below) of the result set values.
+
+== INSERT JSON
+
+With `INSERT` statements, the new `JSON` keyword can be used to enable
+inserting a `JSON` encoded map as a single row. The format of the `JSON`
+map should generally match that returned by a `SELECT JSON` statement on
+the same table. In particular, case-sensitive column names should be
+surrounded with double quotes. For example, to insert into a table with
+two columns named "myKey" and "value", you would do the following:
+
+[source,cql]
+----
+include::example$CQL/insert_json.cql[]
+----
+
+By default (or if `DEFAULT NULL` is explicitly used), a column omitted
+from the `JSON` map will be set to `NULL`, meaning that any pre-existing
+value for that column will be removed (resulting in a tombstone being
+created). Alternatively, if the `DEFAULT UNSET` directive is used after
+the value, omitted column values will be left unset, meaning that
+pre-existing values for those columns will be preserved.
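+
+A minimal sketch of the directive (the table and columns are hypothetical):
+
+[source,cql]
+----
+-- columns missing from the JSON map keep their pre-existing values
+INSERT INTO mytable JSON '{"k": 0, "v": 1}' DEFAULT UNSET;
+----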
+
+== JSON Encoding of Cassandra Data Types
+
+Where possible, Cassandra will represent and accept data types in their
+native `JSON` representation. Cassandra will also accept string
+representations matching the CQL literal format for all single-field
+types. For example, floats, ints, UUIDs, and dates can be represented by
+CQL literal strings. However, compound types, such as collections,
+tuples, and user-defined types must be represented by native `JSON`
+collections (maps and lists) or a JSON-encoded string representation of
+the collection.
+
+The following table describes the encodings that Cassandra will accept
+in `INSERT JSON` values (and `fromJson()` arguments) as well as the
+format Cassandra will use when returning data for `SELECT JSON`
+statements (and `fromJson()`):
+
+[cols=",,,",options="header",]
+|===
+|Type |Formats accepted |Return format |Notes
+
+| `ascii` | string | string | Uses JSON's `\u` character escape
+
+| `bigint` | integer, string | integer | String must be valid 64 bit integer
+
+| `blob` | string | string | String should be 0x followed by an even number of hex digits
+
+| `boolean` | boolean, string | boolean | String must be "true" or "false"
+
+| `date` | string | string | Date in format `YYYY-MM-DD`, timezone UTC
+
+| `decimal` | integer, float, string | float | May exceed 32 or 64-bit IEEE-754 floating point precision in client-side decoder
+
+| `double` | integer, float, string | float | String must be valid integer or float
+
+| `float` | integer, float, string | float | String must be valid integer or float
+
+| `inet` | string | string | IPv4 or IPv6 address
+
+| `int` | integer, string | integer | String must be valid 32 bit integer
+
+| `list` | list, string | list | Uses JSON's native list representation
+
+| `map` | map, string | map | Uses JSON's native map representation
+
+| `smallint` | integer, string | integer | String must be valid 16 bit integer
+
+| `set` | list, string | list | Uses JSON's native list representation
+
+| `text` | string | string | Uses JSON's `\u` character escape
+
+| `time` | string | string | Time of day in format `HH-MM-SS[.fffffffff]`
+
+| `timestamp` | integer, string | string | A timestamp. String constants allow timestamps to be input
+as dates. Timestamps are returned in the format `YYYY-MM-DD HH:MM:SS.SSS`.
+
+| `timeuuid` | string | string | Type 1 UUID. See `constant` for the UUID format
+
+| `tinyint` | integer, string | integer | String must be valid 8 bit integer
+
+| `tuple` | list, string | list | Uses JSON's native list representation
+
+| `UDT` | map, string | map | Uses JSON's native map representation with field names as keys
+
+| `uuid` | string | string | See `constant` for the UUID format
+
+| `varchar` | string | string | Uses JSON's `\u` character escape
+
+| `varint` | integer, string | integer | Variable length; may overflow 32 or 64 bit integers in client-side decoder
+|===
+
+== The fromJson() Function
+
+The `fromJson()` function may be used similarly to `INSERT JSON`, but
+for a single column value. It may only be used in the `VALUES` clause of
+an `INSERT` statement or as one of the column values in an `UPDATE`,
+`DELETE`, or `SELECT` statement. For example, it cannot be used in the
+selection clause of a `SELECT` statement.
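+
+For instance, a sketch using `fromJson()` for a single text column (the table is hypothetical):
+
+[source,cql]
+----
+INSERT INTO users (id, name) VALUES (1, fromJson('"john doe"'));
+----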
+
+== The toJson() Function
+
+The `toJson()` function may be used similarly to `SELECT JSON`, but for
+a single column value. It may only be used in the selection clause of a
+`SELECT` statement.
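+
+For instance (the table is hypothetical):
+
+[source,cql]
+----
+SELECT id, toJson(name) FROM users;
+----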
diff --git a/doc/modules/cassandra/pages/cql/mvs.adoc b/doc/modules/cassandra/pages/cql/mvs.adoc
new file mode 100644
index 00000000000..6da0fa4ffe5
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/mvs.adoc
@@ -0,0 +1,158 @@
+= Materialized Views
+
+Materialized view names are defined by:
+
+[source,bnf]
+----
+include::example$BNF/view_name.bnf[]
+----
+
+[[create-materialized-view-statement]]
+== CREATE MATERIALIZED VIEW
+
+You can create a materialized view on a table using a
+`CREATE MATERIALIZED VIEW` statement:
+
+[source,bnf]
+----
+include::example$BNF/create_mv_statement.bnf[]
+----
+
+For instance:
+
+[source,cql]
+----
+include::example$CQL/create_mv_statement.cql[]
+----
+
+The `CREATE MATERIALIZED VIEW` statement creates a new materialized
+view. Each such view is a set of _rows_ which corresponds to rows which
+are present in the underlying, or base, table specified in the `SELECT`
+statement. A materialized view cannot be directly updated, but updates
+to the base table will cause corresponding updates in the view.
+
+Creating a materialized view has 3 main parts:
+
+* The xref:cql/mvs.adoc#mv-select[select statement] that restricts the data included in
+the view.
+* The xref:cql/mvs.adoc#mv-primary-key[primary key] definition for the view.
+* The xref:cql/mvs.adoc#mv-options[options] for the view.
+
+Attempting to create an already existing materialized view will return
+an error unless the `IF NOT EXISTS` option is used. If it is used, the
+statement will be a no-op if the materialized view already exists.
+
+[NOTE]
+.Note
+====
+By default, materialized views are built in a single thread. The initial
+build can be parallelized by increasing the number of threads specified
+by the property `concurrent_materialized_view_builders` in
+`cassandra.yaml`. This property can also be manipulated at runtime
+through both JMX and the `setconcurrentviewbuilders` and
+`getconcurrentviewbuilders` nodetool commands.
+====
+
+[[mv-select]]
+=== MV select statement
+
+The select statement of a materialized view creation defines which rows of
+the base table are included in the view. That statement is limited in a
+number of ways:
+
+* the xref:cql/dml.adoc#selection-clause[selection] is limited to only
+selecting columns of the base table. In other words, you can't use any
+function (aggregate or not), casting, term, etc. Aliases are also not
+supported.
+You can however use `*` as a shortcut for selecting all columns.
+Further, xref:cql/types.adoc#static-columns[static columns] cannot be included in a materialized view.
+Thus, a `SELECT *` command isn't allowed if the base table has static columns.
+* the `WHERE` clause has the following restrictions:
+** it cannot include any `bind_marker`
+** columns that are not part of the _base table_ primary key can only be restricted by an `IS NOT NULL`
+restriction; no other restriction is allowed on them
+** columns that are part of the _view_ primary key cannot be null, so they must always be at least restricted by an `IS NOT NULL`
+restriction (or any other restriction, but they must have one)
+* the select statement cannot have an xref:cql/dml.adoc#ordering-clause[ordering clause], a xref:cql/dml.adoc#limit-clause[limit], or xref:cql/dml.adoc#allow-filtering[`ALLOW FILTERING`]
+
+[[mv-primary-key]]
+=== MV primary key
+
+A view must have a primary key and that primary key must conform to the
+following restrictions:
+
+* it must contain all the primary key columns of the base table. This
+ensures that every row of the view correspond to exactly one row of the
+base table.
+* it can only contain a single column that is not a primary key column
+in the base table.
+
+So for instance, given the following base table definition:
+
+[source,cql]
+----
+include::example$CQL/mv_table_def.cql[]
+----
+
+then the following view definitions are allowed:
+
+[source,cql]
+----
+include::example$CQL/mv_table_from_base.cql[]
+----
+
+but the following ones are *not* allowed:
+
+[source,cql]
+----
+include::example$CQL/mv_table_error.cql[]
+----
+
+[[mv-options]]
+=== MV options
+
+A materialized view is internally implemented by a table and as such,
+creating a MV allows the same options as
+creating a table.
+
+[[alter-materialized-view-statement]]
+== ALTER MATERIALIZED VIEW
+
+After creation, you can alter the options of a materialized view using
+the `ALTER MATERIALIZED VIEW` statement:
+
+[source,bnf]
+----
+include::example$BNF/alter_mv_statement.bnf[]
+----
+
+The options that can be updated are the same as at creation time and
+thus the same as for tables.
+
+[[drop-materialized-view-statement]]
+== DROP MATERIALIZED VIEW
+
+Dropping a materialized view uses the `DROP MATERIALIZED VIEW`
+statement:
+
+[source, bnf]
+----
+include::example$BNF/drop_mv_statement.bnf[]
+----
+
+If the materialized view does not exist, the statement will return an
+error, unless `IF EXISTS` is used in which case the operation is a
+no-op.
+
+=== MV Limitations
+
+[NOTE]
+.Note
+====
+Removal of columns not selected in the Materialized View (via
+`UPDATE base SET unselected_column = null` or
+`DELETE unselected_column FROM base`) may shadow missed updates to other
+columns received by hints or repair. For this reason, we advise against
+doing deletions on base columns not selected in views until this is
+fixed by CASSANDRA-13826.
+====
diff --git a/doc/modules/cassandra/pages/cql/operators.adoc b/doc/modules/cassandra/pages/cql/operators.adoc
new file mode 100644
index 00000000000..9d858f91821
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/operators.adoc
@@ -0,0 +1,68 @@
+= Arithmetic Operators
+
+CQL supports the following operators:
+
+[cols=",",options="header",]
+|===
+|Operator |Description
+
+| - (unary) | Negates operand
+
+| + | Addition
+
+| - | Subtraction
+
+| * | Multiplication
+
+| / | Division
+
+| % | Returns the remainder of a division
+|===
+
+== Number Arithmetic
+
+All arithmetic operations are supported on numeric types or counters.
+
+The return type of the operation will be based on the operand types:
+
+[cols=",,,,,,,,,",options="header",]
+|===
+|left/right |tinyint |smallint |int |bigint |counter |float |double |varint |decimal
+
+| *tinyint* | tinyint | smallint | int | bigint | bigint | float | double | varint | decimal
+
+| *smallint* | smallint | smallint | int | bigint | bigint | float | double | varint | decimal
+
+| *int* | int | int | int | bigint | bigint | float | double | varint | decimal
+
+| *bigint* | bigint | bigint | bigint | bigint | bigint | double | double | varint | decimal
+
+| *counter* | bigint | bigint | bigint | bigint | bigint | double | double | varint | decimal
+
+| *float* | float | float | float | double | double | float | double | decimal | decimal
+
+| *double* | double | double | double | double | double | double | double | decimal | decimal
+
+| *varint* | varint | varint | varint | decimal | decimal | decimal | decimal | decimal | decimal
+
+| *decimal* | decimal | decimal | decimal | decimal | decimal | decimal | decimal | decimal | decimal
+|===
+
+The `*`, `/` and `%` operators have a higher precedence level than the `+` and
+`-` operators. Consequently, they will be evaluated first. If two
+operators in an expression have the same precedence level, they will be
+evaluated left to right based on their position in the expression.
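+
+For instance, assuming a hypothetical table `scores` with numeric columns `a` and `b`:
+
+[source,cql]
+----
+-- * binds tighter than +: this computes a + (b * 2), not (a + b) * 2
+SELECT a + b * 2 FROM scores;
+----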
+
+[[datetime--arithmetic]]
+== Datetime Arithmetic
+
+A `duration` can be added to (+) or subtracted from (-) a `timestamp` or a
+`date` to create a new `timestamp` or `date`. So for instance:
+
+[source,cql]
+----
+include::example$CQL/datetime_arithmetic.cql[]
+----
+
+will select all the records with a value of `t` which is in the last 2
+days of 2016.
diff --git a/doc/modules/cassandra/pages/cql/security.adoc b/doc/modules/cassandra/pages/cql/security.adoc
new file mode 100644
index 00000000000..7ea0620ac85
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/security.adoc
@@ -0,0 +1,611 @@
+= Security
+
+[[cql-roles]]
+== Database Roles
+
+CQL uses database roles to represent users and group of users.
+Syntactically, a role is defined by:
+
+[source, bnf]
+----
+include::example$BNF/role_name.bnf[]
+----
+
+
+[[create-role-statement]]
+=== CREATE ROLE
+
+Creating a role uses the `CREATE ROLE` statement:
+
+[source, bnf]
+----
+include::example$BNF/create_role_statement.bnf[]
+----
+
+For instance:
+
+[source,cql]
+----
+include::example$CQL/create_role.cql[]
+----
+
+By default roles do not possess `LOGIN` privileges or `SUPERUSER`
+status.
+
+xref:cql/security.adoc#cql-permissions[Permissions] on database resources are granted to
+roles; types of resources include keyspaces, tables, functions and roles
+themselves. Roles may be granted to other roles to create hierarchical
+permissions structures; in these hierarchies, permissions and
+`SUPERUSER` status are inherited, but the `LOGIN` privilege is not.
+
+If a role has the `LOGIN` privilege, clients may identify as that role
+when connecting. For the duration of that connection, the client will
+acquire any roles and privileges granted to that role.
+
+Only a client with the `CREATE` permission on the database roles
+resource may issue `CREATE ROLE` requests (see the
+xref:cql/security.adoc#cql-permissions[relevant section]), unless the client is a
+`SUPERUSER`. Role management in Cassandra is pluggable and custom
+implementations may support only a subset of the listed options.
+
+Role names should be quoted if they contain non-alphanumeric characters.
+
+==== Setting credentials for internal authentication
+
+Use the `WITH PASSWORD` clause to set a password for internal
+authentication, enclosing the password in single quotation marks.
+
+If internal authentication has not been set up or the role does not have
+`LOGIN` privileges, the `WITH PASSWORD` clause is not necessary.
+
+==== Restricting connections to specific datacenters
+
+If a `network_authorizer` has been configured, you can restrict login
+roles to specific datacenters with the `ACCESS TO DATACENTERS` clause
+followed by a set literal of datacenters the user can access. Not
+specifying datacenters implicitly grants access to all datacenters. The
+clause `ACCESS TO ALL DATACENTERS` can be used for explicitness, but
+there's no functional difference.
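+
+A sketch of such a restricted role (the names are hypothetical):
+
+[source,cql]
+----
+CREATE ROLE ops WITH PASSWORD = 'password_a' AND LOGIN = true
+    AND ACCESS TO DATACENTERS {'DC1', 'DC3'};
+----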
+
+==== Creating a role conditionally
+
+Attempting to create an existing role results in an invalid query
+condition unless the `IF NOT EXISTS` option is used. If the option is
+used and the role exists, the statement is a no-op:
+
+[source,cql]
+----
+include::example$CQL/create_role_ifnotexists.cql[]
+----
+
+[[alter-role-statement]]
+=== ALTER ROLE
+
+Altering the options of a role uses the `ALTER ROLE` statement:
+
+[source, bnf]
+----
+include::example$BNF/alter_role_statement.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/alter_role.cql[]
+----
+
+==== Restricting connections to specific datacenters
+
+If a `network_authorizer` has been configured, you can restrict login
+roles to specific datacenters with the `ACCESS TO DATACENTERS` clause
+followed by a set literal of datacenters the user can access. To remove
+any data center restrictions, use the `ACCESS TO ALL DATACENTERS`
+clause.
+
+Conditions on executing `ALTER ROLE` statements:
+
+* a client must have `SUPERUSER` status to alter the `SUPERUSER` status
+of another role
+* a client cannot alter the `SUPERUSER` status of any role it currently
+holds
+* a client can only modify certain properties of the role with which it
+identified at login (e.g. `PASSWORD`)
+* to modify properties of a role, the client must be granted `ALTER`
+xref:cql/security.adoc#cql-permissions[permission] on that role
+
+[[drop-role-statement]]
+=== DROP ROLE
+
+Dropping a role uses the `DROP ROLE` statement:
+
+[source, bnf]
+----
+include::example$BNF/drop_role_statement.bnf[]
+----
+
+`DROP ROLE` requires the client to have `DROP`
+xref:cql/security.adoc#cql-permissions[permission] on the role in question. In addition, a
+client may not `DROP` the role with which it identified at login.
+Finally, only a client with `SUPERUSER` status may `DROP` another
+`SUPERUSER` role.
+
+Attempting to drop a role which does not exist results in an invalid
+query condition unless the `IF EXISTS` option is used. If the option is
+used and the role does not exist the statement is a no-op.
+
+[NOTE]
+.Note
+====
+DROP ROLE intentionally does not terminate any open user sessions.
+Currently connected sessions will remain connected and will retain the
+ability to perform any database actions which do not require
+xref:cql/security.adoc#authorization[authorization].
+However, if authorization is enabled, xref:cql/security.adoc#cql-permissions[permissions] of the dropped role are also revoked,
+subject to the xref:cql/security.adoc#auth-caching[caching options] configured in xref:cql/configuring.adoc#cassandra.yaml[cassandra-yaml] file.
+Should a dropped role be subsequently recreated and have new xref:security.adoc#grant-permission-statement[permissions] or
+xref:security.adoc#grant-role-statement[roles] granted to it, any client sessions still
+connected will acquire the newly granted permissions and roles.
+====
+
+[[grant-role-statement]]
+=== GRANT ROLE
+
+Granting a role to another uses the `GRANT ROLE` statement:
+
+[source, bnf]
+----
+include::example$BNF/grant_role_statement.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/grant_role.cql[]
+----
+
+This statement grants the `report_writer` role to `alice`. Any
+permissions granted to `report_writer` are also acquired by `alice`.
+
+Roles are modelled as a directed acyclic graph, so circular grants are
+not permitted. The following examples result in error conditions:
+
+[source,cql]
+----
+include::example$CQL/role_error.cql[]
+----
+
+[[revoke-role-statement]]
+=== REVOKE ROLE
+
+Revoking a role uses the `REVOKE ROLE` statement:
+
+[source, bnf]
+----
+include::example$BNF/revoke_role_statement.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/revoke_role.cql[]
+----
+
+This statement revokes the `report_writer` role from `alice`. Any
+permissions that `alice` has acquired via the `report_writer` role are
+also revoked.
+
+[[list-roles-statement]]
+=== LIST ROLES
+
+All the known roles (in the system or granted to a specific role) can be
+listed using the `LIST ROLES` statement:
+
+[source, bnf]
+----
+include::example$BNF/list_roles_statement.bnf[]
+----
+
+For instance:
+
+[source,cql]
+----
+include::example$CQL/list_roles.cql[]
+----
+
+returns all known roles in the system; this requires `DESCRIBE`
+permission on the database roles resource.
+
+This example enumerates all roles granted to `alice`, including those transitively
+acquired:
+
+[source,cql]
+----
+include::example$CQL/list_roles_of.cql[]
+----
+
+This example lists all roles directly granted to `bob` without including any of the
+transitively acquired ones:
+
+[source,cql]
+----
+include::example$CQL/list_roles_nonrecursive.cql[]
+----
+
+== Users
+
+Prior to the introduction of roles in Cassandra 2.2, authentication and
+authorization were based around the concept of a `USER`. For backward
+compatibility, the legacy syntax has been preserved with `USER` centric
+statements becoming synonyms for the `ROLE` based equivalents. In other
+words, creating/updating a user is just a different syntax for
+creating/updating a role.
+
+[[create-user-statement]]
+=== CREATE USER
+
+Creating a user uses the `CREATE USER` statement:
+
+[source, bnf]
+----
+include::example$BNF/create_user_statement.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/create_user.cql[]
+----
+
+The `CREATE USER` command is equivalent to `CREATE ROLE` where the `LOGIN` option is `true`.
+So, the following pairs of statements are equivalent:
+
+[source,cql]
+----
+include::example$CQL/create_user_role.cql[]
+----
+
+[[alter-user-statement]]
+=== ALTER USER
+
+Altering the options of a user uses the `ALTER USER` statement:
+
+[source, bnf]
+----
+include::example$BNF/alter_user_statement.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/alter_user.cql[]
+----
+
+[[drop-user-statement]]
+=== DROP USER
+
+Dropping a user uses the `DROP USER` statement:
+
+[source, bnf]
+----
+include::example$BNF/drop_user_statement.bnf[]
+----
+
+[[list-users-statement]]
+=== LIST USERS
+
+Existing users can be listed using the `LIST USERS` statement:
+
+[source, bnf]
+----
+include::example$BNF/list_users_statement.bnf[]
+----
+
+Note that this statement is equivalent to xref:security.adoc#list-roles-statement[`LIST ROLES`], but only roles with the `LOGIN` privilege are included in the output.
+
+== Data Control
+
+[[cql-permissions]]
+=== Permissions
+
+Permissions on resources are granted to roles; there are several
+different types of resources in Cassandra and each type is modelled
+hierarchically:
+
+* The hierarchy of Data resources, Keyspaces and Tables has the
+structure `ALL KEYSPACES` -> `KEYSPACE` -> `TABLE`.
+* Function resources have the structure `ALL FUNCTIONS` -> `KEYSPACE` ->
+`FUNCTION`
+* Resources representing roles have the structure `ALL ROLES` -> `ROLE`
+* Resources representing JMX ObjectNames, which map to sets of
+MBeans/MXBeans, have the structure `ALL MBEANS` -> `MBEAN`
+
+Permissions can be granted at any level of these hierarchies and they
+flow downwards. So granting a permission on a resource higher up the
+chain automatically grants that same permission on all resources lower
+down. For example, granting `SELECT` on a `KEYSPACE` automatically
+grants it on all `TABLES` in that `KEYSPACE`. Likewise, granting a
+permission on `ALL FUNCTIONS` grants it on every defined function,
+regardless of which keyspace it is scoped in. It is also possible to
+grant permissions on all functions scoped to a particular keyspace.
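+
+For instance, a sketch of granting at the keyspace level (the names are hypothetical):
+
+[source,cql]
+----
+-- report_writer can now SELECT from every table in mykeyspace
+GRANT SELECT ON KEYSPACE mykeyspace TO report_writer;
+----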
+
+Modifications to permissions are visible to existing client sessions;
+that is, connections need not be re-established following permissions
+changes.
+
+The full set of available permissions is:
+
+* `CREATE`
+* `ALTER`
+* `DROP`
+* `SELECT`
+* `MODIFY`
+* `AUTHORIZE`
+* `DESCRIBE`
+* `EXECUTE`
+
+Not all permissions are applicable to every type of resource. For
+instance, `EXECUTE` is only relevant in the context of functions or
+mbeans; granting `EXECUTE` on a resource representing a table is
+nonsensical. Attempting to `GRANT` a permission on resource to which it
+cannot be applied results in an error response. The following
+illustrates which permissions can be granted on which types of resource,
+and which statements are enabled by that permission.
+
+[cols=",,",options="header",]
+|===
+|Permission |Resource |Operations
+
+| `CREATE` | `ALL KEYSPACES` | `CREATE KEYSPACE` and `CREATE TABLE` in any keyspace
+
+| `CREATE` | `KEYSPACE` | `CREATE TABLE` in specified keyspace
+
+| `CREATE` | `ALL FUNCTIONS` | `CREATE FUNCTION` in any keyspace and `CREATE AGGREGATE` in any keyspace
+
+| `CREATE` | `ALL FUNCTIONS IN KEYSPACE` | `CREATE FUNCTION` and `CREATE AGGREGATE` in specified keyspace
+
+| `CREATE` | `ALL ROLES` | `CREATE ROLE`
+
+| `ALTER` | `ALL KEYSPACES` | `ALTER KEYSPACE` and `ALTER TABLE` in any keyspace
+
+| `ALTER` | `KEYSPACE` | `ALTER KEYSPACE` and `ALTER TABLE` in specified keyspace
+
+| `ALTER` | `TABLE` | `ALTER TABLE`
+
+| `ALTER` | `ALL FUNCTIONS` | `CREATE FUNCTION` and `CREATE AGGREGATE`: replacing any existing
+
+| `ALTER` | `ALL FUNCTIONS IN KEYSPACE` | `CREATE FUNCTION` and `CREATE AGGREGATE`: replacing existing in specified keyspace
+
+| `ALTER` | `FUNCTION` | `CREATE FUNCTION` and `CREATE AGGREGATE`: replacing existing
+
+| `ALTER` | `ALL ROLES` | `ALTER ROLE` on any role
+
+| `ALTER` | `ROLE` | `ALTER ROLE`
+
+| `DROP` | `ALL KEYSPACES` | `DROP KEYSPACE` and `DROP TABLE` in any keyspace
+
+| `DROP` | `KEYSPACE` | `DROP TABLE` in specified keyspace
+
+| `DROP` | `TABLE` | `DROP TABLE`
+
+| `DROP` | `ALL FUNCTIONS` | `DROP FUNCTION` and `DROP AGGREGATE` in any keyspace
+
+| `DROP` | `ALL FUNCTIONS IN KEYSPACE` | `DROP FUNCTION` and `DROP AGGREGATE` in specified keyspace
+
+| `DROP` | `FUNCTION` | `DROP FUNCTION`
+
+| `DROP` | `ALL ROLES` | `DROP ROLE` on any role
+
+| `DROP` | `ROLE` | `DROP ROLE`
+
+| `SELECT` | `ALL KEYSPACES` | `SELECT` on any table
+
+| `SELECT` | `KEYSPACE` | `SELECT` on any table in specified keyspace
+
+| `SELECT` | `TABLE` | `SELECT` on specified table
+
+| `SELECT` | `ALL MBEANS` | Call getter methods on any mbean
+
+| `SELECT` | `MBEANS` | Call getter methods on any mbean matching a wildcard pattern
+
+| `SELECT` | `MBEAN` | Call getter methods on named mbean
+
+| `MODIFY` | `ALL KEYSPACES` | `INSERT`, `UPDATE`, `DELETE` and `TRUNCATE` on any table
+
+| `MODIFY` | `KEYSPACE` | `INSERT`, `UPDATE`, `DELETE` and `TRUNCATE` on any table in specified
+keyspace
+
+| `MODIFY` | `TABLE` | `INSERT`, `UPDATE`, `DELETE` and `TRUNCATE` on specified table
+
+| `MODIFY` | `ALL MBEANS` | Call setter methods on any mbean
+
+| `MODIFY` | `MBEANS` | Call setter methods on any mbean matching a wildcard pattern
+
+| `MODIFY` | `MBEAN` | Call setter methods on named mbean
+
+| `AUTHORIZE` | `ALL KEYSPACES` | `GRANT PERMISSION` and `REVOKE PERMISSION` on any table
+
+| `AUTHORIZE` | `KEYSPACE` | `GRANT PERMISSION` and `REVOKE PERMISSION` on any table in specified keyspace
+
+| `AUTHORIZE` | `TABLE` | `GRANT PERMISSION` and `REVOKE PERMISSION` on specified table
+
+| `AUTHORIZE` | `ALL FUNCTIONS` | `GRANT PERMISSION` and `REVOKE PERMISSION` on any function
+
+| `AUTHORIZE` | `ALL FUNCTIONS IN KEYSPACE` | `GRANT PERMISSION` and `REVOKE PERMISSION` in specified keyspace
+
+| `AUTHORIZE` | `FUNCTION` | `GRANT PERMISSION` and `REVOKE PERMISSION` on specified function
+
+| `AUTHORIZE` | `ALL MBEANS` | `GRANT PERMISSION` and `REVOKE PERMISSION` on any mbean
+
+| `AUTHORIZE` | `MBEANS` | `GRANT PERMISSION` and `REVOKE PERMISSION` on any mbean matching a wildcard pattern
+
+| `AUTHORIZE` | `MBEAN` | `GRANT PERMISSION` and `REVOKE PERMISSION` on named mbean
+
+| `AUTHORIZE` | `ALL ROLES` | `GRANT ROLE` and `REVOKE ROLE` on any role
+
+| `AUTHORIZE` | `ROLE` | `GRANT ROLE` and `REVOKE ROLE` on specified role
+
+| `DESCRIBE` | `ALL ROLES` | `LIST ROLES` on all roles or only roles granted to another, specified role
+
+| `DESCRIBE` | `ALL MBEANS` | Retrieve metadata about any mbean from the platform's MBeanServer
+
+| `DESCRIBE` | `MBEANS` | Retrieve metadata about any mbean matching a wildcard pattern from the
+platform's MBeanServer
+
+| `DESCRIBE` | `MBEAN` | Retrieve metadata about a named mbean from the platform's MBeanServer
+
+| `EXECUTE` | `ALL FUNCTIONS` | `SELECT`, `INSERT` and `UPDATE` using any function, and use of any
+function in `CREATE AGGREGATE`
+
+| `EXECUTE` | `ALL FUNCTIONS IN KEYSPACE` | `SELECT`, `INSERT` and `UPDATE` using any function in specified keyspace
+and use of any function in keyspace in `CREATE AGGREGATE`
+
+| `EXECUTE` | `FUNCTION` | `SELECT`, `INSERT` and `UPDATE` using specified function and use of the function in `CREATE AGGREGATE`
+
+| `EXECUTE` | `ALL MBEANS` | Execute operations on any mbean
+
+| `EXECUTE` | `MBEANS` | Execute operations on any mbean matching a wildcard pattern
+
+| `EXECUTE` | `MBEAN` | Execute operations on named mbean
+|===
+
+[[grant-permission-statement]]
+=== GRANT PERMISSION
+
+Granting a permission uses the `GRANT PERMISSION` statement:
+
+[source, bnf]
+----
+include::example$BNF/grant_permission_statement.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/grant_perm.cql[]
+----
+
+This example gives any user with the role `data_reader` permission to execute
+`SELECT` statements on any table across all keyspaces.
+
+[source,cql]
+----
+include::example$CQL/grant_modify.cql[]
+----
+
+This gives any user with the role `data_writer` permission to perform
+`INSERT`, `UPDATE`, `DELETE` and `TRUNCATE` queries on all tables in the
+`keyspace1` keyspace.
+
+[source,cql]
+----
+include::example$CQL/grant_drop.cql[]
+----
+
+This gives any user with the `schema_owner` role permission to `DROP`
+the specific table `keyspace1.table1`.
+
+[source,cql]
+----
+include::example$CQL/grant_execute.cql[]
+----
+
+This command grants any user with the `report_writer` role permission to execute
+`SELECT`, `INSERT` and `UPDATE` queries which use the function
+`keyspace1.user_function( int )`.
+
+[source,cql]
+----
+include::example$CQL/grant_describe.cql[]
+----
+
+This grants any user with the `role_admin` role permission to view any
+and all roles in the system with a `LIST ROLES` statement.
+
+==== GRANT ALL
+
+When the `GRANT ALL` form is used, the appropriate set of permissions is
+determined automatically based on the target resource.
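+
+As a hedged sketch (assuming a table `keyspace1.table1` and a role
+`ops` exist; both names are hypothetical):
+
+[source,cql]
+----
+-- grants every permission applicable to a table:
+-- ALTER, DROP, SELECT, MODIFY and AUTHORIZE
+GRANT ALL PERMISSIONS ON TABLE keyspace1.table1 TO ops;
+----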
+
+==== Automatic Granting
+
+When a resource is created, via a `CREATE KEYSPACE`, `CREATE TABLE`,
+`CREATE FUNCTION`, `CREATE AGGREGATE` or `CREATE ROLE` statement, the
+creator (the role identified as the database user who issues the
+statement) is automatically granted all applicable permissions on the
+new resource.
+
+[[revoke-permission-statement]]
+=== REVOKE PERMISSION
+
+Revoking a permission from a role uses the `REVOKE PERMISSION`
+statement:
+
+[source, bnf]
+----
+include::example$BNF/revoke_permission_statement.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/revoke_perm.cql[]
+----
+
+Because of their function in normal driver operations, certain tables
+cannot have their `SELECT` permissions revoked. The
+following tables will be available to all authorized users regardless of
+their assigned role:
+
+[source,cql]
+----
+include::example$CQL/no_revoke.cql[]
+----
+
+[[list-permissions-statement]]
+=== LIST PERMISSIONS
+
+Listing granted permissions uses the `LIST PERMISSIONS` statement:
+
+[source, bnf]
+----
+include::example$BNF/list_permissions_statement.bnf[]
+----
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/list_perm.cql[]
+----
+
+Show all permissions granted to `alice`, including those acquired
+transitively from any other roles.
+
+[source,cql]
+----
+include::example$CQL/list_all_perm.cql[]
+----
+
+Show all permissions on `keyspace1.table1` granted to `bob`, including
+those acquired transitively from any other roles. This also includes any
+permissions higher up the resource hierarchy which can be applied to
+`keyspace1.table1`. For example, should `bob` have `ALTER` permission on
+`keyspace1`, that would be included in the results of this query. Adding
+the `NORECURSIVE` switch restricts the results to only those permissions
+which were directly granted to `bob` or one of `bob`'s roles.
+
+[source,cql]
+----
+include::example$CQL/list_select_perm.cql[]
+----
+
+Show any permissions granted to `carlos` or any of `carlos`'s roles,
+limited to `SELECT` permissions on any resource.
diff --git a/doc/modules/cassandra/pages/cql/triggers.adoc b/doc/modules/cassandra/pages/cql/triggers.adoc
new file mode 100644
index 00000000000..9ec67579061
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/triggers.adoc
@@ -0,0 +1,50 @@
+= Triggers
+
+Triggers are identified with a name defined by:
+
+[source,bnf]
+----
+include::example$BNF/trigger_name.bnf[]
+----
+
+[[create-trigger-statement]]
+== CREATE TRIGGER
+
+Creating a new trigger uses the `CREATE TRIGGER` statement:
+
+[source,bnf]
+----
+include::example$BNF/create_trigger_statement.bnf[]
+----
+
+For instance:
+
+[source,cql]
+----
+include::example$CQL/create_trigger.cql[]
+----
+
+The actual logic that makes up the trigger can be written in any Java
+(JVM) language and exists outside the database. Place the trigger code
+in the `lib/triggers` subdirectory of the Cassandra installation
+directory; it is loaded during cluster startup and must exist on every
+node that participates in the cluster. A trigger defined on a table
+fires before a requested DML statement occurs, which ensures the
+atomicity of the transaction.
+
+[[drop-trigger-statement]]
+== DROP TRIGGER
+
+Dropping a trigger uses the `DROP TRIGGER` statement:
+
+[source,bnf]
+----
+include::example$BNF/drop_trigger_statement.bnf[]
+----
+
+For instance:
+
+[source,cql]
+----
+include::example$CQL/drop_trigger.cql[]
+----
diff --git a/doc/modules/cassandra/pages/cql/types.adoc b/doc/modules/cassandra/pages/cql/types.adoc
new file mode 100644
index 00000000000..0cee1f3d0db
--- /dev/null
+++ b/doc/modules/cassandra/pages/cql/types.adoc
@@ -0,0 +1,539 @@
+= Data Types
+
+CQL is a typed language and supports a rich set of data types, including
+xref:cql/types.adoc#native-types[native types], xref:cql/types.adoc#collections[collection types],
+xref:cql/types.adoc#udts[user-defined types], xref:cql/types.adoc#tuples[tuple types], and xref:cql/types.adoc#custom-types[custom
+types]:
+
+[source, bnf]
+----
+include::example$BNF/cql_type.bnf[]
+----
+
+== Native types
+
+The native types supported by CQL are:
+
+[source, bnf]
+----
+include::example$BNF/native_type.bnf[]
+----
+
+The following table gives additional information on the native data
+types, and on which kinds of xref:cql/definitions.adoc#constants[constants] each type supports:
+
+[cols=",,",options="header",]
+|===
+| Type | Constants supported | Description
+
+| `ascii` | `string` | ASCII character string
+| `bigint` | `integer` | 64-bit signed long
+| `blob` | `blob` | Arbitrary bytes (no validation)
+| `boolean` | `boolean` | Either `true` or `false`
+| `counter` | `integer` | Counter column (64-bit signed value). See xref:cql/types.adoc#counters[counters] for details.
+| `date` | `integer`, `string` | A date (with no corresponding time value). See xref:cql/types.adoc#dates[dates] below for details.
+| `decimal` | `integer`, `float` | Variable-precision decimal
+| `double` | `integer`, `float` | 64-bit IEEE-754 floating point
+| `duration` | `duration` | A duration with nanosecond precision. See xref:cql/types.adoc#durations[durations] below for details.
+| `float` | `integer`, `float` | 32-bit IEEE-754 floating point
+| `inet` | `string` | An IP address, either IPv4 (4 bytes long) or IPv6 (16 bytes long). Note
+that there is no `inet` constant; IP addresses should be input as strings.
+| `int` | `integer` | 32-bit signed int
+| `smallint` | `integer` | 16-bit signed int
+| `text` | `string` | UTF8 encoded string
+| `time` | `integer`, `string` | A time (with no corresponding date value) with nanosecond precision. See
+xref:cql/types.adoc#times[times] below for details.
+| `timestamp` | `integer`, `string` | A timestamp (date and time) with millisecond precision. See
+xref:cql/types.adoc#timestamps[timestamps] below for details.
+| `timeuuid` | `uuid` | Version 1 https://en.wikipedia.org/wiki/Universally_unique_identifier[UUID],
+generally used as a “conflict-free” timestamp. Also see xref:cql/functions.adoc#timeuuid-functions[timeuuid functions].
+| `tinyint` | `integer` | 8-bit signed int
+| `uuid` | `uuid` | A https://en.wikipedia.org/wiki/Universally_unique_identifier[UUID] (of any version)
+| `varchar` | `string` | UTF8 encoded string
+| `varint` | `integer` | Arbitrary-precision integer
+|===
+
+[[counters]]
+=== Counters
+
+The `counter` type is used to define _counter columns_. A counter column
+is a column whose value is a 64-bit signed integer and on which 2
+operations are supported: incrementing and decrementing (see the
+xref:cql/dml.adoc#update-statement[UPDATE] statement for syntax).
+Note that the value of a counter cannot
+be set: a counter does not exist until first incremented/decremented,
+and that first increment/decrement is made as if the prior value was 0.
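+
+A minimal sketch of both operations (the table and column names are
+illustrative only):
+
+[source,cql]
+----
+CREATE TABLE page_views (
+    page text PRIMARY KEY,
+    views counter
+);
+-- the first increment acts as if the prior value was 0
+UPDATE page_views SET views = views + 1 WHERE page = '/home';
+UPDATE page_views SET views = views - 1 WHERE page = '/home';
+----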
+
+[[counter-limitations]]
+Counters have a number of important limitations:
+
+* They cannot be used for columns that are part of the `PRIMARY KEY` of a table.
+* A table that contains a counter can only contain counters. In other
+words, either all the columns of a table outside the `PRIMARY KEY` have
+the `counter` type, or none of them have it.
+* Counters do not support xref:cql/dml.adoc#writetime-and-ttl-function[expiration].
+* The deletion of counters is supported, but is only guaranteed to work
+the first time you delete a counter. In other words, you should not
+re-update a counter that you have deleted (if you do, proper behavior is
+not guaranteed).
+* Counter updates are, by nature, not
+https://en.wikipedia.org/wiki/Idempotence[idempotent]. An important
+consequence is that if a counter update fails unexpectedly (timeout or
+loss of connection to the coordinator node), the client has no way to
+know if the update has been applied or not. In particular, replaying the
+update may or may not lead to an over count.
+
+[[timestamps]]
+== Working with timestamps
+
+Values of the `timestamp` type are encoded as 64-bit signed integers
+representing a number of milliseconds since the standard base time known
+as https://en.wikipedia.org/wiki/Unix_time[the epoch]: January 1 1970 at
+00:00:00 GMT.
+
+Timestamps can be input in CQL either using their value as an `integer`,
+or using a `string` that represents an
+https://en.wikipedia.org/wiki/ISO_8601[ISO 8601] date. For instance, all
+of the values below are valid `timestamp` values for Mar 2, 2011, at
+04:05:00 AM, GMT:
+
+* `1299038700000`
+* `'2011-03-02 04:05+0000'`
+* `'2011-03-02 04:05:00+0000'`
+* `'2011-03-02 04:05:00.000+0000'`
+* `'2011-03-02T04:05+0000'`
+* `'2011-03-02T04:05:00+0000'`
+* `'2011-03-02T04:05:00.000+0000'`
+
+The `+0000` above is an RFC 822 4-digit time zone specification; `+0000`
+refers to GMT. US Pacific Standard Time is `-0800`. The time zone may be
+omitted if desired (`'2011-02-03 04:05:00'`), and if so, the date will
+be interpreted as being in the time zone under which the coordinating
+Cassandra node is configured. There are however difficulties inherent in
+relying on the time zone configuration being as expected, so it is
+recommended that the time zone always be specified for timestamps when
+feasible.
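+
+As an illustration, assuming a hypothetical `events` table with an
+`int` primary key `id` and a `timestamp` column `at`, the following two
+inserts denote the same instant:
+
+[source,cql]
+----
+INSERT INTO events (id, at) VALUES (1, 1299038700000);
+INSERT INTO events (id, at) VALUES (2, '2011-03-02 04:05:00.000+0000');
+----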
+
+The time of day may also be omitted (`'2011-03-02'` or
+`'2011-03-02+0000'`), in which case the time of day will default to
+00:00:00 in the specified or default time zone. However, if only the
+date part is relevant, consider using the xref:cql/types.adoc#dates[date] type.
+
+[[dates]]
+== Date type
+
+Values of the `date` type are encoded as 32-bit unsigned integers
+representing a number of days, with “the epoch” at the center of the
+range (2^31^). The epoch is January 1st, 1970.
+
+As with xref:cql/types.adoc#timestamps[timestamps], a date can be input
+either as an `integer` or using a date `string`. In the latter case, the
+format should be `yyyy-mm-dd` (so `'2011-02-03'` for instance).
+
+[[times]]
+== Time type
+
+Values of the `time` type are encoded as 64-bit signed integers
+representing the number of nanoseconds since midnight.
+
+As with xref:cql/types.adoc#timestamps[timestamps], a time can be input
+either as an `integer` or using a `string` representing the time. In the
+latter case, the format should be `hh:mm:ss[.fffffffff]` (where the
+sub-second precision is optional and, if provided, can be less than
+nanosecond precision). So for instance, the following are valid inputs
+for a time:
+
+* `'08:12:54'`
+* `'08:12:54.123'`
+* `'08:12:54.123456'`
+* `'08:12:54.123456789'`
+
+[[durations]]
+== Duration type
+
+Values of the `duration` type are encoded as 3 signed integers of
+variable length. The first integer represents the number of months, the
+second the number of days and the third the number of nanoseconds. This
+is due to the fact that the number of days in a month can change, and a
+day can have 23 or 25 hours depending on daylight saving time.
+Internally, the number of months and days are decoded as 32-bit
+integers whereas the number of nanoseconds is decoded as a 64-bit
+integer.
+
+A duration can be input as:
+
+* `(quantity unit)+` like `12h30m` where the unit can be:
+** `y`: years (12 months)
+** `mo`: months (1 month)
+** `w`: weeks (7 days)
+** `d`: days (1 day)
+** `h`: hours (3,600,000,000,000 nanoseconds)
+** `m`: minutes (60,000,000,000 nanoseconds)
+** `s`: seconds (1,000,000,000 nanoseconds)
+** `ms`: milliseconds (1,000,000 nanoseconds)
+** `us` or `µs` : microseconds (1000 nanoseconds)
+** `ns`: nanoseconds (1 nanosecond)
+* ISO 8601 format: `P[n]Y[n]M[n]DT[n]H[n]M[n]S` or `P[n]W`
+* ISO 8601 alternative format: `P[YYYY]-[MM]-[DD]T[hh]:[mm]:[ss]`
+
+For example:
+
+[source,cql]
+----
+include::example$CQL/insert_duration.cql[]
+----
+
+[[duration-limitation]]
+Duration columns cannot be used in a table's `PRIMARY KEY`. This
+limitation is due to the fact that durations cannot be ordered. It is
+effectively not possible to know if `1mo` is greater than `29d` without
+a date context.
+
+A `1d` duration is not equal to a `24h` one as the duration type has
+been created to be able to support daylight saving time.
+
+== Collections
+
+CQL supports three kinds of collections: `maps`, `sets` and `lists`. The
+types of those collections are defined by:
+
+[source,bnf]
+----
+include::example$BNF/collection_type.bnf[]
+----
+
+and their values can be input using collection literals:
+
+[source,bnf]
+----
+include::example$BNF/collection_literal.bnf[]
+----
+
+Note however that neither `bind_marker` nor `NULL` are supported inside
+collection literals.
+
+=== Noteworthy characteristics
+
+Collections are meant for storing/denormalizing relatively small amounts
+of data. They work well for things like “the phone numbers of a given
+user”, “labels applied to an email”, etc. But when items are expected to
+grow unbounded (“all messages sent by a user”, “events registered by a
+sensor”...), then collections are not appropriate and a specific table
+(with clustering columns) should be used. Concretely, (non-frozen)
+collections have the following noteworthy characteristics and
+limitations:
+
+* Individual collections are not indexed internally, which means that
+even to access a single element of a collection, the whole collection
+has to be read (and reading one is not paged internally).
+* While insertion operations on sets and maps never incur a
+read-before-write internally, some operations on lists do. Further, some
+list operations are not idempotent by nature (see the section on
+xref:cql/types.adoc#lists[lists] below for details), making their retry in case of
+timeout problematic. It is thus advised to prefer sets over lists when
+possible.
+
+Please note that while some of those limitations may be removed or
+improved upon in the future, it is an anti-pattern to use a (single)
+collection to store large amounts of data.
+
+=== Maps
+
+A `map` is a (sorted) set of key-value pairs, where keys are unique and
+the map is sorted by its keys. You can define and insert a map with:
+
+[source,cql]
+----
+include::example$CQL/map.cql[]
+----
+
+Further, maps support:
+
+* Updating or inserting one or more elements:
++
+[source,cql]
+----
+include::example$CQL/update_map.cql[]
+----
+* Removing one or more elements (if an element doesn't exist, removing it
+is a no-op but no error is thrown):
++
+[source,cql]
+----
+include::example$CQL/delete_map.cql[]
+----
++
+Note that for removing multiple elements in a `map`, you remove from it
+a `set` of keys.
+
+Lastly, TTLs are allowed for both `INSERT` and `UPDATE`, but in both
+cases the TTL set only applies to the newly inserted/updated elements. In
+other words:
+
+[source,cql]
+----
+include::example$CQL/update_ttl_map.cql[]
+----
+
+will only apply the TTL to the `{ 'color' : 'green' }` record, the rest
+of the map remaining unaffected.
+
+=== Sets
+
+A `set` is a (sorted) collection of unique values. You can define and
+insert a set with:
+
+[source,cql]
+----
+include::example$CQL/set.cql[]
+----
+
+Further, sets support:
+
+* Adding one or multiple elements (as this is a set, inserting an
+already existing element is a no-op):
++
+[source,cql]
+----
+include::example$CQL/update_set.cql[]
+----
+* Removing one or multiple elements (if an element doesn't exist,
+removing it is a no-op but no error is thrown):
++
+[source,cql]
+----
+include::example$CQL/delete_set.cql[]
+----
+
+Lastly, for xref:cql/types.adoc#sets[sets], TTLs are only applied to newly inserted values.
+
+=== Lists
+
+[NOTE]
+.Note
+====
+As mentioned above and further discussed at the end of this section,
+lists have limitations and specific performance considerations that you
+should take into account before using them. In general, if you can use a
+xref:cql/types.adoc#sets[set] instead of list, always prefer a set.
+====
+
+A `list` is a (sorted) collection of non-unique values where
+elements are ordered by their position in the list. You can define and
+insert a list with:
+
+[source,cql]
+----
+include::example$CQL/list.cql[]
+----
+
+Further, lists support:
+
+* Appending and prepending values to a list:
++
+[source,cql]
+----
+include::example$CQL/update_list.cql[]
+----
+
+[WARNING]
+.Warning
+====
+The append and prepend operations are not idempotent by nature. So in
+particular, if one of these operations times out, then retrying the
+operation is not safe and it may (or may not) lead to
+appending/prepending the value twice.
+====
+
+* Setting the value at a particular position in a list that has a
+pre-existing element at that position (an error is thrown otherwise):
++
+[source,cql]
+----
+include::example$CQL/update_particular_list_element.cql[]
+----
+* Removing an element by its position in a list that has a pre-existing
+element at that position (an error is thrown otherwise). Further, as the
+operation removes an element from the list, the list size will decrease
+by one, shifting the positions of all following elements down by one:
++
+[source,cql]
+----
+include::example$CQL/delete_element_list.cql[]
+----
+
+* Deleting _all_ the occurrences of particular values in the list (if a
+particular element doesn't occur at all in the list, it is simply
+ignored and no error is thrown):
++
+[source,cql]
+----
+include::example$CQL/delete_all_elements_list.cql[]
+----
+
+[WARNING]
+.Warning
+====
+Setting and removing an element by position and removing occurrences of
+particular values incur an internal _read-before-write_. These operations
+will run slowly and use more resources than usual updates (with the
+exception of conditional writes, which have their own cost).
+====
+
+Lastly, for xref:cql/types.adoc#lists[lists], TTLs only apply to newly inserted values.
+
+[[udts]]
+== User-Defined Types (UDTs)
+
+CQL supports the definition of user-defined types (UDTs). Such a
+type can be created, modified and removed using the
+`create_type_statement`, `alter_type_statement` and
+`drop_type_statement` described below. But once created, a UDT is simply
+referred to by its name:
+
+[source, bnf]
+----
+include::example$BNF/udt.bnf[]
+----
+
+=== Creating a UDT
+
+Creating a new user-defined type is done using a `CREATE TYPE` statement
+defined by:
+
+[source, bnf]
+----
+include::example$BNF/create_type.bnf[]
+----
+
+A UDT has a name (used to declare columns of that type) and is a set of
+named and typed fields. Fields can be of any type, including
+collections or other UDTs. For instance:
+
+[source,cql]
+----
+include::example$CQL/udt.cql[]
+----
+
+Things to keep in mind about UDTs:
+
+* Attempting to create an already existing type will result in an error
+unless the `IF NOT EXISTS` option is used. If it is used, the statement
+will be a no-op if the type already exists.
+* A type is intrinsically bound to the keyspace in which it is created,
+and can only be used in that keyspace. At creation, if the type name is
+prefixed by a keyspace name, it is created in that keyspace. Otherwise,
+it is created in the current keyspace.
+* In most cases, UDTs have to be frozen, hence the `frozen` keyword
+in the table definition above. Please see the section
+on xref:cql/types.adoc#frozen[frozen] for more details.
+
+=== UDT literals
+
+Once a user-defined type has been created, values can be input using a
+UDT literal:
+
+[source,bnf]
+----
+include::example$BNF/udt_literal.bnf[]
+----
+
+In other words, a UDT literal is like a xref:cql/types.adoc#maps[map] literal, but its
+keys are the names of the fields of the type. For instance, one could
+insert into the table defined in the previous section using:
+
+[source,cql]
+----
+include::example$CQL/insert_udt.cql[]
+----
+
+To be valid, a UDT literal can only include fields defined by the
+type it is a literal of, but it can omit some fields (these will be set to `NULL`).
+
+=== Altering a UDT
+
+An existing user-defined type can be modified using an `ALTER TYPE`
+statement:
+
+[source,bnf]
+----
+include::example$BNF/alter_udt_statement.bnf[]
+----
+
+You can:
+
+* Add a new field to the type (`ALTER TYPE address ADD country text`).
+That new field will be `NULL` for any values of the type created before
+the addition.
+* Rename the fields of the type.
+
+[source,cql]
+----
+include::example$CQL/rename_udt_field.cql[]
+----
+
+=== Dropping a UDT
+
+You can drop an existing user-defined type using a `DROP TYPE`
+statement:
+
+[source,bnf]
+----
+include::example$BNF/drop_udt_statement.bnf[]
+----
+
+Dropping a type results in the immediate, irreversible removal of that
+type. However, attempting to drop a type that is still in use by another
+type, table or function will result in an error.
+
+If the type dropped does not exist, an error will be returned unless
+`IF EXISTS` is used, in which case the operation is a no-op.
+
+== Tuples
+
+CQL also supports tuples and tuple types (where the elements can be of
+different types). Functionally, tuples can be thought of as anonymous
+UDTs with anonymous fields. Tuple types and tuple literals are defined by:
+
+[source,bnf]
+----
+include::example$BNF/tuple.bnf[]
+----
+
+and can be created:
+
+[source,cql]
+----
+include::example$CQL/tuple.cql[]
+----
+
+Unlike other composed types, like collections and UDTs, a tuple is always
+`frozen` (without the need of the `frozen` keyword),
+and it is not possible to update only some elements of a tuple (without
+updating the whole tuple). Also, a tuple literal should always have the
+same number of values as declared in the type it is a tuple of (some of
+those values can be null, but they need to be explicitly declared as such).
+
+== Custom Types
+
+[NOTE]
+.Note
+====
+Custom types exist mostly for backward compatibility purposes and their
+usage is discouraged. They are complex and not user friendly, and the
+other provided types, particularly xref:cql/types.adoc#udts[user-defined types], should
+almost always be enough.
+====
+
+A custom type is defined by:
+
+[source,bnf]
+----
+include::example$BNF/custom_type.bnf[]
+----
+
+A custom type is a `string` that contains the name of a Java class that
+extends the server-side `AbstractType` class and that can be loaded by
+Cassandra (it should thus be in the `CLASSPATH` of every node running
+Cassandra). That class will define what values are valid for the type
+and how the type sorts when used for a clustering column. For any other
+purpose, a value of a custom type is the same as that of a `blob`, and
+can in particular be input using the `blob` literal syntax.
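+
+For illustration only, a column of a custom type is declared by quoting
+the class name (the class here is hypothetical and would need to be on
+the classpath of every node):
+
+[source,cql]
+----
+CREATE TABLE custom_values (
+    id int PRIMARY KEY,
+    v 'com.example.MyCustomType'  -- subclass of AbstractType
+);
+----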
diff --git a/doc/source/data_modeling/data_modeling_conceptual.rst b/doc/modules/cassandra/pages/data_modeling/data_modeling_conceptual.adoc
similarity index 52%
rename from doc/source/data_modeling/data_modeling_conceptual.rst
rename to doc/modules/cassandra/pages/data_modeling/data_modeling_conceptual.adoc
index 8749b799ea3..c1e1027f6dc 100644
--- a/doc/source/data_modeling/data_modeling_conceptual.rst
+++ b/doc/modules/cassandra/pages/data_modeling/data_modeling_conceptual.adoc
@@ -1,33 +1,14 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements. See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership. The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License. You may obtain a copy of the License at
-..
-.. http://www.apache.org/licenses/LICENSE-2.0
-..
-.. Unless required by applicable law or agreed to in writing, software
-.. distributed under the License is distributed on an "AS IS" BASIS,
-.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-.. See the License for the specific language governing permissions and
-.. limitations under the License.
-
-.. conceptual_data_modeling
-
-Conceptual Data Modeling
-^^^^^^^^^^^^^^^^^^^^^^^^
+= Conceptual Data Modeling
First, let’s create a simple domain model that is easy to understand in
-the relational world, and then see how you might map it from a relational
-to a distributed hashtable model in Cassandra.
+the relational world, and then see how you might map it from a
+relational to a distributed hashtable model in Cassandra.
-Let's use an example that is complex enough
-to show the various data structures and design patterns, but not
-something that will bog you down with details. Also, a domain that’s
-familiar to everyone will allow you to concentrate on how to work with
-Cassandra, not on what the application domain is all about.
+Let's use an example that is complex enough to show the various data
+structures and design patterns, but not something that will bog you down
+with details. Also, a domain that’s familiar to everyone will allow you
+to concentrate on how to work with Cassandra, not on what the
+application domain is all about.
For example, let's use a domain that is easily understood and that
everyone can relate to: making hotel reservations.
@@ -49,7 +30,7 @@ underlined. Relationships between entities are represented as diamonds,
and the connectors between the relationship and each entity show the
multiplicity of the connection.
-.. image:: images/data_modeling_hotel_erd.png
+image::data_modeling_hotel_erd.png[image]
Obviously, in the real world, there would be many more considerations
and much more complexity. For example, hotel rates are notoriously
@@ -58,6 +39,6 @@ you’re defining something complex enough to be interesting and touch on
the important points, but simple enough to maintain the focus on
learning Cassandra.
-*Material adapted from Cassandra, The Definitive Guide. Published by
-O'Reilly Media, Inc. Copyright © 2020 Jeff Carpenter, Eben Hewitt.
-All rights reserved. Used with permission.*
\ No newline at end of file
+_Material adapted from Cassandra, The Definitive Guide. Published by
+O'Reilly Media, Inc. Copyright © 2020 Jeff Carpenter, Eben Hewitt. All
+rights reserved. Used with permission._
diff --git a/doc/modules/cassandra/pages/data_modeling/data_modeling_logical.adoc b/doc/modules/cassandra/pages/data_modeling/data_modeling_logical.adoc
new file mode 100644
index 00000000000..bcbfa781b09
--- /dev/null
+++ b/doc/modules/cassandra/pages/data_modeling/data_modeling_logical.adoc
@@ -0,0 +1,195 @@
+= Logical Data Modeling
+
+Now that you have defined your queries, you’re ready to begin designing
+Cassandra tables. First, create a logical model containing a table for
+each query, capturing entities and relationships from the conceptual
+model.
+
+To name each table, you’ll identify the primary entity type for which
+you are querying and use that to start the entity name. If you are
+querying by attributes of other related entities, append those to the
+table name, separated with `_by_`. For example, `hotels_by_poi`.
+
+Next, you identify the primary key for the table, adding partition key
+columns based on the required query attributes, and clustering columns
+in order to guarantee uniqueness and support desired sort ordering.
+
+The design of the primary key is extremely important, as it will
+determine how much data will be stored in each partition and how that
+data is organized on disk, which in turn will affect how quickly
+Cassandra processes reads.
+
+Complete each table by adding any additional attributes identified by
+the query. If any of these additional attributes are the same for every
+instance of the partition key, mark the column as static.
+
+Now that was a pretty quick description of a fairly involved process, so
+it will be worthwhile to work through a detailed example. First, let’s
+introduce a notation that you can use to represent logical models.
+
+Several individuals within the Cassandra community have proposed
+notations for capturing data models in diagrammatic form. This document
+uses a notation popularized by Artem Chebotko which provides a simple,
+informative way to visualize the relationships between queries and
+tables in your designs. This figure shows the Chebotko notation for a
+logical data model.
+
+image::data_modeling_chebotko_logical.png[image]
+
+Each table is shown with its title and a list of columns. Primary key
+columns are identified via symbols such as *K* for partition key columns
+and **C**↑ or **C**↓ to represent clustering columns. Lines are shown
+entering tables or between tables to indicate the queries that each
+table is designed to support.
+
+== Hotel Logical Data Model
+
+The figure below shows a Chebotko logical data model for the queries
+involving hotels, points of interest, rooms, and amenities. One thing
+you'll notice immediately is that the Cassandra design doesn’t include
+dedicated tables for rooms or amenities, as you had in the relational
+design. This is because the workflow didn’t identify any queries
+requiring this direct access.
+
+image::data_modeling_hotel_logical.png[image]
+
+Let’s explore the details of each of these tables.
+
+The first query Q1 is to find hotels near a point of interest, so you’ll
+call this table `hotels_by_poi`. Searching by a named point of interest
+is a clue that the point of interest should be a part of the primary
+key. Let’s reference the point of interest by name, because according to
+the workflow that is how users will start their search.
+
+You’ll note that you certainly could have more than one hotel near a
+given point of interest, so you’ll need another component in the primary
+key in order to make sure you have a unique partition for each hotel. So
+you add the hotel key as a clustering column.
+
+An important consideration in designing your table’s primary key is
+making sure that it defines a unique data element. Otherwise you run the
+risk of accidentally overwriting data.
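+
+Expressed in CQL, this design might look like the following sketch (the
+non-key columns are assumptions based on the description, not the exact
+book design):
+
+[source,cql]
+----
+CREATE TABLE hotels_by_poi (
+    poi_name text,
+    hotel_id text,
+    name text,
+    phone text,
+    PRIMARY KEY ((poi_name), hotel_id)
+);
+----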
+
+Now for the second query (Q2), you’ll need a table to get information
+about a specific hotel. One approach would have been to put all of the
+attributes of a hotel in the `hotels_by_poi` table, but you added only
+those attributes that were required by the application workflow.
+
+From the workflow diagram, you know that the `hotels_by_poi` table is
+used to display a list of hotels with basic information on each hotel,
+and the application knows the unique identifiers of the hotels returned.
+When the user selects a hotel to view details, you can then use Q2,
+which is used to obtain details about the hotel. Because you already
+have the `hotel_id` from Q1, you use that as a reference to the hotel
+you’re looking for. Therefore the second table is just called `hotels`.
+
+Another option would have been to store a set of `poi_names` in the
+hotels table. This is an equally valid approach. You’ll learn through
+experience which approach is best for your application.
+
+Q3 is just the reverse of Q1: looking for points of interest near a hotel,
+rather than hotels near a point of interest. This time, however, you
+need to access the details of each point of interest, as represented by
+the `pois_by_hotel` table. As previously, you add the point of interest
+name as a clustering key to guarantee uniqueness.
+
+At this point, let’s now consider how to support query Q4 to help the
+user find available rooms at a selected hotel for the nights they are
+interested in staying. Note that this query involves both a start date
+and an end date. Because you’re querying over a range instead of a
+single date, you know that you’ll need to use the date as a clustering
+key. Use the `hotel_id` as a primary key to group room data for each
+hotel on a single partition, which should help searches be super fast.
+Let’s call this the `available_rooms_by_hotel_date` table.
+
+To support searching over a range, use clustering columns to store
+attributes that you need to access in a range query. Remember that the
+order of the clustering columns is important.
+
+The design of the `available_rooms_by_hotel_date` table is an instance
+of the *wide partition* pattern. This pattern is sometimes called the
+*wide row* pattern when discussing databases that support similar
+models, but wide partition is a more accurate description from a
+Cassandra perspective. The essence of the pattern is to group multiple
+related rows in a partition in order to support fast access to multiple
+rows within the partition in a single query.
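+
+A CQL sketch of this wide partition design (column names and types are
+assumptions; the physical model below refines them):
+
+[source,cql]
+----
+CREATE TABLE available_rooms_by_hotel_date (
+    hotel_id text,
+    date date,
+    room_number smallint,
+    is_available boolean,
+    PRIMARY KEY ((hotel_id), date, room_number)
+);
+----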
+
+In order to round out the shopping portion of the data model, add the
+`amenities_by_room` table to support Q5. This will allow users to view
+the amenities of one of the rooms that is available for the desired stay
+dates.
+
+== Reservation Logical Data Model
+
+Now let's switch gears to look at the reservation queries. The figure
+shows a logical data model for reservations. You’ll notice that these
+tables represent a denormalized design; the same data appears in
+multiple tables, with differing keys.
+
+image::data_modeling_reservation_logical.png[image]
+
+In order to satisfy Q6, the `reservations_by_guest` table can be used to
+look up the reservation by guest name. You could envision query Q7 being
+used on behalf of a guest on a self-serve website or a call center agent
+trying to assist the guest. Because the guest name might not be unique,
+you include the guest ID here as a clustering column as well.
+
+Q8 and Q9 in particular help to remind you to create queries that
+support various stakeholders of the application, not just customers but
+staff as well, and perhaps even the analytics team, suppliers, and so
+on.
+
+The hotel staff might wish to see a record of upcoming reservations by
+date in order to get insight into how the hotel is performing, such as
+what dates the hotel is sold out or undersold. Q8 supports the retrieval
+of reservations for a given hotel by date.
+
+Finally, you create a `guests` table. This provides a single location
+used to store guest information. In this case, you specify a
+separate unique identifier for guest records, as it is not uncommon for
+guests to have the same name. In many organizations, a customer database
+such as the `guests` table would be part of a separate customer
+management application, which is why other guest access patterns were
+omitted from the example.
+
+== Patterns and Anti-Patterns
+
+As with other types of software design, there are some well-known
+patterns and anti-patterns for data modeling in Cassandra. You’ve
+already used one of the most common patterns in this hotel model: the
+wide partition pattern.
+
+The *time series* pattern is an extension of the wide partition pattern.
+In this pattern, a series of measurements at specific time intervals are
+stored in a wide partition, where the measurement time is used as part
+of the partition key. This pattern is frequently used in domains
+including business analysis, sensor data management, and scientific
+experiments.
+
+The time series pattern is also useful for data other than measurements.
+Consider the example of a banking application. You could store each
+customer’s balance in a row, but that might lead to a lot of read and
+write contention as various customers check their balance or make
+transactions. You’d probably be tempted to wrap a transaction around
+writes just to protect the balance from being updated in error. In
+contrast, a time series–style design would store each transaction as a
+timestamped row and leave the work of calculating the current balance to
+the application.
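+
+A sketch of such a time series design (table and column names are
+illustrative assumptions):
+
+[source,cql]
+----
+CREATE TABLE transactions_by_account (
+    account_id text,
+    tx_time timestamp,
+    amount decimal,
+    PRIMARY KEY ((account_id), tx_time)
+) WITH CLUSTERING ORDER BY (tx_time DESC);
+----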
+
+One design trap that many new users fall into is attempting to use
+Cassandra as a queue. Each item in the queue is stored with a timestamp
+in a wide partition. Items are appended to the end of the queue and read
+from the front, being deleted after they are read. This is a design that
+seems attractive, especially given its apparent similarity to the time
+series pattern. The problem with this approach is that the deleted items
+are now _tombstones_ that Cassandra must scan past in
+order to read from the front of the queue. Over time, a growing number
+of tombstones begins to degrade read performance.
+
+The queue anti-pattern serves as a reminder that any design that relies
+on the deletion of data is potentially a poorly performing design.
+
+_Material adapted from Cassandra, The Definitive Guide. Published by
+O'Reilly Media, Inc. Copyright © 2020 Jeff Carpenter, Eben Hewitt. All
+rights reserved. Used with permission._
diff --git a/doc/modules/cassandra/pages/data_modeling/data_modeling_physical.adoc b/doc/modules/cassandra/pages/data_modeling/data_modeling_physical.adoc
new file mode 100644
index 00000000000..09340670145
--- /dev/null
+++ b/doc/modules/cassandra/pages/data_modeling/data_modeling_physical.adoc
@@ -0,0 +1,96 @@
+= Physical Data Modeling
+
+Once you have a logical data model defined, creating the physical model
+is a relatively simple process.
+
+You walk through each of the logical model tables, assigning types to
+each item. You can use any valid xref:cql/types.adoc[CQL data type], including
+the basic types, collections, and user-defined types. You may identify
+additional user-defined types that can be created to simplify your
+design.
+
+After you’ve assigned data types, you analyze the model by performing
+size calculations and testing out how the model works. You may make some
+adjustments based on your findings. Once again let's cover the data
+modeling process in more detail by working through an example.
+
+Before getting started, let’s look at a few additions to the Chebotko
+notation for physical data models. To draw physical models, you need to
+be able to add the typing information for each column. This figure shows
+the addition of a type for each column in a sample table.
+
+image::data_modeling_chebotko_physical.png[image]
+
+The figure includes a designation of the keyspace containing each table
+and visual cues for columns represented using collections and
+user-defined types. Note the designation of static columns and secondary
+index columns. There is no restriction on assigning these as part of a
+logical model, but they are typically more of a physical data modeling
+concern.
+
+== Hotel Physical Data Model
+
+Now let’s get to work on the physical model. First, you need keyspaces
+to contain the tables. To keep the design relatively simple, create a
+`hotel` keyspace to contain tables for hotel and availability data, and
+a `reservation` keyspace to contain tables for reservation and guest
+data. In a real system, you might divide the tables across even more
+keyspaces in order to separate concerns.
+
+For the `hotels` table, use Cassandra’s `text` type to represent the
+hotel’s `id`. For the address, create an `address` user-defined type.
+Use the `text` type to represent the phone number, as there is
+considerable variance in the formatting of numbers between countries.
+
+While it would make sense to use the `uuid` type for attributes such as
+the `hotel_id`, this document uses mostly `text` attributes as
+identifiers, to keep the samples simple and readable. For example, a
+common convention in the hospitality industry is to reference properties
+by short codes like "AZ123" or "NY229". This example uses these values
+for `hotel_ids`, while acknowledging they are not necessarily globally
+unique.
+
+You’ll find that it’s often helpful to use unique IDs to uniquely
+reference elements, and to use these `uuids` as references in tables
+representing other entities. This helps to minimize coupling between
+different entity types. This may prove especially effective if you are
+using a microservice architectural style for your application, in which
+there are separate services responsible for each entity type.
+
+As you work to create physical representations of various tables in the
+logical hotel data model, you use the same approach. The resulting
+design is shown in this figure:
+
+image::data_modeling_hotel_physical.png[image]
+
+Note that the `address` type is also included in the design. It is
+designated with an asterisk to denote that it is a user-defined type,
+and has no primary key columns identified. This type is used in the
+`hotels` and `hotels_by_poi` tables.
+
+User-defined types are frequently used to help reduce duplication of
+non-primary key columns, as was done with the `address` user-defined
+type. This can reduce complexity in the design.
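+
+As a sketch, the `address` type and its use in the `hotels` table might
+look like this (the field names are assumptions based on the
+description):
+
+[source,cql]
+----
+CREATE TYPE hotel.address (
+    street text,
+    city text,
+    state_or_province text,
+    postal_code text,
+    country text
+);
+
+CREATE TABLE hotel.hotels (
+    id text PRIMARY KEY,
+    name text,
+    phone text,
+    address frozen<address>
+);
+----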
+
+Remember that the scope of a UDT is the keyspace in which it is defined.
+To use `address` in the `reservation` keyspace defined below,
+you’ll have to declare it again. This is just one of the many trade-offs
+you have to make in data model design.
+
+== Reservation Physical Data Model
+
+Now, let’s examine reservation tables in the design. Remember that the
+logical model contained three denormalized tables to support queries for
+reservations by confirmation number, guest, and hotel and date. For the
+first iteration of your physical data model design, assume you're going
+to manage this denormalization manually. Note that this design could be
+revised to use Cassandra’s (experimental) materialized view feature.
+
+image::data_modeling_reservation_physical.png[image]
+
+Note that the `address` type is reproduced in this keyspace and
+`guest_id` is modeled as a `uuid` type in all of the tables.
+
+_Material adapted from Cassandra, The Definitive Guide. Published by
+O'Reilly Media, Inc. Copyright © 2020 Jeff Carpenter, Eben Hewitt. All
+rights reserved. Used with permission._
diff --git a/doc/modules/cassandra/pages/data_modeling/data_modeling_queries.adoc b/doc/modules/cassandra/pages/data_modeling/data_modeling_queries.adoc
new file mode 100644
index 00000000000..21f98018122
--- /dev/null
+++ b/doc/modules/cassandra/pages/data_modeling/data_modeling_queries.adoc
@@ -0,0 +1,60 @@
+= Defining Application Queries
+
+Let’s try the query-first approach to start designing the data model for
+a hotel application. The user interface design for the application is
+often a great artifact to use to begin identifying queries. Let’s assume
+that you’ve talked with the project stakeholders and your UX designers
+have produced user interface designs or wireframes for the key use
+cases. You’ll likely have a list of shopping queries like the following:
+
+* Q1. Find hotels near a given point of interest.
+* Q2. Find information about a given hotel, such as its name and
+location.
+* Q3. Find points of interest near a given hotel.
+* Q4. Find an available room in a given date range.
+* Q5. Find the rate and amenities for a room.
+
+It is often helpful to be able to refer to queries by a shorthand number
+rather than explaining them in full. The queries listed here are
+numbered Q1, Q2, and so on, which is how they are referenced in diagrams
+throughout the example.
+
+Now if the application is to be a success, you’ll certainly want
+customers to be able to book reservations at hotels. This includes steps
+such as selecting an available room and entering their guest
+information. So clearly you will also need some queries that address the
+reservation and guest entities from the conceptual data model. Even
+here, however, you’ll want to think not only from the customer
+perspective in terms of how the data is written, but also in terms of
+how the data will be queried by downstream use cases.
+
+Your natural tendency might be to focus first on designing the tables
+to store reservation and guest records, and only then start thinking
+about the queries that would access them. You may have felt a similar
+tension already when discussing the shopping queries before, thinking
+“but where did the hotel and point of interest data come from?” Don’t
+worry, you will see soon enough. Here are some queries that describe how
+users will access reservations:
+
+* Q6. Look up a reservation by confirmation number.
+* Q7. Look up a reservation by hotel, date, and guest name.
+* Q8. Look up all reservations by guest name.
+* Q9. View guest details.
+
+All of the queries are shown in the context of the workflow of the
+application in the figure below. Each box on the diagram represents a
+step in the application workflow, with arrows indicating the flows
+between steps and the associated query. If you’ve modeled the
+application well, each step of the workflow accomplishes a task that
+“unlocks” subsequent steps. For example, the “View hotels near POI” task
+helps the application learn about several hotels, including their unique
+keys. The key for a selected hotel may be used as part of Q2, in order
+to obtain detailed description of the hotel. The act of booking a room
+creates a reservation record that may be accessed by the guest and hotel
+staff at a later time through various additional queries.
+
+image::data_modeling_hotel_queries.png[image]
+
+_Material adapted from Cassandra, The Definitive Guide. Published by
+O'Reilly Media, Inc. Copyright © 2020 Jeff Carpenter, Eben Hewitt. All
+rights reserved. Used with permission._
diff --git a/doc/source/data_modeling/data_modeling_rdbms.rst b/doc/modules/cassandra/pages/data_modeling/data_modeling_rdbms.adoc
similarity index 74%
rename from doc/source/data_modeling/data_modeling_rdbms.rst
rename to doc/modules/cassandra/pages/data_modeling/data_modeling_rdbms.adoc
index 7d67d69fcc0..b478df14a1c 100644
--- a/doc/source/data_modeling/data_modeling_rdbms.rst
+++ b/doc/modules/cassandra/pages/data_modeling/data_modeling_rdbms.adoc
@@ -1,46 +1,25 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements. See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership. The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License. You may obtain a copy of the License at
-..
-.. http://www.apache.org/licenses/LICENSE-2.0
-..
-.. Unless required by applicable law or agreed to in writing, software
-.. distributed under the License is distributed on an "AS IS" BASIS,
-.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-.. See the License for the specific language governing permissions and
-.. limitations under the License.
-
-RDBMS Design
-============
+= RDBMS Design
When you set out to build a new data-driven application that will use a
relational database, you might start by modeling the domain as a set of
properly normalized tables and use foreign keys to reference related
data in other tables.
-The figure below shows how you might represent the data storage for your application
-using a relational database model. The relational model includes a
-couple of “join” tables in order to realize the many-to-many
+The figure below shows how you might represent the data storage for your
+application using a relational database model. The relational model
+includes a couple of “join” tables in order to realize the many-to-many
relationships from the conceptual model of hotels-to-points of interest,
rooms-to-amenities, rooms-to-availability, and guests-to-rooms (via a
reservation).
-.. image:: images/data_modeling_hotel_relational.png
+image::data_modeling_hotel_relational.png[image]
-.. design_differences_between_rdbms_and_cassandra
-
-Design Differences Between RDBMS and Cassandra
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+== Design Differences Between RDBMS and Cassandra
Let’s take a minute to highlight some of the key differences in doing
data modeling for Cassandra versus a relational database.
-No joins
-~~~~~~~~
+=== No joins
You cannot perform joins in Cassandra. If you have designed a data model
and find that you need something like a join, you’ll have to either do
@@ -49,8 +28,7 @@ represents the join results for you. This latter option is preferred in
Cassandra data modeling. Performing joins on the client should be a very
rare case; you really want to duplicate (denormalize) the data instead.
-No referential integrity
-~~~~~~~~~~~~~~~~~~~~~~~~
+=== No referential integrity
Although Cassandra supports features such as lightweight transactions
and batches, Cassandra itself has no concept of referential integrity
@@ -60,8 +38,7 @@ But Cassandra does not enforce this. It is still a common design
requirement to store IDs related to other entities in your tables, but
operations such as cascading deletes are not available.
-Denormalization
-~~~~~~~~~~~~~~~
+=== Denormalization
In relational database design, you are often taught the importance of
normalization. This is not an advantage when working with Cassandra
@@ -93,16 +70,14 @@ perfectly normal. It’s not required if your data model is simple. But
don’t be afraid of it.
Historically, denormalization in Cassandra has required designing and
-managing multiple tables using techniques described in this documentation.
-Beginning with the 3.0 release, Cassandra provides a feature known
-as :ref:`materialized views `
-which allows you to create multiple denormalized
-views of data based on a base table design. Cassandra manages
-materialized views on the server, including the work of keeping the
-views in sync with the table.
+managing multiple tables using techniques described in this
+documentation. Beginning with the 3.0 release, Cassandra provides a
+feature known as `materialized views ` which allows
+you to create multiple denormalized views of data based on a base table
+design. Cassandra manages materialized views on the server, including
+the work of keeping the views in sync with the table.
-Query-first design
-~~~~~~~~~~~~~~~~~~
+=== Query-first design
Relational modeling, in simple terms, means that you start from the
conceptual domain and then represent the nouns in the domain in tables.
@@ -134,8 +109,7 @@ over time, and then you’ll have to work to update your data set. But
this is no different from defining the wrong tables, or needing
additional tables, in an RDBMS.
-Designing for optimal storage
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+=== Designing for optimal storage
In a relational database, it is frequently transparent to the user how
tables are stored on disk, and it is rare to hear of recommendations
@@ -150,11 +124,10 @@ in order to satisfy a given query. Because the partition is a unit of
storage that does not get divided across nodes, a query that searches a
single partition will typically yield the best performance.
-Sorting is a design decision
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+=== Sorting is a design decision
In an RDBMS, you can easily change the order in which records are
-returned to you by using ``ORDER BY`` in your query. The default sort
+returned to you by using `ORDER BY` in your query. The default sort
order is not configurable; by default, records are returned in the order
in which they are written. If you want to change the order, you just
modify your query, and you can sort by any list of columns.
@@ -162,10 +135,10 @@ modify your query, and you can sort by any list of columns.
In Cassandra, however, sorting is treated differently; it is a design
decision. The sort order available on queries is fixed, and is
determined entirely by the selection of clustering columns you supply in
-the ``CREATE TABLE`` command. The CQL ``SELECT`` statement does support
-``ORDER BY`` semantics, but only in the order specified by the
-clustering columns.
+the `CREATE TABLE` command. The CQL `SELECT` statement does support
+`ORDER BY` semantics, but only in the order specified by the clustering
+columns.
-*Material adapted from Cassandra, The Definitive Guide. Published by
-O'Reilly Media, Inc. Copyright © 2020 Jeff Carpenter, Eben Hewitt.
-All rights reserved. Used with permission.*
\ No newline at end of file
+_Material adapted from Cassandra, The Definitive Guide. Published by
+O'Reilly Media, Inc. Copyright © 2020 Jeff Carpenter, Eben Hewitt. All
+rights reserved. Used with permission._
diff --git a/doc/modules/cassandra/pages/data_modeling/data_modeling_refining.adoc b/doc/modules/cassandra/pages/data_modeling/data_modeling_refining.adoc
new file mode 100644
index 00000000000..09f16da094b
--- /dev/null
+++ b/doc/modules/cassandra/pages/data_modeling/data_modeling_refining.adoc
@@ -0,0 +1,201 @@
+= Evaluating and Refining Data Models
+
+Once you’ve created a physical model, there are some steps you’ll want
+to take to evaluate and refine table designs to help ensure optimal
+performance.
+
+== Calculating Partition Size
+
+The first thing that you want to look for is whether your tables will
+have partitions that will be overly large, or to put it another way, too
+wide. Partition size is measured by the number of cells (values) that
+are stored in the partition. Cassandra’s hard limit is 2 billion cells
+per partition, but you’ll likely run into performance issues before
+reaching that limit.
+
+In order to calculate the size of partitions, use the following formula:
+
+[latexmath]
+++++
+\[N_v = N_r (N_c - N_{pk} - N_s) + N_s\]
+++++
+
+The number of values (or cells) in the partition (N~v~) is equal to the
+number of static columns (N~s~) plus the product of the number of rows
+(N~r~) and the number of values per row. The number of values per row
+is defined as the number of columns (N~c~) minus the number of primary
+key columns (N~pk~) and static columns (N~s~).
+
+The number of columns tends to be relatively static, although it is
+possible to alter tables at runtime. For this reason, a primary driver
+of partition size is the number of rows in the partition. This is a key
+factor that you must consider in determining whether a partition has the
+potential to get too large. Two billion values sounds like a lot, but in
+a sensor system where tens or hundreds of values are measured every
+millisecond, the number of values starts to add up pretty fast.
+
+Let’s take a look at one of the tables to analyze the partition size.
+Because it has a wide partition design with one partition per hotel,
+look at the `available_rooms_by_hotel_date` table. The table has four
+columns total (N~c~ = 4), including three primary key columns (N~pk~ =
+3) and no static columns (N~s~ = 0). Plugging these values into the
+formula, the result is:
+
+[latexmath]
+++++
+\[N_v = N_r (4 - 3 - 0) + 0 = 1N_r\]
+++++
+
+Therefore the number of values for this table is equal to the number of
+rows. You still need to determine a number of rows. To do this, make
+estimates based on the application design. The table is storing a record
+for each room, in each hotel, for every night. Let's assume the
+system will be used to store two years of inventory at a time, and there
+are 5,000 hotels in the system, with an average of 100 rooms in each
+hotel.
+
+Since there is a partition for each hotel, the estimated number of rows
+per partition is as follows:
+
+[latexmath]
+++++
+\[N_r = 100 rooms/hotel \times 730 days = 73,000 rows\]
+++++
+
+This relatively small number of rows per partition is not going to get
+you in too much trouble, but if you start storing more dates of
+inventory, or don’t manage the size of the inventory well using TTL, you
+could start having issues. You still might want to look at breaking up
+this large partition, which you'll see how to do shortly.
+
+When performing sizing calculations, it is tempting to assume the
+nominal or average case for variables such as the number of rows.
+Consider calculating the worst case as well, as these sorts of
+predictions have a way of coming true in successful systems.
+
+== Calculating Size on Disk
+
+In addition to calculating the size of a partition, it is also an
+excellent idea to estimate the amount of disk space that will be
+required for each table you plan to store in the cluster. In order to
+determine the size, use the following formula to determine the size S~t~
+of a partition:
+
+[latexmath]
+++++
+\[S_t = \displaystyle\sum_i sizeOf\big (c_{k_i}\big) + \displaystyle\sum_j sizeOf\big(c_{s_j}\big) + N_r\times \bigg(\displaystyle\sum_k sizeOf\big(c_{r_k}\big) + \displaystyle\sum_l sizeOf\big(c_{c_l}\big)\bigg) +\]
+++++
+
+[latexmath]
+++++
+\[N_v\times sizeOf\big(t_{avg}\big)\]
+++++
+
+This is a bit more complex than the previous formula, but let's break it
+down a bit at a time. Let’s take a look at the notation first:
+
+* In this formula, c~k~ refers to partition key columns, c~s~ to static
+columns, c~r~ to regular columns, and c~c~ to clustering columns.
+* The term t~avg~ refers to the average number of bytes of metadata
+stored per cell, such as timestamps. It is typical to use an estimate of
+8 bytes for this value.
+* You'll recognize the number of rows N~r~ and number of values N~v~
+from previous calculations.
+* The *sizeOf()* function refers to the size in bytes of the CQL data
+type of each referenced column.
+
+The first term asks you to sum the size of the partition key columns.
+For this example, the `available_rooms_by_hotel_date` table has a single
+partition key column, the `hotel_id`, which is of type `text`. Assuming
+that hotel identifiers are simple 5-character codes, you have a 5-byte
+value, so the sum of the partition key column sizes is 5 bytes.
+
+The second term asks you to sum the size of the static columns. This
+table has no static columns, so the size is 0 bytes.
+
+The third term is the most involved, and for good reason—it is
+calculating the size of the cells in the partition. Sum the size of the
+clustering columns and regular columns. The two clustering columns are
+the `date`, which is 4 bytes, and the `room_number`, which is a 2-byte
+short integer, giving a sum of 6 bytes. There is only a single regular
+column, the boolean `is_available`, which is 1 byte in size. Summing the
+regular column size (1 byte) plus the clustering column size (6 bytes)
+gives a total of 7 bytes. To finish up the term, multiply this value by
+the number of rows (73,000), giving a result of 511,000 bytes (0.51 MB).
+
+The fourth term is simply counting the metadata that Cassandra
+stores for each cell. In the storage format used by Cassandra 3.0 and
+later, the amount of metadata for a given cell varies based on the type
+of data being stored, and whether or not custom timestamp or TTL values
+are specified for individual cells. For this table, reuse the number of
+values from the previous calculation (73,000) and multiply by 8, which
+gives 0.58 MB.
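+
+As a quick check of that arithmetic, in the same notation as the other
+formulas:
+
+[latexmath]
+++++
+\[73,000 cells \times 8 bytes/cell = 584,000 bytes \approx 0.58 MB\]
+++++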
+
+Adding these terms together, you get a final estimate:
+
+[latexmath]
+++++
+\[Partition size = 5 bytes + 0 bytes + 0.51 MB + 0.58 MB = 1.1 MB\]
+++++
+
+This formula is an approximation of the actual size of a partition on
+disk, but is accurate enough to be quite useful. Remembering that the
+partition must be able to fit on a single node, it looks like the table
+design will not put a lot of strain on disk storage.
+
+Cassandra’s storage engine was re-implemented for the 3.0 release,
+including a new format for SSTable files. The previous format stored a
+separate copy of the clustering columns as part of the record for each
+cell. The newer format eliminates this duplication, which reduces the
+size of stored data and simplifies the formula for computing that size.
+
+Keep in mind also that this estimate only counts a single replica of
+data. You will need to multiply the value obtained here by the number of
+partitions and the number of replicas specified by the keyspace’s
+replication strategy in order to determine the total required
+capacity for each table. This will come in handy when you plan your
+cluster.
+
+== Breaking Up Large Partitions
+
+As discussed previously, the goal is to design tables that can provide
+the data you need with queries that touch a single partition, or failing
+that, the minimum possible number of partitions. However, as shown in
+the examples, it is quite possible to design wide partition-style tables
+that approach Cassandra’s built-in limits. Performing sizing analysis on
+tables may reveal partitions that are potentially too large, either in
+number of values, size on disk, or both.
+
+The technique for splitting a large partition is straightforward: add an
+additional column to the partition key. In most cases, moving one of the
+existing columns into the partition key will be sufficient. Another
+option is to introduce an additional column to the table to act as a
+sharding key, but this requires additional application logic.
+
+Continuing to examine the available rooms example, if you add the `date`
+column to the partition key for the `available_rooms_by_hotel_date`
+table, each partition would then represent the availability of rooms at
+a specific hotel on a specific date. This will certainly yield
+partitions that are significantly smaller, perhaps too small, as the
+data for consecutive days will likely be on separate nodes.
+
+Another technique known as *bucketing* is often used to break the data
+into moderate-size partitions. For example, you could bucketize the
+`available_rooms_by_hotel_date` table by adding a `month` column to the
+partition key, perhaps represented as an integer. The comparison with
+the original design is shown in the figure below. While the `month`
+column is partially duplicative of the `date`, it provides a nice way of
+grouping related data in a partition that will not get too large.
+
+image::data_modeling_hotel_bucketing.png[image]
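+
+For illustration, here is a sketch of what the bucketed table
+definition might look like; this exact DDL is an assumption based on
+the description above, not taken from the text:
+
+[source,cql]
+----
+CREATE TABLE hotel.available_rooms_by_hotel_date (
+    hotel_id text,
+    month int,
+    date date,
+    room_number smallint,
+    is_available boolean,
+    PRIMARY KEY ((hotel_id, month), date, room_number) );
+----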
+
+If you really felt strongly about preserving a wide partition design,
+you could instead add the `room_id` to the partition key, so that each
+partition would represent the availability of the room across all dates.
+Because there was no query identified that involves searching
+availability of a specific room, the first or second design approach is
+most suitable to the application needs.
+
+_Material adapted from Cassandra, The Definitive Guide. Published by
+O'Reilly Media, Inc. Copyright © 2020 Jeff Carpenter, Eben Hewitt. All
+rights reserved. Used with permission._
diff --git a/doc/modules/cassandra/pages/data_modeling/data_modeling_schema.adoc b/doc/modules/cassandra/pages/data_modeling/data_modeling_schema.adoc
new file mode 100644
index 00000000000..7b0cf5cd355
--- /dev/null
+++ b/doc/modules/cassandra/pages/data_modeling/data_modeling_schema.adoc
@@ -0,0 +1,130 @@
+= Defining Database Schema
+
+Once you have finished evaluating and refining the physical model,
+you’re ready to implement the schema in CQL. Here is the schema for the
+`hotel` keyspace, using CQL’s comment feature to document the query
+pattern supported by each table:
+
+[source,cql]
+----
+CREATE KEYSPACE hotel WITH replication =
+  {'class': 'SimpleStrategy', 'replication_factor' : 3};
+
+CREATE TYPE hotel.address (
+ street text,
+ city text,
+ state_or_province text,
+ postal_code text,
+ country text );
+
+CREATE TABLE hotel.hotels_by_poi (
+ poi_name text,
+ hotel_id text,
+ name text,
+ phone text,
+ address frozen<address>,
+ PRIMARY KEY ((poi_name), hotel_id) )
+ WITH comment = 'Q1. Find hotels near given poi'
+ AND CLUSTERING ORDER BY (hotel_id ASC) ;
+
+CREATE TABLE hotel.hotels (
+ id text PRIMARY KEY,
+ name text,
+ phone text,
+ address frozen<address>,
+ pois set<text> )
+ WITH comment = 'Q2. Find information about a hotel';
+
+CREATE TABLE hotel.pois_by_hotel (
+ poi_name text,
+ hotel_id text,
+ description text,
+ PRIMARY KEY ((hotel_id), poi_name) )
+ WITH comment = 'Q3. Find pois near a hotel';
+
+CREATE TABLE hotel.available_rooms_by_hotel_date (
+ hotel_id text,
+ date date,
+ room_number smallint,
+ is_available boolean,
+ PRIMARY KEY ((hotel_id), date, room_number) )
+ WITH comment = 'Q4. Find available rooms by hotel date';
+
+CREATE TABLE hotel.amenities_by_room (
+ hotel_id text,
+ room_number smallint,
+ amenity_name text,
+ description text,
+ PRIMARY KEY ((hotel_id, room_number), amenity_name) )
+ WITH comment = 'Q5. Find amenities for a room';
+----
+
+Notice that the elements of the partition key are surrounded with
+parentheses, even though the partition key consists of the single column
+`poi_name`. This is a best practice that makes the selection of the
+partition key more explicit to others reading your CQL.
+
+Similarly, here is the schema for the `reservation` keyspace:
+
+[source,cql]
+----
+CREATE KEYSPACE reservation WITH replication = {'class':
+  'SimpleStrategy', 'replication_factor' : 3};
+
+CREATE TYPE reservation.address (
+ street text,
+ city text,
+ state_or_province text,
+ postal_code text,
+ country text );
+
+CREATE TABLE reservation.reservations_by_confirmation (
+ confirm_number text,
+ hotel_id text,
+ start_date date,
+ end_date date,
+ room_number smallint,
+ guest_id uuid,
+ PRIMARY KEY (confirm_number) )
+ WITH comment = 'Q6. Find reservations by confirmation number';
+
+CREATE TABLE reservation.reservations_by_hotel_date (
+ hotel_id text,
+ start_date date,
+ end_date date,
+ room_number smallint,
+ confirm_number text,
+ guest_id uuid,
+ PRIMARY KEY ((hotel_id, start_date), room_number) )
+ WITH comment = 'Q7. Find reservations by hotel and date';
+
+CREATE TABLE reservation.reservations_by_guest (
+ guest_last_name text,
+ hotel_id text,
+ start_date date,
+ end_date date,
+ room_number smallint,
+ confirm_number text,
+ guest_id uuid,
+ PRIMARY KEY ((guest_last_name), hotel_id) )
+ WITH comment = 'Q8. Find reservations by guest name';
+
+CREATE TABLE reservation.guests (
+ guest_id uuid PRIMARY KEY,
+ first_name text,
+ last_name text,
+ title text,
+ emails set<text>,
+ phone_numbers list<text>,
+ addresses map<text, frozen<address>>,
+ confirm_number text )
+ WITH comment = 'Q9. Find guest by ID';
+----
+
+You now have a complete Cassandra schema for storing data for a hotel
+application.
+
+_Material adapted from Cassandra, The Definitive Guide. Published by
+O'Reilly Media, Inc. Copyright © 2020 Jeff Carpenter, Eben Hewitt. All
+rights reserved. Used with permission._
diff --git a/doc/modules/cassandra/pages/data_modeling/data_modeling_tools.adoc b/doc/modules/cassandra/pages/data_modeling/data_modeling_tools.adoc
new file mode 100644
index 00000000000..0f3556f5b53
--- /dev/null
+++ b/doc/modules/cassandra/pages/data_modeling/data_modeling_tools.adoc
@@ -0,0 +1,44 @@
+= Cassandra Data Modeling Tools
+
+There are several tools available to help you design and manage your
+Cassandra schema and build queries.
+
+* https://hackolade.com/nosqldb.html#cassandra[Hackolade] is a data
+modeling tool that supports schema design for Cassandra and many other
+NoSQL databases. Hackolade supports the unique concepts of CQL such as
+partition keys and clustering columns, as well as data types including
+collections and UDTs. It also provides the ability to create Chebotko
+diagrams.
+* http://kdm.dataview.org/[Kashlev Data Modeler] is a Cassandra data
+modeling tool that automates the data modeling methodology described in
+this documentation, including identifying access patterns, conceptual,
+logical, and physical data modeling, and schema generation. It also
+includes model patterns that you can optionally leverage as a starting
+point for your designs.
+* DataStax DevCenter is a tool for managing schema, executing queries
+and viewing results. While the tool is no longer actively supported, it
+is still popular with many developers and is available as a
+https://academy.datastax.com/downloads[free download]. DevCenter
+features syntax highlighting for CQL commands, types, and name literals.
+DevCenter provides command completion as you type out CQL commands and
+interprets the commands you type, highlighting any errors you make. The
+tool provides panes for managing multiple CQL scripts and connections to
+multiple clusters. The connections are used to run CQL commands against
+live clusters and view the results. The tool also has a query trace
+feature that is useful for gaining insight into the performance of your
+queries.
+* IDE Plugins - There are CQL plugins available for several Integrated
+Development Environments (IDEs), such as IntelliJ IDEA and Apache
+NetBeans. These plugins typically provide features such as schema
+management and query execution.
+
+Some IDEs and tools that claim to support Cassandra do not actually
+support CQL natively, but instead access Cassandra using a JDBC/ODBC
+driver and interact with Cassandra as if it were a relational database
+with SQL support. When selecting tools for working with Cassandra you’ll
+want to make sure they support CQL and reinforce Cassandra best
+practices for data modeling as presented in this documentation.
+
+_Material adapted from Cassandra, The Definitive Guide. Published by
+O'Reilly Media, Inc. Copyright © 2020 Jeff Carpenter, Eben Hewitt. All
+rights reserved. Used with permission._
diff --git a/doc/modules/cassandra/pages/data_modeling/images/Figure_1_data_model.jpg b/doc/modules/cassandra/pages/data_modeling/images/Figure_1_data_model.jpg
new file mode 100644
index 00000000000..a3b330e7a39
Binary files /dev/null and b/doc/modules/cassandra/pages/data_modeling/images/Figure_1_data_model.jpg differ
diff --git a/doc/modules/cassandra/pages/data_modeling/images/Figure_2_data_model.jpg b/doc/modules/cassandra/pages/data_modeling/images/Figure_2_data_model.jpg
new file mode 100644
index 00000000000..7acdeac02ab
Binary files /dev/null and b/doc/modules/cassandra/pages/data_modeling/images/Figure_2_data_model.jpg differ
diff --git a/doc/modules/cassandra/pages/data_modeling/images/data_modeling_chebotko_logical.png b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_chebotko_logical.png
new file mode 100755
index 00000000000..e54b5f2740a
Binary files /dev/null and b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_chebotko_logical.png differ
diff --git a/doc/modules/cassandra/pages/data_modeling/images/data_modeling_chebotko_physical.png b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_chebotko_physical.png
new file mode 100644
index 00000000000..bfdaec55272
Binary files /dev/null and b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_chebotko_physical.png differ
diff --git a/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_bucketing.png b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_bucketing.png
new file mode 100644
index 00000000000..8b53e38f90e
Binary files /dev/null and b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_bucketing.png differ
diff --git a/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_erd.png b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_erd.png
new file mode 100755
index 00000000000..e86fe68f34f
Binary files /dev/null and b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_erd.png differ
diff --git a/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_logical.png b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_logical.png
new file mode 100755
index 00000000000..e920f12486d
Binary files /dev/null and b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_logical.png differ
diff --git a/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_physical.png b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_physical.png
new file mode 100644
index 00000000000..2d20a6ddbb9
Binary files /dev/null and b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_physical.png differ
diff --git a/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_queries.png b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_queries.png
new file mode 100755
index 00000000000..2434db39d4f
Binary files /dev/null and b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_queries.png differ
diff --git a/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_relational.png b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_relational.png
new file mode 100755
index 00000000000..43e784eea74
Binary files /dev/null and b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_hotel_relational.png differ
diff --git a/doc/modules/cassandra/pages/data_modeling/images/data_modeling_reservation_logical.png b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_reservation_logical.png
new file mode 100755
index 00000000000..0460633b68f
Binary files /dev/null and b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_reservation_logical.png differ
diff --git a/doc/modules/cassandra/pages/data_modeling/images/data_modeling_reservation_physical.png b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_reservation_physical.png
new file mode 100755
index 00000000000..1e6e76c16c5
Binary files /dev/null and b/doc/modules/cassandra/pages/data_modeling/images/data_modeling_reservation_physical.png differ
diff --git a/doc/modules/cassandra/pages/data_modeling/index.adoc b/doc/modules/cassandra/pages/data_modeling/index.adoc
new file mode 100644
index 00000000000..105f5a36331
--- /dev/null
+++ b/doc/modules/cassandra/pages/data_modeling/index.adoc
@@ -0,0 +1,11 @@
+= Data Modeling
+
+* xref:data_modeling/intro.adoc[Introduction]
+* xref:data_modeling/data_modeling_rdbms.adoc[RDBMS]
+* xref:data_modeling/data_modeling_conceptual.adoc[Conceptual]
+* xref:data_modeling/data_modeling_logical.adoc[Logical]
+* xref:data_modeling/data_modeling_physical.adoc[Physical]
+* xref:data_modeling/data_modeling_schema.adoc[Schema]
+* xref:data_modeling/data_modeling_queries.adoc[Queries]
+* xref:data_modeling/data_modeling_refining.adoc[Refining]
+* xref:data_modeling/data_modeling_tools.adoc[Tools]
diff --git a/doc/modules/cassandra/pages/data_modeling/intro.adoc b/doc/modules/cassandra/pages/data_modeling/intro.adoc
new file mode 100644
index 00000000000..92df8458792
--- /dev/null
+++ b/doc/modules/cassandra/pages/data_modeling/intro.adoc
@@ -0,0 +1,220 @@
+= Introduction
+
+Apache Cassandra stores data in tables, with each table consisting of
+rows and columns. CQL (Cassandra Query Language) is used to query the
+data stored in tables. The Apache Cassandra data model is designed
+around and optimized for querying. Cassandra does not support the
+relational data modeling intended for relational databases.
+
+== What is Data Modeling?
+
+Data modeling is the process of identifying entities and their
+relationships. In relational databases, data is placed in normalized
+tables with foreign keys used to reference related data in other tables.
+Queries that the application will make are driven by the structure of
+the tables and related data are queried as table joins.
+
+In Cassandra, data modeling is query-driven. The data access patterns
+and application queries determine the structure and organization of the
+data, which is then used to design the database tables.
+
+Data is modeled around specific queries. Queries are best designed to
+access a single table, which implies that all entities involved in a
+query must be in the same table to make data access (reads) very fast.
+Data is modeled to best suit a query or a set of queries. A table could
+have one or more entities as best suits a query. As entities do
+typically have relationships among them and queries could involve
+entities with relationships among them, a single entity may be included
+in multiple tables.
+
+== Query-driven modeling
+
+Unlike the relational database model, in which queries make use of
+table joins to get data from multiple tables, joins are not supported
+in Cassandra, so all required fields (columns) must be grouped together in a
+single table. Since each query is backed by a table, data is duplicated
+across multiple tables in a process known as denormalization. Data
+duplication and a high write throughput are used to achieve a high read
+performance.
+
+== Goals
+
+The choice of the primary key and partition key is important to
+distribute data evenly across the cluster. Keeping the number of
+partitions read for a query to a minimum is also important because
+different partitions could be located on different nodes and the
+coordinator would need to send a request to each node adding to the
+request overhead and latency. Even if the different partitions involved
+in a query are on the same node, fewer partitions make for a more
+efficient query.
+
+== Partitions
+
+Apache Cassandra is a distributed database that stores data across a
+cluster of nodes. A partition key is used to partition data among the
+nodes. Cassandra partitions data over the storage nodes using a variant
+of consistent hashing for data distribution. Hashing is a technique
+used to map data: given a key, a hash function generates a hash value
+(or simply a hash) that is stored in a hash table. A partition key is
+generated from the first field of a primary key. Partitioning data into
+hash tables using partition keys provides for rapid lookup. The fewer
+the partitions used for a query, the faster the response time.
+
+As an example of partitioning, consider table `t` in which `id` is the
+only field in the primary key.
+
+....
+CREATE TABLE t (
+ id int,
+ k int,
+ v text,
+ PRIMARY KEY (id)
+);
+....
+
+The partition key is generated from the primary key `id` for data
+distribution across the nodes in a cluster.
+
+Consider a variation of table `t` that has two fields constituting the
+primary key to make a composite or compound primary key.
+
+....
+CREATE TABLE t (
+ id int,
+ c text,
+ k int,
+ v text,
+ PRIMARY KEY (id,c)
+);
+....
+
+For the table `t` with a composite primary key the first field `id` is
+used to generate the partition key and the second field `c` is the
+clustering key used for sorting within a partition. Using clustering
+keys to sort data makes retrieval of adjacent data more efficient.
+
+In general, the first field or component of a primary key is hashed to
+generate the partition key and the remaining fields or components are
+the clustering keys that are used to sort data within a partition.
+Partitioning data improves the efficiency of reads and writes. The other
+fields that are not primary key fields may be indexed separately to
+further improve query performance.
+
+The partition key could be generated from multiple fields if they are
+grouped as the first component of a primary key. As another variation of
+the table `t`, consider a table with the first component of the primary
+key made of two fields grouped using parentheses.
+
+....
+CREATE TABLE t (
+ id1 int,
+ id2 int,
+ c1 text,
+ c2 text,
+ k int,
+ v text,
+ PRIMARY KEY ((id1,id2),c1,c2)
+);
+....
+
+For the preceding table `t`, the first component of the primary key,
+consisting of fields `id1` and `id2`, is used to generate the partition
+key, and the remaining fields `c1` and `c2` are the clustering keys
+used for sorting within a partition.
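+
+As a minimal sketch (the values are hypothetical), a query against this
+table must supply both partition key fields and may then filter on the
+clustering keys in order:
+
+....
+SELECT * FROM t
+WHERE id1 = 1 AND id2 = 2
+  AND c1 = 'a' AND c2 = 'b';
+....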
+
+== Comparing with Relational Data Model
+
+Relational databases store data in tables that have relations with other
+tables using foreign keys. A relational database’s approach to data
+modeling is table-centric. Queries must use table joins to get data from
+multiple tables that have a relation between them. Apache Cassandra does
+not have the concept of foreign keys or relational integrity. Apache
+Cassandra’s data model is based around designing efficient queries;
+queries that don’t involve multiple tables. Relational databases
+normalize data to avoid duplication. Apache Cassandra in contrast
+de-normalizes data by duplicating data in multiple tables for a
+query-centric data model. If a Cassandra data model cannot fully
+integrate the complexity of relationships between the different entities
+for a particular query, client-side joins in application code may be
+used.
+
+== Examples of Data Modeling
+
+As an example, a `magazine` data set consists of data for magazines with
+attributes such as magazine id, magazine name, publication frequency,
+publication date, and publisher. A basic query (Q1) for magazine data is
+to list all the magazine names including their publication frequency.
+As not all data attributes are needed for Q1, the data model would only
+consist of `id` (for the partition key), the magazine name, and the
+publication frequency, as shown in Figure 1.
+
+image::Figure_1_data_model.jpg[image]
+
+Figure 1. Data Model for Q1
+
+Another query (Q2) is to list all the magazine names by publisher. For
+Q2 the data model would consist of an additional attribute `publisher`
+for the partition key. The `id` would become the clustering key for
+sorting within a partition. The data model for Q2 is illustrated in
+Figure 2.
+
+image::Figure_2_data_model.jpg[image]
+
+Figure 2. Data Model for Q2
+
+== Designing Schema
+
+After the conceptual data model has been created, a schema may be
+designed for a query. For Q1 the following schema may be used.
+
+....
+CREATE TABLE magazine_name (id int PRIMARY KEY, name text, publicationFrequency text)
+....
+
+For Q2 the schema definition would include a clustering key for sorting.
+
+....
+CREATE TABLE magazine_publisher (publisher text,id int,name text, publicationFrequency text,
+PRIMARY KEY (publisher, id)) WITH CLUSTERING ORDER BY (id DESC)
+....
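+
+For illustration, Q2 could then be served by a query such as the
+following (the publisher value is hypothetical):
+
+....
+SELECT name, publicationFrequency FROM magazine_publisher WHERE publisher = 'Pearson';
+....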
+
+== Data Model Analysis
+
+The data model is a conceptual model that must be analyzed and optimized
+based on storage, capacity, redundancy and consistency. A data model may
+need to be modified as a result of the analysis. Considerations or
+limitations that are used in data model analysis include:
+
+* Partition Size
+* Data Redundancy
+* Disk space
+* Lightweight Transactions (LWT)
+
+The two measures of partition size are the number of values in a
+partition and the partition size on disk. Though requirements for these
+measures may vary based on the application, a general guideline is to
+keep the number of values per partition below 100,000 and the disk
+space per partition below 100 MB.
+
+Data redundancy, in the form of duplicate data in tables and multiple
+partition replicas, is to be expected in the design of a data model,
+but should nevertheless be kept to a minimum. Lightweight transactions
+(compare-and-set, conditional updates) can affect performance, so
+queries using LWT should also be kept to a minimum.
+
+== Using Materialized Views
+
+[WARNING]
+.Warning
+====
+Materialized views (MVs) are experimental in the latest (4.0) release.
+====
+Materialized views (MVs) can be used to implement multiple queries for
+a single table. A materialized view is a table built from data in
+another table, the base table, with a new primary key and new
+properties. Changes to the base table data automatically add and update
+data in the MV. Different queries may be implemented using a
+materialized view, as an MV's primary key differs from that of the base
+table. Queries are optimized by the primary key definition.
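+
+As a sketch only, an MV serving a lookup of the `magazine_name` table
+(defined earlier) by name might be created as follows; note that all
+base table primary key columns must appear in the MV's primary key and
+be filtered with `IS NOT NULL`:
+
+....
+CREATE MATERIALIZED VIEW magazine_by_name AS
+  SELECT * FROM magazine_name
+  WHERE name IS NOT NULL AND id IS NOT NULL
+  PRIMARY KEY (name, id);
+....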
diff --git a/doc/modules/cassandra/pages/faq/index.adoc b/doc/modules/cassandra/pages/faq/index.adoc
new file mode 100644
index 00000000000..41f921dc0c2
--- /dev/null
+++ b/doc/modules/cassandra/pages/faq/index.adoc
@@ -0,0 +1,290 @@
+= Frequently Asked Questions
+
+* <<why-cant-list-all>>
+* <<what-ports>>
+* <<what-happens-on-joins>>
+* <<asynch-deletes>>
+* <<one-entry-ring>>
+* <<change-replication-factor>>
+* <<can-large-blob>>
+* <<nodetool-connection-refused>>
+* <<to-batch-or-not-to-batch>>
+* <<selinux>>
+* <<how-to-unsubscribe>>
+* <<cassandra-eats-all-my-memory>>
+* <<what-are-seeds>>
+* <<are-seeds-SPOF>>
+* <<cant-call-jmx-method>>
+* <<why-message-dropped>>
+* <<oom-map-failed>>
+* <<what-on-same-timestamp-update>>
+* <<why-bootstrapping-stream-error>>
+
+[[why-cant-list-all]]
+== Why can't I set `listen_address` to listen on 0.0.0.0 (all my addresses)?
+
+Cassandra is a gossip-based distributed system and `listen_address` is
+the address a node tells other nodes to reach it at. Telling other nodes
+"contact me on any of my addresses" is a bad idea; if different nodes in
+the cluster pick different addresses for you, Bad Things happen.
+
+If you don't want to manually specify an IP to `listen_address` for each
+node in your cluster (understandable!), leave it blank and Cassandra
+will use `InetAddress.getLocalHost()` to pick an address. Then it's up
+to you or your ops team to make things resolve correctly (`/etc/hosts/`,
+dns, etc).
+
+One exception to this process is JMX, which by default binds to 0.0.0.0
+(Java bug 6425769).
+
+See https://issues.apache.org/jira/browse/CASSANDRA-256[CASSANDRA-256] and
+https://issues.apache.org/jira/browse/CASSANDRA-43[CASSANDRA-43] for more gory details.
+
+[[what-ports]]
+== What ports does Cassandra use?
+
+By default, Cassandra uses 7000 for cluster communication (7001 if SSL
+is enabled), 9042 for native protocol clients, and 7199 for JMX. The
+internode communication and native protocol ports are configurable in
+`cassandra.yaml`. The JMX port is configurable in `cassandra-env.sh`
+(through JVM options). All ports are TCP.
+
+[[what-happens-on-joins]]
+== What happens to existing data in my cluster when I add new nodes?
+
+When a new node joins a cluster, it will automatically contact the
+other nodes in the cluster and copy the right data to itself. See the
+documentation on topology changes for more detail.
+
+[[asynch-deletes]]
+== I delete data from Cassandra, but disk usage stays the same. What gives?
+
+Data you write to Cassandra gets persisted to SSTables. Since SSTables
+are immutable, the data can't actually be removed when you perform a
+delete; instead, a marker (also called a "tombstone") is written to
+indicate the value's new status. Never fear though; on the first
+compaction that occurs between the data and the tombstone, the data
+will be expunged completely and the corresponding disk space recovered.
+See the documentation on compaction for more detail.
+
+[[one-entry-ring]]
+== Why does nodetool ring only show one entry, even though my nodes logged that they see each other joining the ring?
+
+This happens when you have the same token assigned to each node. Don't
+do that.
+
+Most often this bites people who deploy by installing Cassandra on a VM
+(especially when using the Debian package, which auto-starts Cassandra
+after installation, thus generating and saving a token), then cloning
+that VM to other nodes.
+
+The easiest fix is to wipe the data and commitlog directories, thus
+making sure that each node will generate a random token on the next
+restart.
+
+[[change-replication-factor]]
+== Can I change the replication factor (of a keyspace) on a live cluster?
+
+Yes, but it will require running a full repair (or cleanup) to change
+the replica count of existing data:
+
+* Alter the replication factor for the desired keyspace (using cqlsh,
+for instance); see the example after this list.
+* If you're reducing the replication factor, run `nodetool cleanup` on
+the cluster to remove surplus replicated data. Cleanup runs on a
+per-node basis.
+* If you're increasing the replication factor, run
+`nodetool repair -full` to ensure data is replicated according to the
+new configuration. Repair runs on a per-replica set basis. This is an
+intensive process that may result in adverse cluster performance. It's
+highly recommended to do rolling repairs, as an attempt to repair the
+entire cluster at once will most likely swamp it. Note that you will
+need to run a full repair (`-full`) to make sure that already repaired
+sstables are not skipped.
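+
+For example, a minimal sketch of the first step, assuming a
+hypothetical keyspace named `my_keyspace` and a datacenter named `dc1`:
+
+....
+ALTER KEYSPACE my_keyspace
+  WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 3};
+....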
+
+[[can-large-blob]]
+== Can I Store (large) BLOBs in Cassandra?
+
+Cassandra isn't optimized for large file or BLOB storage and a single
+`blob` value is always read and sent to the client entirely. As such,
+storing small blobs (less than single digit MB) should not be a problem,
+but it is advised to manually split large blobs into smaller chunks.
+
+Please note in particular that by default, any value greater than 16MB
+will be rejected by Cassandra due to the `max_mutation_size_in_kb`
+setting in the `cassandra.yaml` file (which defaults to half of
+`commitlog_segment_size_in_mb`, which itself defaults to 32MB).
+
+[[nodetool-connection-refused]]
+== Nodetool says "Connection refused to host: 127.0.1.1" for any remote host. What gives?
+
+Nodetool relies on JMX, which in turn relies on RMI, which in turn sets
+up its own listeners and connectors as needed on each end of the
+exchange. Normally all of this happens behind the scenes transparently,
+but incorrect name resolution for either the host connecting, or the one
+being connected to, can result in crossed wires and confusing
+exceptions.
+
+If you are not using DNS, then make sure that your `/etc/hosts` files
+are accurate on both ends. If that fails, try setting the
+`-Djava.rmi.server.hostname=<public name>` JVM option near the bottom
+of `cassandra-env.sh` to an interface that you can reach from the
+remote machine.
+
+[[to-batch-or-not-to-batch]]
+== Will batching my operations speed up my bulk load?
+
+No. Using batches to load data will generally just add "spikes" of
+latency. Use asynchronous INSERTs instead, or use true bulk loading.
+
+An exception is batching updates to a single partition, which can be a
+Good Thing (as long as the size of a single batch stays reasonable). But
+never ever blindly batch everything!
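+
+For illustration, a minimal sketch of a single-partition batch,
+assuming a hypothetical `user_activity` table whose partition key is
+`user_id` (both rows below land in the same partition):
+
+....
+BEGIN BATCH
+  INSERT INTO user_activity (user_id, ts, action) VALUES ('u1', '2020-01-01 10:00:00', 'login');
+  INSERT INTO user_activity (user_id, ts, action) VALUES ('u1', '2020-01-01 10:05:00', 'logout');
+APPLY BATCH;
+....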
+
+[[selinux]]
+== On RHEL nodes are unable to join the ring
+
+Check if https://en.wikipedia.org/wiki/Security-Enhanced_Linux[SELinux]
+is on; if it is, turn it off.
+
+[[how-to-unsubscribe]]
+== How do I unsubscribe from the email list?
+
+Send an email to `user-unsubscribe@cassandra.apache.org`.
+
+[[cassandra-eats-all-my-memory]]
+== Why does top report that Cassandra is using a lot more memory than the Java heap max?
+
+Cassandra uses https://en.wikipedia.org/wiki/Memory-mapped_file[Memory
+Mapped Files] (mmap) internally. That is, we use the operating system's
+virtual memory system to map a number of on-disk files into the
+Cassandra process' address space. This will "use" virtual memory, i.e.
+address space, and will be reported by tools like top accordingly, but
+on 64-bit systems virtual address space is effectively unlimited, so
+you should not worry about that.
+
+What matters from the perspective of "memory use", in the sense it is
+normally meant, is the amount of data allocated via brk() or mmap'd
+/dev/zero, which represents real memory used. The key issue is that for a
+mmap'd file, there is never a need to retain the data resident in
+physical memory. Thus, whatever you do keep resident in physical memory
+is essentially just there as a cache, in the same way as normal I/O will
+cause the kernel page cache to retain data that you read/write.
+
+The difference between normal I/O and mmap() is that in the mmap() case
+the memory is actually mapped to the process, thus affecting the virtual
+size as reported by top. The main argument for using mmap() instead of
+standard I/O is the fact that reading entails just touching memory - in
+the case of the memory being resident, you just read it - you don't even
+take a page fault (so no overhead in entering the kernel and doing a
+semi-context switch). This is covered in more detail
+http://www.varnish-cache.org/trac/wiki/ArchitectNotes[here].
+
+[[what-are-seeds]]
+== What are seeds?
+
+Seeds are used during startup to discover the cluster.
+
+If you configure your nodes to refer to some node as a seed, nodes in
+your ring tend to send Gossip messages to seeds more often (see the
+section on gossip) than to non-seeds. In other words, seeds work as
+hubs of the Gossip network. With seeds, each node can detect status
+changes of other nodes quickly.
+
+Seeds are also referred to by new nodes on bootstrap to learn about
+other nodes in the ring. When you add a new node to the ring, you need
+to specify at least one live seed to contact. Once a node joins the
+ring, it learns about the other nodes, so it doesn't need seeds on
+subsequent boots.
+
+You can make a node a seed at any time. There is nothing special about
+seed nodes; if you list a node in the seed list, it is a seed.
+
+Seeds do not auto bootstrap (i.e. if a node has itself in its seed list
+it will not automatically transfer data to itself). If you want a node
+to do that, bootstrap it first and then add it to the seed list later.
+If you have no data (a new install) you do not have to worry about
+bootstrap at all.
+
+Recommended usage of seeds:
+
+* pick two (or more) nodes per data center as seed nodes.
+* sync the seed list to all your nodes
+
+[[are-seeds-SPOF]]
+== Does single seed mean single point of failure?
+
+The ring can operate or boot without a seed; however, you will not be
+able to add new nodes to the cluster. It is recommended to configure
+multiple seeds in production systems.
+
+[[cant-call-jmx-method]]
+== Why can't I call jmx method X on jconsole?
+
+Some JMX operations use array arguments, and as jconsole doesn't
+support array arguments, those operations can't be called with jconsole
+(the buttons are inactive for them). You need to write a JMX client to
+call such operations, or use an array-capable JMX monitoring tool.
+
+[[why-message-dropped]]
+== Why do I see "... messages dropped ..." in the logs?
+
+This is a symptom of load shedding -- Cassandra defending itself against
+more requests than it can handle.
+
+Internode messages which are received by a node, but do not get
+processed within their proper timeout (see `read_request_timeout`,
+`write_request_timeout`, ... in `cassandra.yaml`), are dropped rather
+than processed (since the coordinator node will no longer be waiting
+for a response).
+
+For writes, this means that the mutation was not applied to all
+replicas it was sent to. The inconsistency will be repaired by read
+repair, hints or a manual repair. The write operation may also have
+timed out as a result.
+
+For reads, this means a read request may not have completed.
+
+Load shedding is part of the Cassandra architecture; if this is a
+persistent issue it is generally a sign of an overloaded node or
+cluster.
+
+[[oom-map-failed]]
+== Cassandra dies with `java.lang.OutOfMemoryError: Map failed`
+
+If Cassandra is dying *specifically* with the "Map failed" message, it
+means the OS is denying java the ability to lock more memory. In linux,
+this typically means memlock is limited. Check
+`/proc/<pid>/limits` to verify this and raise it (eg, via
+ulimit in bash). You may also need to increase `vm.max_map_count`. Note
+that the debian package handles this for you automatically.
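+
+As a rough sketch (assuming a single Cassandra process and that `pgrep`
+is available), you might check and raise the limits like this:
+
+....
+$ grep "Max locked memory" /proc/$(pgrep -f CassandraDaemon)/limits
+$ ulimit -l unlimited
+$ sudo sysctl -w vm.max_map_count=1048575
+....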
+
+[[what-on-same-timestamp-update]]
+== What happens if two updates are made with the same timestamp?
+
+Updates must be commutative, since they may arrive in different orders
+on different replicas. As long as Cassandra has a deterministic way to
+pick the winner (in a timestamp tie), the one selected is as valid as
+any other, and the specifics should be treated as an implementation
+detail. That said, in the case of a timestamp tie, Cassandra follows two
+rules: first, deletes take precedence over inserts/updates. Second, if
+there are two updates, the one with the lexically larger value is
+selected.
+
+[[why-bootstrapping-stream-error]]
+== Why does bootstrapping a new node fail with a "Stream failed" error?
+
+Two main possibilities:
+
+. the GC may be creating long pauses disrupting the streaming process
+. compactions happening in the background hold streaming long enough
+that the TCP connection fails
+
+In the first case, regular GC tuning advice applies. In the second
+case, you need to set TCP keepalive to a lower value (the default is
+very high on Linux). Try just running the following:
+
+....
+$ sudo /sbin/sysctl -w net.ipv4.tcp_keepalive_time=60 net.ipv4.tcp_keepalive_intvl=60 net.ipv4.tcp_keepalive_probes=5
+....
+
+To make those settings permanent, add them to your `/etc/sysctl.conf`
+file.
+
+Note: https://cloud.google.com/compute/[GCE]'s firewall will always
+interrupt TCP connections that are inactive for more than 10 min.
+Running the above command is highly recommended in that environment.
diff --git a/doc/modules/cassandra/pages/getting_started/configuring.adoc b/doc/modules/cassandra/pages/getting_started/configuring.adoc
new file mode 100644
index 00000000000..ba72f97917b
--- /dev/null
+++ b/doc/modules/cassandra/pages/getting_started/configuring.adoc
@@ -0,0 +1,84 @@
+= Configuring Cassandra
+
+The location of Cassandra's configuration files varies, depending on the
+type of installation:
+
+* docker: `/etc/cassandra` directory
+* tarball: `conf` directory within the tarball install location
+* package: `/etc/cassandra` directory
+
+Cassandra's default configuration file, `cassandra.yaml`, is sufficient
+to explore a simple single-node `cluster`. However, anything beyond
+running a single-node cluster locally requires additional configuration
+to various Cassandra configuration files. Some examples that require
+non-default configuration are deploying a multi-node cluster or using
+clients that are not running on a cluster node.
+
+* `cassandra.yaml`: the main configuration file for Cassandra
+* `cassandra-env.sh`: environment variables can be set here
+* `cassandra-rackdc.properties` OR `cassandra-topology.properties`: set
+rack and datacenter information for a cluster
+* `logback.xml`: logging configuration including logging levels
+* `jvm-*`: a number of JVM configuration files for both the server and
+clients
+* `commitlog_archiving.properties`: set archiving parameters for the
+`commitlog`
+
+Two sample configuration files can also be found in `./conf`:
+
+* `metrics-reporter-config-sample.yaml`: configuring what the
+metrics-report will collect
+* `cqlshrc.sample`: how the CQL shell, cqlsh, can be configured
+
+== Main runtime properties
+
+Configuring Cassandra is done by setting yaml properties in the
+`cassandra.yaml` file. At a minimum you should consider setting the
+following properties (a sample sketch follows the list):
+
+* `cluster_name`: Set the name of your cluster.
+* `seeds`: A comma separated list of the IP addresses of your cluster
+`seed nodes`.
+* `storage_port`: Check that you don't have the default port of 7000
+blocked by a firewall.
+* `listen_address`: The listen address is the IP address of a node
+that allows it to communicate with other nodes in the cluster. Set to
+`localhost` by default. Alternatively, you can set `listen_interface`
+to tell Cassandra which interface to use, and consequently which
+address to use. Set one property, not both.
+* `native_transport_port`: Check that you don't have the default port of
+9042 blocked by a firewall, so that clients like cqlsh can communicate
+with Cassandra on this port.
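+
+For illustration only, a minimal sketch of these settings in
+`cassandra.yaml`, assuming hypothetical addresses on a `10.0.0.0/24`
+network:
+
+[source,yaml]
+----
+cluster_name: 'MyCluster'
+storage_port: 7000
+native_transport_port: 9042
+listen_address: 10.0.0.3
+# seeds are set via the seed provider:
+seed_provider:
+  - class_name: org.apache.cassandra.locator.SimpleSeedProvider
+    parameters:
+      - seeds: "10.0.0.1,10.0.0.2"
+----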
+
+== Changing the location of directories
+
+The following yaml properties control the location of directories:
+
+* `data_file_directories`: One or more directories where data files,
+like `SSTables` are located.
+* `commitlog_directory`: The directory where commitlog files are
+located.
+* `saved_caches_directory`: The directory where saved caches are
+located.
+* `hints_directory`: The directory where `hints` are located.
+
+For performance reasons, if you have multiple disks, consider putting
+commitlog and data files on different disks.
+
+== Environment variables
+
+JVM-level settings such as heap size can be set in `cassandra-env.sh`.
+You can add any additional JVM command line argument to the `JVM_OPTS`
+environment variable; when Cassandra starts, these arguments will be
+passed to the JVM.
+
+== Logging
+
+The default logger is `logback`. By default it will log:
+
+* *INFO* level in `system.log`
+* *DEBUG* level in `debug.log`
+
+When running in the foreground, it will also log at INFO level to the
+console. You can change logging properties by editing `logback.xml` or
+by running the `nodetool setlogginglevel` command.
diff --git a/doc/modules/cassandra/pages/getting_started/drivers.adoc b/doc/modules/cassandra/pages/getting_started/drivers.adoc
new file mode 100644
index 00000000000..eb15a558305
--- /dev/null
+++ b/doc/modules/cassandra/pages/getting_started/drivers.adoc
@@ -0,0 +1,90 @@
+= Client drivers
+
+Here are known Cassandra client drivers organized by language. Before
+choosing a driver, you should verify the Cassandra version and
+functionality supported by a specific driver.
+
+== Java
+
+* http://achilles.archinnov.info/[Achilles]
+* https://github.com/Netflix/astyanax/wiki/Getting-Started[Astyanax]
+* https://github.com/noorq/casser[Casser]
+* https://github.com/datastax/java-driver[Datastax Java driver]
+* https://github.com/impetus-opensource/Kundera[Kundera]
+* https://github.com/deanhiller/playorm[PlayORM]
+
+== Python
+
+* https://github.com/datastax/python-driver[Datastax Python driver]
+
+== Ruby
+
+* https://github.com/datastax/ruby-driver[Datastax Ruby driver]
+
+== C# / .NET
+
+* https://github.com/pchalamet/cassandra-sharp[Cassandra Sharp]
+* https://github.com/datastax/csharp-driver[Datastax C# driver]
+* https://github.com/managedfusion/fluentcassandra[Fluent Cassandra]
+
+== Nodejs
+
+* https://github.com/datastax/nodejs-driver[Datastax Nodejs driver]
+
+== PHP
+
+* http://code.google.com/a/apache-extras.org/p/cassandra-pdo[CQL | PHP]
+* https://github.com/datastax/php-driver/[Datastax PHP driver]
+* https://github.com/aparkhomenko/php-cassandra[PHP-Cassandra]
+* https://github.com/duoshuo/php-cassandra[PHP Library for Cassandra]
+
+== C++
+
+* https://github.com/datastax/cpp-driver[Datastax C++ driver]
+* http://sourceforge.net/projects/libqtcassandra[libQTCassandra]
+
+== Scala
+
+* https://github.com/datastax/spark-cassandra-connector[Datastax Spark
+connector]
+* https://github.com/newzly/phantom[Phantom]
+* https://github.com/getquill/quill[Quill]
+
+== Clojure
+
+* https://github.com/mpenet/alia[Alia]
+* https://github.com/clojurewerkz/cassaforte[Cassaforte]
+* https://github.com/mpenet/hayt[Hayt]
+
+== Erlang
+
+* https://github.com/matehat/cqerl[CQerl]
+* https://github.com/silviucpp/erlcass[Erlcass]
+
+== Go
+
+* https://github.com/relops/cqlc[CQLc]
+* https://github.com/hailocab/gocassa[Gocassa]
+* https://github.com/gocql/gocql[GoCQL]
+
+== Haskell
+
+* https://github.com/ozataman/cassy[Cassy]
+
+== Rust
+
+* https://github.com/neich/rust-cql[Rust CQL]
+
+== Perl
+
+* https://github.com/tvdw/perl-dbd-cassandra[Cassandra::Client and
+DBD::Cassandra]
+
+== Elixir
+
+* https://github.com/lexhide/xandra[Xandra]
+* https://github.com/matehat/cqex[CQEx]
+
+== Dart
+
+* https://github.com/achilleasa/dart_cassandra_cql[dart_cassandra_cql]
diff --git a/doc/modules/cassandra/pages/getting_started/index.adoc b/doc/modules/cassandra/pages/getting_started/index.adoc
new file mode 100644
index 00000000000..af43c17a0bf
--- /dev/null
+++ b/doc/modules/cassandra/pages/getting_started/index.adoc
@@ -0,0 +1,30 @@
+= Getting Started
+
+This section covers how to get started using Apache Cassandra and should
+be the first thing to read if you are new to Cassandra.
+
+* xref:getting_started/installing.adoc[Installing Cassandra]: Installation instructions plus information on choosing a method.
+** [ xref:getting_started/installing.adoc#installing-the-docker-image[Docker] ]
+[ xref:getting_started/installing.adoc#installing-the-binary-tarball[tarball] ]
+[ xref:getting_started/installing.adoc#installing-the-debian-packages[Debian] ]
+[ xref:getting_started/installing.adoc#installing-the-rpm-packages[RPM] ]
+* xref:getting_started/configuring.adoc[Configuring Cassandra]
+* xref:getting_started/querying.adoc[Inserting and querying data]
+* xref:getting_started/drivers.adoc[Client drivers]: Drivers for various languages.
+** [ xref:getting_started/drivers.adoc#java[Java] ]
+ [ xref:getting_started/drivers.adoc#python[Python] ]
+ [ xref:getting_started/drivers.adoc#ruby[Ruby] ]
+ [ xref:getting_started/drivers.adoc#c-net[C# / .NET] ]
+ [ xref:getting_started/drivers.adoc#nodejs[Node.js] ]
+ [ xref:getting_started/drivers.adoc#php[PHP] ]
+ [ xref:getting_started/drivers.adoc#c[C++] ]
+ [ xref:getting_started/drivers.adoc#scala[Scala] ]
+ [ xref:getting_started/drivers.adoc#clojure[Clojure] ]
+ [ xref:getting_started/drivers.adoc#erlang[Erlang] ]
+ [ xref:getting_started/drivers.adoc#go[Go] ]
+ [ xref:getting_started/drivers.adoc#haskell[Haskell] ]
+ [ xref:getting_started/drivers.adoc#rust[Rust] ]
+ [ xref:getting_started/drivers.adoc#perl[Perl] ]
+ [ xref:getting_started/drivers.adoc#elixir[Elixir] ]
+ [ xref:getting_started/drivers.adoc#dart[Dart] ]
+* xref:getting_started/production.adoc[Production recommendations]
diff --git a/doc/modules/cassandra/pages/getting_started/installing.adoc b/doc/modules/cassandra/pages/getting_started/installing.adoc
new file mode 100644
index 00000000000..4d4ea06ba38
--- /dev/null
+++ b/doc/modules/cassandra/pages/getting_started/installing.adoc
@@ -0,0 +1,344 @@
+= Installing Cassandra
+:tabs:
+
+These are the instructions for deploying the supported releases of
+Apache Cassandra on Linux servers.
+
+Cassandra runs on a wide array of Linux distributions including (but not
+limited to):
+
+* Ubuntu, most notably LTS releases 16.04 to 18.04
+* CentOS & RedHat Enterprise Linux (RHEL) including 6.6 to 7.7
+* Amazon Linux AMIs including 2016.09 through to Linux 2
+* Debian versions 8 & 9
+* SUSE Enterprise Linux 12
+
+This is not an exhaustive list of operating system platforms, nor is it
+prescriptive. However, users are well-advised to conduct exhaustive
+tests of their own, particularly for less-popular distributions of
+Linux. Deploying on older versions is not recommended unless you have
+previous experience with the older distribution in a production
+environment.
+
+== Prerequisites
+
+* Install the latest version of Java 8, either the
+http://www.oracle.com/technetwork/java/javase/downloads/index.html[Oracle
+Java Standard Edition 8] or http://openjdk.java.net/[OpenJDK 8]. To
+verify that you have the correct version of java installed, type
+`java -version`.
+* *NOTE*: _Experimental_ support for Java 11 was added in Cassandra {40_version}
+(https://issues.apache.org/jira/browse/CASSANDRA-9608[CASSANDRA-9608]).
+Running Cassandra on Java 11 is _experimental_. Do so at your own risk.
+For more information, see
+https://github.com/apache/cassandra/blob/trunk/NEWS.txt[NEWS.txt].
+* For using cqlsh, the latest version of
+https://www.python.org/downloads/[Python 2.7] or Python 3.6+. To verify
+that you have the correct version of Python installed, type
+`python --version`.
+
+== Choosing an installation method
+
+There are three methods of installing Cassandra that are common:
+
+* Docker image
+* Tarball binary file
+* Package installation (RPM, YUM)
+
+If you are a current Docker user, installing a Docker image is simple.
+You'll need to install Docker Desktop for Mac, Docker Desktop for Windows,
+or have `docker` installed on Linux.
+Pull the appropriate image and then start Cassandra with a run command.
+
+For most users, installing the binary tarball is also a simple choice.
+The tarball unpacks all its contents into a single location with
+binaries and configuration files located in their own subdirectories.
+The most obvious attribute of the tarball installation is that it does not
+require `root` permissions and can be installed on any Linux
+distribution.
+
+Packaged installations require `root` permissions, and are most appropriate for
+production installs.
+Install the RPM build on CentOS and RHEL-based distributions if you want to
+install Cassandra using YUM.
+Install the Debian build on Ubuntu and other Debian-based
+distributions if you want to install Cassandra using APT.
+Note that both the YUM and APT methods require `root` permissions and
+will install the binaries and configuration files as the `cassandra` OS user.
+
+== Installing the docker image
+
+[arabic, start=1]
+. Pull the docker image. For the latest image, use:
+
+[source, shell]
+----
+include::example$BASH/docker_pull.sh[]
+----
+
+This `docker pull` command will get the latest version of the official
+Apache Cassandra image available from the https://hub.docker.com/_/cassandra[Dockerhub].
+
+[arabic, start=2]
+. Start Cassandra with a `docker run` command:
+
+[source, shell]
+----
+include::example$BASH/docker_run.sh[]
+----
+
+The `--name` option sets the name of the Docker container created.
+
+[arabic, start=3]
+. Start the CQL shell, `cqlsh`, to interact with the Cassandra node created:
+
+[source, shell]
+----
+include::example$BASH/docker_cqlsh.sh[]
+----
+
+== Installing the binary tarball
+
+include::partial$java_version.adoc[]
+
+[arabic, start=2]
+. Download the binary tarball from one of the mirrors on the
+{cass_url}download/[Apache Cassandra Download] site.
+For example, to download Cassandra {40_version}:
+
+[source,shell]
+----
+include::example$BASH/curl_install.sh[]
+----
+
+NOTE: The mirrors only host the latest versions of each major supported
+release. To download an earlier version of Cassandra, visit the
+http://archive.apache.org/dist/cassandra/[Apache Archives].
+
+[arabic, start=3]
+. OPTIONAL: Verify the integrity of the downloaded tarball using one of
+the methods https://www.apache.org/dyn/closer.cgi#verify[here]. For
+example, to verify the hash of the downloaded file using GPG:
+
+[{tabs}]
+====
+Command::
++
+--
+[source,shell]
+----
+include::example$BASH/verify_gpg.sh[]
+----
+--
+
+Result::
++
+--
+[source,plaintext]
+----
+include::example$RESULTS/verify_gpg.result[]
+----
+--
+====
+
+Compare the signature with the SHA256 file from the Downloads site:
+
+[{tabs}]
+====
+Command::
++
+--
+[source,shell]
+----
+include::example$BASH/curl_verify_sha.sh[]
+----
+--
+
+Result::
++
+--
+[source,plaintext]
+----
+28757dde589f70410f9a6a95c39ee7e6cde63440e2b06b91ae6b200614fa364d
+----
+--
+====
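+
+For example, you can compute the hash locally and compare it with the published value (file name assumed from the download step):
+
+[source, shell]
+----
+# Compute the SHA256 hash of the downloaded tarball
+sha256sum apache-cassandra-4.0.0-bin.tar.gz
+----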
+
+[arabic, start=4]
+. Unpack the tarball:
+
+[source,shell]
+----
+include::example$BASH/tarball.sh[]
+----
+
+The files will be extracted to the `apache-cassandra-4.0.0/` directory.
+This is the tarball installation location.
+
+[arabic, start=5]
+. The tarball installation location contains the directories for
+the scripts, binaries, utilities, configuration, data, and log files:
+
+[source,plaintext]
+----
+include::example$TEXT/tarball_install_dirs.txt[]
+----
+<1> location of the commands to run cassandra, cqlsh, nodetool, and SSTable tools
+<2> location of cassandra.yaml and other configuration files
+<3> location of the commit logs, hints, and SSTables
+<4> location of system and debug logs
+<5> location of the cassandra-stress tool
+
+For information on how to configure your installation, see
+{cass_url}doc/latest/getting_started/configuring.html[Configuring
+Cassandra].
+
+[arabic, start=6]
+. Start Cassandra:
+
+[source,shell]
+----
+include::example$BASH/start_tarball.sh[]
+----
+
+NOTE: This will run Cassandra as the authenticated Linux user.
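+
+A sketch of this step, assuming the extraction directory from the previous steps:
+
+[source, shell]
+----
+# Start Cassandra from the tarball installation location
+cd apache-cassandra-4.0.0/
+bin/cassandra
+----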
+
+include::partial$tail_syslog.adoc[]
+
+You can monitor the progress of the startup with:
+
+[{tabs}]
+====
+Command::
++
+--
+[source,shell]
+----
+include::example$BASH/tail_syslog.sh[]
+----
+--
+
+Result::
++
+--
+Cassandra is ready when you see an entry like this in the `system.log`:
+
+[source,plaintext]
+----
+include::example$RESULTS/tail_syslog.result[]
+----
+--
+====
+
+include::partial$nodetool_and_cqlsh.adoc[]
+
+== Installing the Debian packages
+
+include::partial$java_version.adoc[]
+
+[arabic, start=2]
+. Add the Apache repository of Cassandra to the file
+`cassandra.sources.list`.
+include::partial$package_versions.adoc[]
+
+[source,shell]
+----
+include::example$BASH/get_deb_package.sh[]
+----
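+
+As a sketch, this step usually looks like the following (the `40x` series name is an assumption; use the series matching the release you want):
+
+[source, shell]
+----
+# Register the Apache Cassandra 4.0.x Debian repository
+echo "deb https://debian.cassandra.apache.org 40x main" | sudo tee -a /etc/apt/sources.list.d/cassandra.sources.list
+----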
+
+[arabic, start=3]
+. Add the Apache Cassandra repository keys to the list of trusted keys
+on the server:
+
+[{tabs}]
+====
+Command::
++
+--
+[source,shell]
+----
+include::example$BASH/add_repo_keys.sh[]
+----
+--
+
+Result::
++
+--
+[source,plaintext]
+----
+include::example$RESULTS/add_repo_keys.result[]
+----
+--
+====
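+
+A representative command for this step (the key URL is an assumption based on the Apache download area):
+
+[source, shell]
+----
+# Import the Apache Cassandra release signing keys
+curl https://downloads.apache.org/cassandra/KEYS | sudo apt-key add -
+----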
+
+[arabic, start=4]
+. Update the package index from sources:
+
+[source,shell]
+----
+include::example$BASH/apt-get_update.sh[]
+----
+
+[arabic, start=5]
+. Install Cassandra with APT:
+
+[source,shell]
+----
+include::example$BASH/apt-get_cass.sh[]
+----
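+
+For reference, steps 4 and 5 typically reduce to the standard APT commands:
+
+[source, shell]
+----
+# Refresh the package index, then install Cassandra
+sudo apt-get update
+sudo apt-get install cassandra
+----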
+
+NOTE: For information on how to configure your installation, see
+{cass_url}doc/latest/getting_started/configuring.html[Configuring
+Cassandra].
+
+include::partial$tail_syslog.adoc[]
+
+include::partial$nodetool_and_cqlsh_nobin.adoc[]
+
+== Installing the RPM packages
+
+include::partial$java_version.adoc[]
+
+[arabic, start=2]
+. Add the Apache repository of Cassandra to the file
+`/etc/yum.repos.d/cassandra.repo` (as the `root` user).
+include::partial$package_versions.adoc[]
+
+[source,plaintext]
+----
+include::example$RESULTS/add_yum_repo.result[]
+----
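+
+The repository definition is expected to look roughly like this (the series name and URLs are assumptions; check the Download site for current values):
+
+[source, plaintext]
+----
+[cassandra]
+name=Apache Cassandra
+baseurl=https://redhat.cassandra.apache.org/40x/
+gpgcheck=1
+repo_gpgcheck=1
+gpgkey=https://downloads.apache.org/cassandra/KEYS
+----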
+
+[arabic, start=3]
+. Update the package index from sources:
+
+[source,shell]
+----
+include::example$BASH/yum_update.sh[]
+----
+
+[arabic, start=4]
+. Install Cassandra with YUM:
+
+[source,shell]
+----
+include::example$BASH/yum_cass.sh[]
+----
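+
+For reference, steps 3 and 4 typically reduce to the standard YUM commands:
+
+[source, shell]
+----
+# Refresh package metadata, then install Cassandra
+sudo yum update
+sudo yum install cassandra
+----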
+
+NOTE: A new Linux user `cassandra` will be created as part of the
+installation. The Cassandra service will also run as this user.
+
+[arabic, start=5]
+. Start the Cassandra service:
+
+[source,shell]
+----
+include::example$BASH/yum_start.sh[]
+----
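+
+This is typically:
+
+[source, shell]
+----
+# Start the Cassandra service
+sudo service cassandra start
+----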
+
+include::partial$tail_syslog.adoc[]
+
+include::partial$nodetool_and_cqlsh_nobin.adoc[]
+
+== Further installation info
+
+For help with installation issues, see the
+{cass_url}doc/latest/troubleshooting/index.html[Troubleshooting]
+section.
diff --git a/doc/modules/cassandra/pages/getting_started/production.adoc b/doc/modules/cassandra/pages/getting_started/production.adoc
new file mode 100644
index 00000000000..de7fb54234a
--- /dev/null
+++ b/doc/modules/cassandra/pages/getting_started/production.adoc
@@ -0,0 +1,163 @@
+= Production recommendations
+
+The `cassandra.yaml` and `jvm.options` files have a number of notes and
+recommendations for production usage.
+This page expands on some of the information in the files.
+
+== Tokens
+
+Using more than one token range per node is referred to as virtual nodes, or vnodes.
+`vnodes` facilitate flexible expansion with more streaming peers when a new node bootstraps
+into a cluster.
+Limiting the negative impact of streaming (I/O and CPU overhead) enables incremental cluster expansion.
+However, more tokens lead to sharing data with more peers, which results in decreased availability.
+These two factors must be balanced based on a cluster's characteristic reads and writes.
+To learn more,
+https://github.com/jolynch/python_performance_toolkit/raw/master/notebooks/cassandra_availability/whitepaper/cassandra-availability-virtual.pdf[Cassandra Availability in Virtual Nodes, Joseph Lynch and Josh Snyder] is recommended reading.
+
+Change the number of tokens using the setting in the `cassandra.yaml` file:
+
+`num_tokens: 16`
+
+Here are the most common token counts with a brief explanation of when
+and why you would use each one.
+
+[width="100%",cols="13%,87%",options="header",]
+|===
+|Token Count |Description
+|1 |Maximum availability, maximum cluster size, fewest peers, but
+inflexible expansion. Must always double the size of the cluster to expand and
+remain balanced.
+
+|4 |A healthy mix of elasticity and availability. Recommended for
+clusters which will eventually reach over 30 nodes. Requires adding
+approximately 20% more nodes to remain balanced. Shrinking a cluster may
+result in cluster imbalance.
+
+|8 | Using 8 vnodes distributes the workload between systems with a ~10% variance
+and has minimal impact on performance.
+
+|16 |Best for heavily elastic clusters which expand and shrink
+regularly, but may have availability issues with larger clusters. Not
+recommended for clusters over 50 nodes.
+|===
+
+In addition to setting the token count, it's extremely important that
+`allocate_tokens_for_local_replication_factor` in `cassandra.yaml` is set to an
+appropriate replication factor, to ensure even token allocation.
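+
+For example, a sketch of the two related settings in `cassandra.yaml` (a replication factor of 3 is assumed):
+
+[source, yaml]
+----
+# Use 16 token ranges per node
+num_tokens: 16
+# Allocate tokens for even ownership, assuming keyspaces with RF 3 in this datacenter
+allocate_tokens_for_local_replication_factor: 3
+----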
+
+== Read ahead
+
+Read ahead is an operating system feature that attempts to keep as much
+data as possible loaded in the page cache.
+Spinning disks can have long seek times causing high latency, so additional
+throughput on reads using the page cache can improve performance.
+By leveraging read ahead, the OS can pull additional data into memory without
+the cost of additional seeks.
+This method works well when the available RAM is greater than the size of the
+hot dataset, but can be problematic when the reverse is true (dataset > RAM).
+The larger the hot dataset, the less read ahead is useful.
+
+Read ahead is definitely not useful in the following cases:
+
+* Small partitions, such as tables with a single partition key
+* Solid state drives (SSDs)
+
+
+Read ahead can actually increase disk usage, and in some cases result in as much
+as a 5x latency and throughput performance penalty.
+Read-heavy, key/value tables with small (under 1KB) rows are especially prone
+to this problem.
+
+The recommended read ahead settings are:
+
+[width="59%",cols="40%,60%",options="header",]
+|===
+|Hardware |Initial Recommendation
+|Spinning Disks |64KB
+|SSD |4KB
+|===
+
+Read ahead can be adjusted on Linux systems using the `blockdev` tool.
+
+For example, set the read ahead of the disk `/dev/sda1` to 4KB:
+
+[source, shell]
+----
+$ blockdev --setra 8 /dev/sda1
+----
+
+[NOTE]
+====
+The `blockdev` setting sets the number of 512 byte sectors to read ahead.
+The argument of 8 above is equivalent to 4KB, or 8 * 512 bytes.
+====
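+
+To check the current value before changing it:
+
+[source, shell]
+----
+# Print the current read ahead, in 512 byte sectors
+$ blockdev --getra /dev/sda1
+----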
+
+All systems are different, so use these recommendations as a starting point and
+tune based on your SLA and throughput requirements.
+To understand how read ahead impacts disk resource usage, we recommend carefully
+reading through the xref:troubleshooting/use_tools.adoc[Diving Deep, using external tools]
+section.
+
+== Compression
+
+Compressed data is stored by compressing fixed size byte buffers and writing the
+data to disk.
+The buffer size is determined by the `chunk_length_in_kb` element in the compression
+map of a table's schema settings for `WITH COMPRESSION`.
+The default setting is 16KB starting with Cassandra {40_version}.
+
+Since the entire compressed buffer must be read off-disk, using a compression
+chunk length that is too large can lead to significant overhead when reading small records.
+Combined with the default read ahead setting, the result can be massive
+read amplification for certain workloads. Therefore, picking an appropriate
+value for this setting is important.
+
+LZ4Compressor is the default and recommended compression algorithm.
+If you need additional information on compression, read
+https://thelastpickle.com/blog/2018/08/08/compression_performance.html[The Last Pickle blogpost on compression performance].
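+
+For example, a sketch of lowering the chunk length on an existing table (keyspace and table names are illustrative):
+
+[source, cql]
+----
+-- Use 4KB chunks to reduce read amplification for small-row workloads
+ALTER TABLE mykeyspace.mytable
+WITH compression = {'class': 'LZ4Compressor', 'chunk_length_in_kb': 4};
+----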
+
+== Compaction
+
+There are different xref:compaction/index.adoc[compaction] strategies available
+for different workloads.
+We recommend reading about the different strategies to understand which is the
+best for your environment.
+Different tables may, and frequently do, use different compaction strategies in
+the same cluster.
+
+== Encryption
+
+It is significantly better to set up peer-to-peer encryption and client-server
+encryption when setting up your production cluster.
+Setting it up after the cluster is serving production traffic is challenging
+to do correctly.
+If you ever plan to use network encryption of any type, we recommend setting it
+up when initially configuring your cluster.
+Changing these configurations later is not impossible, but mistakes can
+result in downtime or data loss.
+
+== Ensure keyspaces are created with NetworkTopologyStrategy
+
+Production clusters should never use `SimpleStrategy`.
+Production keyspaces should use the `NetworkTopologyStrategy` (NTS).
+For example:
+
+[source, cql]
+----
+CREATE KEYSPACE mykeyspace WITH replication = {
+ 'class': 'NetworkTopologyStrategy',
+ 'datacenter1': 3
+};
+----
+
+Cassandra clusters initialized with `NetworkTopologyStrategy` can take advantage
+of the ability to configure multiple racks and data centers.
+
+== Configure racks and snitch
+
+**Correctly configuring or changing racks after a cluster has been provisioned is an unsupported process**.
+Migrating from a single rack to multiple racks is also unsupported and can
+result in data loss.
+Using `GossipingPropertyFileSnitch` is the most flexible solution for
+on-premises or mixed-cloud environments.
+`Ec2Snitch` is reliable for AWS EC2 only environments.
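+
+As a sketch, using `GossipingPropertyFileSnitch` involves setting the snitch in `cassandra.yaml` and describing each node's location in `conf/cassandra-rackdc.properties` (datacenter and rack names are illustrative):
+
+[source, plaintext]
+----
+# cassandra.yaml
+endpoint_snitch: GossipingPropertyFileSnitch
+
+# conf/cassandra-rackdc.properties
+dc=dc1
+rack=rack1
+----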
diff --git a/doc/modules/cassandra/pages/getting_started/querying.adoc b/doc/modules/cassandra/pages/getting_started/querying.adoc
new file mode 100644
index 00000000000..a8b348a06c0
--- /dev/null
+++ b/doc/modules/cassandra/pages/getting_started/querying.adoc
@@ -0,0 +1,31 @@
+= Inserting and querying
+
+The API for Cassandra is xref:cql/ddl.adoc[`CQL`, the Cassandra Query Language]. To
+use CQL, you will need to connect to the cluster, using either:
+
+* `cqlsh`, a shell for CQL
+* a client driver for Cassandra
+* for the adventurous, check out https://zeppelin.apache.org/docs/0.7.0/interpreter/cassandra.html[Apache Zeppelin], a notebook-style tool
+
+== CQLSH
+
+`cqlsh` is a command-line shell for interacting with Cassandra using
+CQL. It is shipped with every Cassandra package, and can be found in the
+`bin` directory alongside the `cassandra` executable. It connects to the
+single node specified on the command line. For example:
+
+[source, shell]
+----
+include::example$BASH/cqlsh_localhost.sh[]
+----
+
+[source, cql]
+----
+include::example$RESULTS/cqlsh_localhost.result[]
+----
+
+If the command is used without specifying a node, `localhost` is the default. See the xref:tools/cqlsh.adoc[`cqlsh` section] for full documentation.
+
+== Client drivers
+
+Many xref:getting_started/drivers.adoc[client drivers] are provided by the community, and a list of
+known drivers is included in this documentation. Refer to the documentation of each driver
+for more information.
diff --git a/doc/modules/cassandra/pages/getting_started/quickstart.adoc b/doc/modules/cassandra/pages/getting_started/quickstart.adoc
new file mode 100644
index 00000000000..69b55a6b532
--- /dev/null
+++ b/doc/modules/cassandra/pages/getting_started/quickstart.adoc
@@ -0,0 +1,116 @@
+= Apache Cassandra Quickstart
+:tabs:
+
+_Interested in getting started with Cassandra? Follow these instructions._
+
+*STEP 1: GET CASSANDRA USING DOCKER*
+
+You'll need to have Docker Desktop for Mac, Docker Desktop for Windows, or
+similar software installed on your computer.
+
+[source, plaintext]
+----
+docker pull cassandra:latest
+----
+
+Apache Cassandra is also available as a https://cassandra.apache.org/download/[tarball or package download].
+
+*STEP 2: START CASSANDRA*
+
+[source, plaintext]
+----
+docker run --name cassandra cassandra
+----
+
+*STEP 3: CREATE FILES*
+
+In the directory where you plan to run the next step, create these two files
+so that some data can be automatically inserted in the next step.
+
+A _cqlshrc_ file will log you into the Cassandra database with the default superuser credentials:
+
+[source, plaintext]
+----
+[authentication]
+ username = cassandra
+ password = cassandra
+----
+
+Create a _scripts_ directory and change to that directory.
+The following _data.cql_ file will create a keyspace (the layer at which Cassandra
+replicates its data), create a table to hold the data, and insert some data:
+
+[source, cql]
+----
+-- Create a keyspace
+CREATE KEYSPACE IF NOT EXISTS store WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : '1' };
+
+-- Create a table
+CREATE TABLE IF NOT EXISTS store.shopping_cart (
+ userid text PRIMARY KEY,
+ item_count int,
+ last_update_timestamp timestamp
+);
+
+-- Insert some data
+INSERT INTO store.shopping_cart
+(userid, item_count, last_update_timestamp)
+VALUES ('9876', 2, toTimeStamp(toDate(now())));
+INSERT INTO store.shopping_cart
+(userid, item_count, last_update_timestamp)
+VALUES ('1234', 5, toTimeStamp(toDate(now())));
+----
+
+You should now have a _cqlshrc_ file and _/scripts/data.cql_ file.
+
+*STEP 4: RUN CQLSH TO INTERACT*
+
+Cassandra is a distributed database that can read and write data across multiple
+nodes with peer-to-peer replication. The Cassandra Query Language (CQL) is
+similar to SQL but suited for the JOINless structure of Cassandra. The CQL
+shell, `cqlsh`, is one tool for interacting with the database.
+
+[source, plaintext]
+----
+docker run --rm -it -v //scripts:/scripts \
+nodetool [(-h <host> | --host <host>)] [(-p <port> | --port <port>)]
+        [(-pp | --print-port)] [(-pw <password> | --password <password>)]
+        [(-pwf <passwordFilePath> | --password-file <passwordFilePath>)]
+        [(-u <username> | --username <username>)] enableauditlog
+        [--excluded-categories <excluded_categories>]
+        [--excluded-keyspaces <excluded_keyspaces>]
+        [--excluded-users <excluded_users>]
+        [--included-categories <included_categories>]
+        [--included-keyspaces <included_keyspaces>]
+        [--included-users <included_users>] [--logger <logger>]
+
+OPTIONS
+        --excluded-categories <excluded_categories>
+            Comma separated list of Audit Log Categories to be excluded for
+            audit log. If not set the value from cassandra.yaml will be used
+
+        --excluded-keyspaces <excluded_keyspaces>
+            Comma separated list of keyspaces to be excluded for audit log. If
+            not set the value from cassandra.yaml will be used
+
+        --excluded-users <excluded_users>
+            Comma separated list of users to be excluded for audit log. If not
+            set the value from cassandra.yaml will be used
+
+        -h <host>, --host <host>
+            Node hostname or ip address
+
+        --included-categories <included_categories>
+            Comma separated list of Audit Log Categories to be included for
+            audit log. If not set the value from cassandra.yaml will be used
+
+        --included-keyspaces <included_keyspaces>
+            Comma separated list of keyspaces to be included for audit log. If
+            not set the value from cassandra.yaml will be used
+
+        --included-users <included_users>
+            Comma separated list of users to be included for audit log. If not
+            set the value from cassandra.yaml will be used
+
+        --logger <logger>
+            Logger name to be used for AuditLogging. Default BinAuditLogger. If
+            not set the value from cassandra.yaml will be used
+
+        -p <port>, --port <port>
+            Remote jmx agent port number
+
+        -pp, --print-port
+            Operate in 4.0 mode with hosts disambiguated by port number
+
+        -pw <password>, --password <password>
+            Remote jmx agent password
+
+        -pwf <passwordFilePath>, --password-file <passwordFilePath>
+            Path to the JMX password file
+
+        -u <username>, --username <username>
+            Remote jmx agent username
+----
+
+To enable audit logging, run the following command on each node in the cluster on which you want to enable logging:
+
+[source, bash]
+----
+$ nodetool enableauditlog
+----
+
+== Disabling audit logging
+
+Use the `nodetool disableauditlog` command to disable audit logging.
+
+== Viewing audit logs
+
+The `auditlogviewer` tool is used to view (dump) audit logs if the logger was `BinAuditLogger`.
+`auditlogviewer` converts the binary log files into a human-readable format; only the audit log directory must be supplied as a command-line option.
+If the logger `FileAuditLogger` was set, the log files are already in human-readable format and `auditlogviewer` is not needed to read them.
+
+
+The syntax of `auditlogviewer` is:
+
+[source, plaintext]
+----
+auditlogviewer
+
+Audit log files directory path is a required argument.
+usage: auditlogviewer <path1> [<path2>...<pathN>] [options]
+--
+View the audit log contents in human readable format
+--
+Options are:
+-f,--follow Upon reaching the end of the log continue indefinitely
+ waiting for more records
+-h,--help display this help message
+-r,--roll_cycle How often the log file was rolled. May be
+ necessary for Chronicle to correctly parse file names. (MINUTELY, HOURLY,
+ DAILY). Default HOURLY.
+----
+
+== Example
+
+[arabic, start=1]
+. To demonstrate audit logging, first configure the ``cassandra.yaml`` file with the following settings:
+
+[source, yaml]
+----
+audit_logging_options:
+ enabled: true
+ logger: BinAuditLogger
+ audit_logs_dir: "/cassandra/audit/logs/hourly"
+ # included_keyspaces:
+ # excluded_keyspaces: system, system_schema, system_virtual_schema
+ # included_categories:
+ # excluded_categories:
+ # included_users:
+ # excluded_users:
+ roll_cycle: HOURLY
+ # block: true
+ # max_queue_weight: 268435456 # 256 MiB
+ # max_log_size: 17179869184 # 16 GiB
+ ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled:
+ # archive_command:
+ # max_archive_retries: 10
+----
+
+[arabic, start=2]
+. Create the audit log directory `/cassandra/audit/logs/hourly` and set the directory permissions to read, write, and execute for all.
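+
+For example (path taken from the configuration above):
+
+[source, bash]
+----
+# Create the audit log directory and open its permissions to all users
+$ sudo mkdir -p /cassandra/audit/logs/hourly
+$ sudo chmod a+rwx /cassandra/audit/logs/hourly
+----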
+
+[arabic, start=3]
+. Now create a demo keyspace and table and insert some data using ``cqlsh``:
+
+[source, cql]
+----
+ cqlsh> CREATE KEYSPACE auditlogkeyspace
+ ... WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 1};
+ cqlsh> USE auditlogkeyspace;
+ cqlsh:auditlogkeyspace> CREATE TABLE t (
+ ... id int,
+ ... k int,
+ ... v text,
+ ... PRIMARY KEY (id)
+ ... );
+ cqlsh:auditlogkeyspace> INSERT INTO t (id, k, v) VALUES (0, 0, 'val0');
+ cqlsh:auditlogkeyspace> INSERT INTO t (id, k, v) VALUES (0, 1, 'val1');
+----
+
+All the supported CQL commands will be logged to the audit log directory.
+
+[arabic, start=4]
+. Change directory to the audit logs directory.
+
+[source, bash]
+----
+$ cd /cassandra/audit/logs/hourly
+----
+
+[arabic, start=5]
+. List the audit log files and directories.
+
+[source, bash]
+----
+$ ls -l
+----
+
+You should see results similar to:
+
+[source, plaintext]
+----
+total 28
+-rw-rw-r--. 1 ec2-user ec2-user 65536 Aug 2 03:01 directory-listing.cq4t
+-rw-rw-r--. 1 ec2-user ec2-user 83886080 Aug 2 03:01 20190802-02.cq4
+-rw-rw-r--. 1 ec2-user ec2-user 83886080 Aug 2 03:01 20190802-03.cq4
+----
+
+The audit log files will all have a `.cq4` extension; the `directory-listing.cq4t` file holds the queue's directory metadata.
+
+[arabic, start=6]
+. Run the `auditlogviewer` tool to view the audit logs.
+
+[source, bash]
+----
+$ auditlogviewer /cassandra/audit/logs/hourly
+----
+
+This command will return a readable version of the log. Here is a partial sample of the log for the commands in this demo:
+
+[source, plaintext]
+----
+WARN 03:12:11,124 Using Pauser.sleepy() as not enough processors, have 2, needs 8+
+Type: AuditLog
+LogMessage:
+user:anonymous|host:10.0.2.238:7000|source:/127.0.0.1|port:46264|timestamp:1564711427328|type:USE_KEYSPACE|category:OTHER|ks:auditlogkeyspace|operation:USE AuditLogKeyspace;
+Type: AuditLog
+LogMessage:
+user:anonymous|host:10.0.2.238:7000|source:/127.0.0.1|port:46264|timestamp:1564711427329|type:USE_KEYSPACE|category:OTHER|ks:auditlogkeyspace|operation:USE "auditlogkeyspace"
+Type: AuditLog
+LogMessage:
+user:anonymous|host:10.0.2.238:7000|source:/127.0.0.1|port:46264|timestamp:1564711446279|type:SELECT|category:QUERY|ks:auditlogkeyspace|scope:t|operation:SELECT * FROM t;
+Type: AuditLog
+LogMessage:
+user:anonymous|host:10.0.2.238:7000|source:/127.0.0.1|port:46264|timestamp:1564713878834|type:DROP_TABLE|category:DDL|ks:auditlogkeyspace|scope:t|operation:DROP TABLE IF EXISTS
+AuditLogKeyspace.t;
+Type: AuditLog
+LogMessage:
+user:anonymous|host:10.0.2.238:7000|source:/3.91.56.164|port:42382|timestamp:1564714618360|type:REQUEST_FAILURE|category:ERROR|operation:CREATE KEYSPACE AuditLogKeyspace
+WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 1};; Cannot add
+existing keyspace "auditlogkeyspace"
+Type: AuditLog
+LogMessage:
+user:anonymous|host:10.0.2.238:7000|source:/127.0.0.1|port:46264|timestamp:1564714690968|type:DROP_KEYSPACE|category:DDL|ks:auditlogkeyspace|operation:DROP KEYSPACE AuditLogKeyspace;
+Type: AuditLog
+LogMessage:
+user:anonymous|host:10.0.2.238:7000|source:/3.91.56.164|port:42406|timestamp:1564714708329|type:CREATE_KEYSPACE|category:DDL|ks:auditlogkeyspace|operation:CREATE KEYSPACE
+AuditLogKeyspace
+WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 1};
+Type: AuditLog
+LogMessage:
+user:anonymous|host:10.0.2.238:7000|source:/127.0.0.1|port:46264|timestamp:1564714870678|type:USE_KEYSPACE|category:OTHER|ks:auditlogkeyspace|operation:USE auditlogkeyspace;
+----
+
+== Diagnostic events for user audit logging
+
+Any native transport-enabled client can subscribe to audit log events for diagnosing cluster issues.
+These events can be consumed by external tools to implement a Cassandra user auditing solution.
diff --git a/doc/source/new/fqllogging.rst b/doc/modules/cassandra/pages/new/fqllogging.adoc
similarity index 63%
rename from doc/source/new/fqllogging.rst
rename to doc/modules/cassandra/pages/new/fqllogging.adoc
index 20085dac8ff..84febb5d567 100644
--- a/doc/source/new/fqllogging.rst
+++ b/doc/modules/cassandra/pages/new/fqllogging.adoc
@@ -1,209 +1,159 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements. See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership. The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License. You may obtain a copy of the License at
-..
-.. http://www.apache.org/licenses/LICENSE-2.0
-..
-.. Unless required by applicable law or agreed to in writing, software
-.. distributed under the License is distributed on an "AS IS" BASIS,
-.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-.. See the License for the specific language governing permissions and
-.. limitations under the License.
-
-Full Query Logging (FQL)
-========================
-
-Apache Cassandra 4.0 adds a new highly performant feature that supports live query logging (`CASSANDRA-13983 `_).
+= Full Query Logging
+
+Apache Cassandra 4.0 adds a new highly performant feature that supports live query logging (https://issues.apache.org/jira/browse/CASSANDRA-13983[CASSANDRA-13983]).
FQL is safe for production use, with configurable limits to heap memory and disk space to prevent out-of-memory errors.
-This feature is useful for live traffic capture, as well as traffic replay. The tool provided can be used for both debugging query traffic and migration.
+This feature is useful for live traffic capture, as well as traffic replay.
+The tool provided can be used for both debugging query traffic and migration.
New ``nodetool`` options are also added to enable, disable or reset FQL, as well as a new tool to read and replay the binary logs.
-The full query logging (FQL) capability uses `Chronicle-Queue `_ to rotate a log of queries.
+The full query logging (FQL) capability uses http://github.com/OpenHFT/Chronicle-Queue[Chronicle-Queue] to rotate a log of queries.
Full query logs will be referred to as *logs* for the remainder of the page.
Some of the features of FQL are:
-- The impact on query latency is reduced by asynchronous single-thread log entry writes to disk.
-- Heap memory usage is bounded by a weighted queue, with configurable maximum weight sitting in front of logging thread.
-- If the weighted queue is full, producers can be blocked or samples can be dropped.
-- Disk utilization is bounded by a configurable size, deleting old log segments once the limit is reached.
-- A flexible schema binary format, `Chronicle-Wire `_, for on-disk serialization that can skip unrecognized fields, add new ones, and omit old ones.
-- Can be enabled, disabled, or reset (to delete on-disk data) using the JMX tool, ``nodetool``.
-- Can configure the settings in either the `cassandra.yaml` file or by using ``nodetool``.
-- Introduces new ``fqltool`` that currently can ``Dump`` the binary logs to a readable format. Other options are ``Replay`` and ``Compare``.
+* The impact on query latency is reduced by asynchronous single-thread log entry writes to disk.
+* Heap memory usage is bounded by a weighted queue, with configurable maximum weight sitting in front of the logging thread.
+* If the weighted queue is full, producers can be blocked or samples can be dropped.
+* Disk utilization is bounded by a configurable size, deleting old log segments once the limit is reached.
+* A flexible schema binary format, http://github.com/OpenHFT/Chronicle-Wire[Chronicle-Wire], for on-disk serialization that can skip unrecognized fields, add new ones, and omit old ones.
+* Can be enabled, disabled, or reset (to delete on-disk data) using the JMX tool, ``nodetool``.
+* Can configure the settings in either the `cassandra.yaml` file or by using ``nodetool``.
+* Introduces new ``fqltool`` that currently can ``Dump`` the binary logs to a readable format. Other options are ``Replay`` and ``Compare``.
-FQL logs all successful Cassandra Query Language (CQL) requests, both events that modify the data and those that query.
+FQL logs all successful Cassandra Query Language (CQL) requests, both events that modify the data and those that query.
While audit logs also include CQL requests, FQL logs only the CQL request. This difference means that FQL can be used to replay or compare logs, which audit logging cannot. FQL is useful for debugging, performance benchmarking, testing and auditing CQL queries, while audit logs are useful for compliance.
-Currently DCL statements containing passwords are logged for informational purposes but for security reasons they are not available for replay.
-Replay of those statements will be unsuccessful operation because everything after the word password in a DCL statement
-will be obfuscated as *******.
-
In performance testing, FQL appears to have little or no overhead in ``WRITE`` only workloads, and a minor overhead in ``MIXED`` workload.
-Query information logged
-------------------------
+== Query information logged
The query log contains:
-- all queries invoked
-- approximate time they were invoked
-- any parameters necessary to bind wildcard values
-- all query options
+* all queries invoked
+* approximate time they were invoked
+* any parameters necessary to bind wildcard values
+* all query options
-The logger writes single or batched CQL queries after they finish, so only successfully completed queries are logged. Failed or timed-out queries are not logged. Different data is logged, depending on the type of query.
+The logger writes single or batched CQL queries after they finish, so only successfully completed queries are logged.
+Failed or timed-out queries are not logged. Different data is logged, depending on the type of query.
A single CQL query log entry contains:
-- query - CQL query text
-- queryOptions - Options associated with the query invocation
-- queryState - Timestamp state associated with the query invocation
-- queryTimeMillis - Approximate time in milliseconds since the epoch since the query was invoked
+* query - CQL query text
+* queryOptions - Options associated with the query invocation
+* queryState - Timestamp state associated with the query invocation
+* queryTimeMillis - Approximate time in milliseconds since the epoch since the query was invoked
A batch CQL query log entry contains:
-- queries - CQL text of the queries
-- queryOptions - Options associated with the query invocation
-- queryState - Timestamp state associated with the query invocation
-- batchTimeMillis - Approximate time in milliseconds since the epoch since the batch was invoked
-- type - The type of the batch
-- values - Values to bind to as parameters for the queries
+* queries - CQL text of the queries
+* queryOptions - Options associated with the query invocation
+* queryState - Timestamp state associated with the query invocation
+* batchTimeMillis - Approximate time in milliseconds since the epoch since the batch was invoked
+* type - The type of the batch
+* values - Values to bind to as parameters for the queries
-Because FQL is backed by `Binlog`, the performance and footprint are predictable, with minimal impact on log record producers.
+Because FQL is backed by `Binlog`, the performance and footprint are predictable, with minimal impact on log record producers.
Performance safety prevents the producers from overloading the log, using a weighted queue to drop records if the logging falls behind.
Single-thread asynchronous writing produces the logs. Chronicle-Queue provides an easy method of rolling the logs.
-Logging information logged
---------------------------
+== Logging information logged
FQL also tracks information about the stored log files:
-- Stored log files that are added and their storage impact. Deletes them if over storage limit.
-- The log files in Chronicle-Queue that have already rolled
-- The number of bytes in the log files that have already rolled
+* Stored log files that are added and their storage impact. Deletes them if over storage limit.
+* The log files in Chronicle-Queue that have already rolled
+* The number of bytes in the log files that have already rolled
-Logging sequence
-----------------
+== Logging sequence
The logger follows a well-defined sequence of events:
-1. The consumer thread that writes log records is started. This action can occur only once.
-2. The consumer thread offers a record to the log. If the in-memory queue is full, the record will be dropped and offer returns a `false` value.
-3. If accepted, the record is entered into the log. If the in-memory queue is full, the putting thread will be blocked until there is space or it is interrupted.
-4. The buffers are cleaned up at thread exit. Finalization will check again, to ensure there are no stragglers in the queue.
-5. The consumer thread is stopped. It can be called multiple times.
+. The consumer thread that writes log records is started. This action can occur only once.
+. The consumer thread offers a record to the log. If the in-memory queue is full, the record will be dropped and offer returns a `false` value.
+. If accepted, the record is entered into the log. If the in-memory queue is full, the putting thread will be blocked until there is space or it is interrupted.
+. The buffers are cleaned up at thread exit. Finalization will check again, to ensure there are no stragglers in the queue.
+. The consumer thread is stopped. It can be called multiple times.
-Using FQL
----------
+== Using FQL
-To use FQL, two actions must be completed. FQL must be configured using either the `cassandra.yaml` file or ``nodetool``, and logging must be enabled using ``nodetool enablefullquerylog``.
-Both actions are completed on a per-node basis.
+To use FQL, two actions must be completed. FQL must be configured using either the `cassandra.yaml` file or ``nodetool``, and logging must be enabled using ``nodetool enablefullquerylog``.
With either method, at a minimum, the path to the log directory must be specified.
+Both actions are completed on a per-node basis.
Full query logs are generated on each enabled node, so logs on each node will have that node's queries.
-Configuring FQL in cassandra.yaml
----------------------------------
+== Configuring FQL in cassandra.yaml
-The `cassandra.yaml` file can be used to configure FQL before enabling the feature with ``nodetool``.
+The `cassandra.yaml` file can be used to configure FQL before enabling the feature with ``nodetool``.
The file includes the following options that can be uncommented for use:
-::
-
- # default options for full query logging - these can be overridden from command line
- # when executing nodetool enablefullquerylog
- #full_query_logging_options:
- # log_dir:
- # roll_cycle: HOURLY
- # block: true
- # max_queue_weight: 268435456 # 256 MiB
- # max_log_size: 17179869184 # 16 GiB
- ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled:
- # archive_command:
- # max_archive_retries: 10
-
-log_dir
-^^^^^^^
-
-To write logs, an existing directory must be set in ``log_dir``.
-
-The directory must have appropriate permissions set to allow reading, writing, and executing.
-Logging will recursively delete the directory contents as needed.
-Do not place links in this directory to other sections of the filesystem.
+[source, yaml]
+----
+# default options for full query logging - these can be overridden from command line
+# when executing nodetool enablefullquerylog
+#full_query_logging_options:
+ # log_dir:
+ # roll_cycle: HOURLY
+ # block: true
+ # max_queue_weight: 268435456 # 256 MiB
+ # max_log_size: 17179869184 # 16 GiB
+ # archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled:
+ # archive_command:
+ # max_archive_retries: 10
+----
+
+=== log_dir
+
+To write logs, an existing directory must be set in ``log_dir``.
+
+The directory must have appropriate permissions set to allow reading, writing, and executing.
+Logging will recursively delete the directory contents as needed.
+Do not place links in this directory to other sections of the filesystem.
For example, ``log_dir: /tmp/cassandrafullquerylog``.
-roll_cycle
-^^^^^^^^^^
+=== roll_cycle
-The ``roll_cycle`` defines the frequency with which the log segments are rolled.
+The ``roll_cycle`` defines the frequency with which the log segments are rolled.
Supported values are ``HOURLY`` (default), ``MINUTELY``, and ``DAILY``.
For example: ``roll_cycle: DAILY``
-block
-^^^^^
+=== block
The ``block`` option specifies whether FQL should block writing or drop log records if FQL falls behind. Supported boolean values are ``true`` (default) or ``false``.
For example: ``block: false`` to drop records
-max_queue_weight
-^^^^^^^^^^^^^^^^
+=== max_queue_weight
The ``max_queue_weight`` option sets the maximum weight of in-memory queue for records waiting to be written to the file before blocking or dropping. The option must be set to a positive value. The default value is 268435456, or 256 MiB.
For example, to change the default: ``max_queue_weight: 134217728 # 128 MiB``
-max_log_size
-^^^^^^^^^^^^
+=== max_log_size
The ``max_log_size`` option sets the maximum size of the rolled files to retain on disk before deleting the oldest file. The option must be set to a positive value. The default is 17179869184, or 16 GiB.
For example, to change the default: ``max_log_size: 34359738368 # 32 GiB``
-archive_command
-^^^^^^^^^^^^^^^
+=== archive_command
-The ``archive_command`` option sets the user-defined archive script to execute on rolled log files.
-When not defined, files are deleted, with a default of ``""`` which then maps to `org.apache.cassandra.utils.binlog.DeletingArchiver`.
+The ``archive_command`` option sets the user-defined archive script to execute on rolled log files.
+When not defined, files are deleted, with the default ``""`` which then maps to `org.apache.cassandra.utils.binlog.DeletingArchiver`.
For example: ``archive_command: /usr/local/bin/archiveit.sh %path # %path is the file being rolled``
-max_archive_retries
-^^^^^^^^^^^^^^^^^^^
+=== max_archive_retries
The ``max_archive_retries`` option sets the max number of retries of failed archive commands. The default is 10.
For example: ``max_archive_retries: 10``
FQL can also be configured using ``nodetool`` when enabling the feature, and will override any values set in the `cassandra.yaml` file, as discussed in the next section.
-Querying the state of FQL
----------------------
+== Enabling FQL
-In order to know what state FQL is in, you may use nodetool command ``getfullquerylog``. It will print out whether FQL is enabled
-and with what configuration options; if you reset or stop FQL, the configuration displayed will be taken from
-configuration in ``cassandra.yaml``.
-
-::
-
- $ nodetool getfullquerylog
- enabled true
- log_dir /path/to/fql/log/dir
- archive_command /usr/local/bin/archiveit.sh %path
- roll_cycle HOURLY
- block true
- max_log_size 17179869184
- max_queue_weight 268435456
- max_archive_retries 10
-
-Enabling FQL
-------------
-
-FQL is enabled on a per-node basis using the ``nodetool enablefullquerylog`` command. At a minimum, the path to the logging directory must be defined, if ``log_dir`` is not set in the `cassandra.yaml` file.
+FQL is enabled on a per-node basis using the ``nodetool enablefullquerylog`` command. At a minimum, the path to the logging directory must be defined if ``log_dir`` is not set in the `cassandra.yaml` file.
The syntax of the ``nodetool enablefullquerylog`` command has all the same options that can be set in the ``cassandra.yaml`` file.
-In addition, ``nodetool`` has options to set which host and port to run the command on, and username and password if the command requires authentication.
-
-::
+In addition, ``nodetool`` has options to set which host and port to run the command on, and username and password if the command requires authentication.
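+
+For example, a minimal invocation that only sets the log directory (path illustrative):
+
+[source, shell]
+----
+$ nodetool enablefullquerylog --path /tmp/cassandrafullquerylog
+----
+
+The full synopsis is: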
+
+[source, plaintext]
+----
 nodetool [(-h <host> | --host <host>)] [(-p