% cheri-c-programming.tex

\documentclass[12pt,twoside,openright,a4paper]{article}
%\documentclass[12pt,twoside,openright,usletter]{article}
% !TeX spellcheck = en_US
%\documentclass[11pt]{article}
% UK date format in bibliography:
\usepackage[british]{babel}
\usepackage[inner=25mm,outer=25mm,top=20mm,bottom=20mm]{geometry}

%\usepackage[UKenglish]{isodate}%UK date endian
\usepackage[headings]{fullpage}
\usepackage[hidelinks]{hyperref}

% Bibliography:
\usepackage[utf8]{inputenc}
\usepackage{csquotes,xpatch}% recommended
% list up to 99 names instead of the default 3
\usepackage[backend=biber,bibencoding=utf8,style=numeric,maxnames=99,backref=false,sortcites,datamodel=thesis]{biblatex}
\addbibresource{cheri.bib}
% Bibliography clean-up hook: the fields cleared below are removed from every
% bibliography entry before it is printed.
\AtEveryBibitem{%
% Don't print ISSN, ISBN, or URL dates
\clearfield{issn}%
\clearfield{isbn}%
\clearfield{urldate}%
\clearfield{urlyear}%
}

\usepackage{bytefield}
\usepackage{color}
\usepackage[scaled=0.8]{DejaVuSansMono}
\usepackage[T1]{fontenc}
\usepackage{listings}
\usepackage{mdframed} % To avoid linebreaks in lstlistings
% Listing environments.  Both take one optional argument of extra listings
% keys, which is appended last to \lstset and can therefore override the
% defaults given here.  The body is wrapped in a \linewidth-wide minipage
% (opened after ending the current paragraph, without indentation), so a
% listing is typeset as a single box rather than being split.
%
% clisting: C source code, with long lines wrapped (breaklines) and a frame
% on the left-hand side (frame=L).
\lstnewenvironment{clisting}[1][]{\endgraf\noindent\minipage{\linewidth}\lstset{language={C},breaklines=true,frame=L,#1}}{\endminipage\endgraf}
% compilerwarning: compiler diagnostics; no language highlighting, set in
% bold \scriptsize typewriter, otherwise the same wrapping and frame.
\lstnewenvironment{compilerwarning}[1][]{\endgraf\noindent\minipage{\linewidth}\lstset{language={},breaklines=true,basicstyle=\scriptsize\ttfamily\bfseries,frame=L,#1}}{\endminipage\endgraf}

\usepackage{subcaption}
\usepackage{times}
\usepackage{url}
\usepackage[svgnames]{xcolor}
\definecolor{lightgray}{gray}{0.8}
\usepackage{xspace}
\usepackage{xfrac}

\usepackage[nameinlink,noabbrev,capitalise]{cleveref}

% drawing over lstlistings (code stolen from nwf)
\usepackage{tikz}
   \usetikzlibrary{decorations.pathreplacing}
   \usetikzlibrary{fit}
   \usetikzlibrary{tikzmark}
   \usetikzlibrary{calc}
   \usetikzlibrary{patterns}
% \vcpgfmark{<name>}: drop a \pgfmark called <name>, vertically centred on
% the math axis (\vcenter inside \ensuremath).
\newcommand*{\vcpgfmark}[1]{\ensuremath{\vcenter{\hbox{\pgfmark{#1}}}}}
% GBP symbol should be safe since it's easy to enter (at least on a UK keyboard) and won't be in any valid lstlistings
\lstset{escapechar=£} % Note: ensure this doesn't occur in any of the code
% \TikzListingHighlight[<colour>]{<from>}{<to>}: overlay a 10pt-wide,
% 30%-opaque line from coordinate <from> to coordinate <to>.  The colour
% defaults to yellow when the optional argument is empty.
% NOTE(review): \ifstrempty is presumably supplied by etoolbox, which this
% file does not load explicitly (likely pulled in by another package) --
% confirm.
\newcommand{\TikzListingHighlight}[3][]{\tikz[overlay,remember picture]{\draw[\ifstrempty{#1}{yellow}{#1}, line width=10pt,opacity=0.3](#2) -- (#3);}}
% \TikzListingHighlightStartEnd[<colour>]{<name>}: same highlight, drawn
% between the marks named Start<name> and End<name> (pic cs: coordinates).
\newcommand*{\TikzListingHighlightStartEnd}[2][]{\tikz[overlay,remember picture]{\draw[\ifstrempty{#1}{yellow}{#1}, line width=10pt,opacity=0.3](pic cs:Start#2) -- (pic cs:End#2);}}


\renewcommand{\UrlFont}{\ttfamily\small}

% \baselineboxformatting{<contents>}
%   Box-formatting hook for bytefield bit boxes (installed with
%   \bytefieldsetup{boxformatting=...}).  It centres <contents> and places a
%   zero-width \rule in front of it whose height is the bit box's \height and
%   whose vertical offset is computed below.
%   \height is expected to be supplied by bytefield at the point of use, and
%   the length arithmetic relies on calc-style \setlength.
\newcommand{\baselineboxformatting}[1]{%
  % Measure size of contents
  \sbox0{#1}%
  % Use the difference between the contents' height and the bitbox's height,
  % clamped to [-.44\baselineskip, 0], as our minimum depth.
  \setlength{\skip0}{\ht0 - \height}%
  \ifdim\skip0>0pt%
    % Upper clamp.  The unit is mandatory: a bare `0' is not a valid length
    % and raises ``Illegal unit of measure'' whenever this branch is taken.
    \setlength{\skip0}{0pt}%
  \else%
    \ifdim\skip0<-.44\baselineskip%
      % Lower clamp.
      \setlength{\skip0}{-.44\baselineskip}%
    \fi%
  \fi%
  \centering\rule[\skip0]{0pt}{\height}#1}
\bytefieldsetup{boxformatting=\baselineboxformatting}

\lstset{basicstyle=\footnotesize\ttfamily}
%\newcommand{\ccode}[1]{\lstinline[language={C}]{#1}}
%\newcommand{\cxxcode}[1]{\lstinline[language={C++}]{#1}}
% Inline code typesetting.  \ccode is the one place where the inline code
% font (\small typewriter) is chosen; every macro below delegates to it, so
% they are semantic aliases (C++ code, constant, variable, path name, shell
% command) that currently all look the same.
\newcommand{\ccode}[1]{{\small\ttfamily{#1}}}
\newcommand{\cxxcode}[1]{{\ccode{#1}}}
\newcommand{\cconst}[1]{{\ccode{#1}}}
% \cfunc{<name>} additionally appends `()' to the function name.
\newcommand{\cfunc}[1]{{\ccode{#1()}}}
\newcommand{\cvar}[1]{{\ccode{#1}}}
\newcommand{\pathname}[1]{{\ccode{#1}}}
\newcommand{\commandline}[1]{{\ccode{#1}}}

% Shorthands for frequently used C type names.  Each ends in \xspace, so the
% macro can be followed directly by a space or punctuation in running text.
\newcommand{\ptrdifft}{{\ccode{ptrdiff\_t}}\xspace}
\newcommand{\maxalignt}{{\ccode{max\_align\_t}}\xspace}
\newcommand{\sizet}{{\ccode{size\_t}}\xspace}
\newcommand{\ssizet}{{\ccode{ssize\_t}}\xspace}
\newcommand{\ptraddrt}{{\ccode{ptraddr\_t}}\xspace}
\newcommand{\cuintptrt}{{\ccode{uintptr\_t}}\xspace}
\newcommand{\cintptrt}{{\ccode{intptr\_t}}\xspace}
\newcommand{\ccharstar}{{\ccode{char *}}\xspace}
\newcommand{\cvoidstar}{{\ccode{void *}}\xspace}
\newcommand{\clongt}{{\ccode{long}}\xspace}
\newcommand{\cintt}{{\ccode{int}}\xspace}
% \cintttt typesets int32_t and \cintsft typesets int64_t.
\newcommand{\cintttt}{{\ccode{int32\_t}}\xspace}
\newcommand{\cintsft}{{\ccode{int64\_t}}\xspace}

% Name of the signal, typeset as code.
\newcommand{\SIGPROT}{{\ccode{SIGPROT}}\xspace}

% Editorial notes.
% \note{<text>}{<author>} prints `[ Note: <text> - <author>]' in blue.
% When the job name contains the substring `final', \note is redefined to
% print nothing and, in horizontal mode, to remove the space preceding it;
% otherwise notes remain visible.
\newcommand{\note}[2]{{\color{blue}[ Note: #1 - #2]}}
\usepackage{xstring}
\IfSubStr*{\jobname}{final}{
  \renewcommand{\note}[2]{\relax\ifhmode\unskip\fi}
}{
  % show comments by default
}

% Per-author wrappers: \<initials>note{<text>} calls \note with the author's
% name filled in.
\newcommand{\arnote}[1]{\note{#1}{Alex R.}}
\newcommand{\bdnote}[1]{\note{#1}{Brooks D.}}
\newcommand{\rwnote}[1]{\note{#1}{Robert W.}}
\newcommand{\amnote}[1]{\note{#1}{Alfredo M.}}
\newcommand{\psnote}[1]{\note{#1}{Peter S.}}
\newcommand{\pgnnote}[1]{\note{#1}{Peter N.}}
\newcommand{\jrtcnote}[1]{\note{#1}{Jess C.}}
\newcommand{\hmnote}[1]{\note{#1}{Hesham A.}}
\newcommand{\nwfnote}[1]{\note{#1}{nwf}}

% Typeset C++ sensibly: a `C' followed by a smaller, raised, bold `++' with
% a line break discouraged in between.  The optional argument is accepted
% but not used.
\usepackage{relsize}
\newcommand*{\cpp}[1][]{C\textsmaller[2]{\nolinebreak[4]\hspace{-.05em}\raisebox{.45ex}{\textbf{++}}}}
% And another macro for sensible PDF metadata: use this variant in titles and
% section headings, where \texorpdfstring substitutes the plain string `C++'
% for the PDF string.  The optional argument is accepted but not used.
\newcommand*{\cppInHeader}[1][]{\texorpdfstring{\cpp{}}{C++}}
% \purecapCOrCpp takes one mandatory argument that it ignores, so call sites
% must write \purecapCOrCpp{}.
\newcommand*{\purecapCOrCpp}[1]{CHERI C/\cpp{}}

\hyphenation{Free-BSD}
\hyphenation{Free-RTOS}
\hyphenation{Cheri-BSD}
\hyphenation{Cheri-Free-RTOS}
\hyphenation{Cheri-ABI}
\hyphenation{Web-Kit}
\hyphenation{Postgre-SQL}

\title{CHERI C/\cppInHeader{} Programming Guide \\ (DRAFT)}
\author{Robert N. M. Watson$^*$, Alexander Richardson$^*$,
  Brooks Davis$^\dagger$, \\
  John Baldwin$^\ddagger$, David Chisnall$^\S$, Jessica Clarke$^*$,
  Nathaniel Filardo$^*$, \\
  Simon W. Moore$^*$,  Edward Napierala$^*$, Peter Sewell$^*$, and \\
  Peter G. Neumann$^\dagger$ \\
  \\
  $^*$University of Cambridge, $^\dagger$SRI International, \\
  $^\ddagger$Ararat River Consulting, LLC and $^\S$Microsoft Research}

\begin{document}
\sloppy

%% CL tech-report format provides its own cover page.  Comment for final
%% version.
%\maketitle

%% CL tech-report format requires page numbering to start at 3.  Uncomment for
%% final version.
\setcounter{page}{3}
%%

%
% Keep Abstract in sync with the Introduction.
%
\newcommand{\abstracttext}{
This document is a brief introduction to the \purecapCOrCpp{}
programming languages, which employ CHERI's architectural capability
primitive to implement C/\cpp{}-language memory safety.
We explain the principles underlying these language variants, and their
grounding in CHERI's multiple architectural instantiations:
CHERI-MIPS, CHERI-RISC-V, and Arm's Morello.
We describe the most commonly encountered differences between these
dialects and C/\cpp{} on conventional architectures, and where existing
software may require minor changes.
We document new compiler warnings and errors that may be experienced compiling
code with the CHERI Clang/LLVM compiler, and suggest how they may be addressed
through typically minor source-code changes.
We explain how modest language extensions allow selected software, such
as memory allocators, to further refine permissions and bounds on pointers.
This guidance is based on our experience adapting the FreeBSD operating-system
userspace, and applications such as PostgreSQL and WebKit, to run in a
CHERI C/\cpp{} capability-based programming environment.
We conclude by recommending further reading.

\psnote{should this mention CheriFreeRTOS and CHERI-RTEMS?}

}


\begin{abstract}
\abstracttext
\end{abstract}

\newpage
\setcounter{tocdepth}{2}
\tableofcontents

\newpage

\section{Introduction}

%
% Keep Abstract in sync with the Introduction.
%
\abstracttext{}

\subsection{Definitions}

CHERI Clang/LLVM and LLD implement the following new language,
code-generation, and linkage models:

\begin{description}
\item[CHERI C/\cpp{}] are C/\cpp{}-language dialects tuned to
  requirements arising from implementing all pointers using CHERI capabilities.
  This includes all explicit pointers (i.e., those declared by the programmer)
  and all implied pointers (e.g., those used to access local and global
  variables).
  For example, they diverge from C/\cpp{} implementations on conventional
  architectures by preventing pointers passed through integer types other
  than \cuintptrt and \cintptrt{} from being dereferenced.
  New Application Programming Interfaces (APIs) provide access to capability
  features of pointers, including getting and setting their bounds, required
  by selected software such as memory allocators.
  The vast majority of C/\cpp{} source code we have encountered requires
  little or no modification to be compiled as CHERI C/\cpp{}.

\item[Pure-capability machine code] is compiled code (or hand-written
  assembly) that utilizes CHERI capabilities for all memory accesses --
  including loads, stores, and instruction fetches -- rather than integer
  addresses.
  Capabilities are used to implement pointers explicitly described in the
  source program, and also to implement implied pointers in the C execution
  environment, such as those used for control flow.
  Pure-capability machine code is not binary compatible with
  capability-unaware code using integer pointers, not least due to the
  different size of the pointer data type.
  %Pure-capability code will most frequently be used to implement CHERI C,
  %although could also be used for other purposes (e.g., non-CHERI C).
\end{description}

While the focus of this document is \purecapCOrCpp{}, CHERI is an
architectural feature able to support other software use cases including
other C/\cpp{}-language mappings into its features.
Another mapping is hybrid C/\cpp{}, in which only selected pointers are
implemented using capabilities, with the remainder implemented using integers.
We have primarily used hybrid C in systems software that bridges between
environments executing pure-capability machine code and those running largely
or entirely non-CHERI-aware machine code.
For example, a largely CHERI-unaware CheriBSD kernel can host pure-capability
processes using its CheriABI wrapper implemented in hybrid C
(see \Cref{sec:cheriabi}).
Hybrid machine code has stronger binary compatibility, but weaker protection,
than pure-capability machine code.
We do not consider hybrid C further in this document.

\section{Background}

CHERI extends conventional processor Instruction-Set Architectures (ISAs) with
support for \textit{architectural capabilities}.
One important use for this new hardware data type is in the implementation
of safer C/\cpp{} pointers and the code or data they point at.
Our technical report, \textit{An Introduction to CHERI}, provides a more
detailed
overview of the CHERI architecture, ISA modeling, hardware implementations,
and software stack~\cite{UCAM-CL-TR-941}.

\subsection{CHERI capabilities}
\label{sec:cheri-capabilities}

\begin{figure}[b]
% Two bytefield diagrams placed side by side after a fixed 2.5cm indent:
%   - left: a single bit box labelled `1-bit tag';
%   - right: a 64-bit-wide, two-row layout labelled `128-bit in-memory
%     capability': a metadata row (16-bit perms, 3 bits filled light grey,
%     15-bit otype, 30-bit bounds) above a 64-bit address row.
% NOTE(review): the optional argument of subfigure is normally a minipage
% position (c, t or b); `t!' is not one of those -- confirm that the
% resulting vertical alignment is the intended one.
\hspace{2.5cm}
% Tag
\begin{subfigure}[t!]{0.1\textwidth}
\begin{bytefield}[bitwidth=3pt]{1}
% \bitheader[endianness=big]{~,~} \\
\begin{leftwordgroup}{1-bit tag}
\bitbox{1}{}
\end{leftwordgroup}
\end{bytefield}
\end{subfigure}
% Capability
\begin{subfigure}[t!]{0.1\textwidth}
\begin{bytefield}[bitwidth=3pt]{64}
\bitheader[endianness=big]{0,63} \\
\begin{rightwordgroup}{128-bit \\ in-memory \\ capability}
\bitbox{16}{perms} & \bitbox{3}{\color{lightgray}\rule{\width}{\height}} & \bitbox{15}{otype} & \bitbox{30}{bounds} \\
\bitbox[lrb]{64}{64-bit~address}
\end{rightwordgroup}
\end{bytefield}
\end{subfigure}
\caption{128-bit CHERI Concentrate capability representation used in
  64-bit CHERI-MIPS and 64-bit CHERI-RISC-V: 64-bit address
  and metadata in addressable memory; and 1-bit out-of-band tag.}
\label{figure:cheri-capability-representation}
\end{figure}

CHERI capabilities are twice the width of the native integer pointer type of
the baseline architecture: there are 128-bit capabilities on 64-bit platforms,
and 64-bit capabilities on 32-bit platforms.
Each capability consists of an integer (virtual) address of the natural size for
the architecture (e.g., 32 or 64 bit), and also additional metadata that is
compressed in order to fit in the remaining 32 or 64 bits of the capability
(see \Cref{figure:cheri-capability-representation} for an example; details
vary across underlying architectures and word sizes).
In addition, they are associated with a 1-bit validity ``tag'' whose value is
maintained in registers and memory by the architecture, but not part of
addressable memory.
Each element of the additional metadata and tag of the capability contributes
to the protection model:

\begin{description}
\item[Validity tag] The tag tracks the validity of a capability.
  If invalid, the capability cannot be used for load, store, instruction
  fetch, or other operations.
  It is still possible to extract fields from an invalid capability,
  including its address.

\item[Bounds] The lower and upper bounds are addresses restricting the
  portion of the address space within which the capability can be used for
  load, store, and instruction fetch.
  %
  Setting a capability's address (i.e., where it points) within
  bounds will retain the capability's validity tag.  Setting addresses out of
  bounds is subject to the precision limits of the bounds compression model
  (see below and \cref{sec:oob}); broadly speaking, setting addresses ``near''
  the capability's bounds will preserve the validity tag.  (These out-of-bounds
  capabilities continue to authorize access only to memory within bounds.)

\item[Permissions] The permissions mask controls how the capability can be
  used -- for example, by authorizing the loading and storing of data and/or
  capabilities.

\item[Object type] If this value is not equal to the unsealed object type, the capability is ``sealed'' and
  cannot be modified or dereferenced, but can be used to implement opaque
  pointer types.
  This feature is not described further in this document, as it is primarily
  used to implement software compartmentalization rather than object-level
  memory protection.\arnote{Should we mention that code pointers are sealed (sentries)?}
\end{description}

When stored in memory, valid capabilities must be naturally aligned -- i.e., at
64-bit or 128-bit boundaries, depending on capability size -- as that is the
granularity at which in-memory tags are maintained.
Partial or complete overwrites with data, rather than a complete overwrite
with a valid capability, lead to the in-memory tag being cleared, preventing
corrupted capabilities from later being dereferenced.

In order to reduce the memory footprint of capabilities, capability
compression is used to reduce the overhead of bounds so that the full
capability, including address, permissions, and bounds, fits within 64 or
128 bits (plus the 1-bit out-of-band tag).
Bounds compression takes advantage of redundancy between the address
and the bounds, which occurs because a pointer typically falls within (or
close to) its associated allocation, and because allocations are typically
well aligned.
The compression scheme uses a floating-point representation, allowing high-precision bounds for small
objects, but requiring stronger alignment and padding for larger allocations
(see \cref{sec:bounds_alignment}).

\subsection{Architectural rules for capability use}

The architecture enforces several important security properties on changes to
this metadata:

\begin{description}
\item[Provenance validity] ensures that capabilities can be used -- for
  load, store, instruction fetch, etc. -- only if they are derived via valid
  transformations of valid capabilities.
  This property holds for capabilities in both registers and memory.

% \item[Capability integrity] prevents direct in-memory manipulation of
%   capabilities.  (Although this property is subsumed
%   under the previous property, it seems worth stating on its own.)
% \pgnnote{Does that added sentence work?}
% \rwnote{I'm not really sure that that helps.}

%  \psnote{As they are stated above, ``provenance validity''
%    subsumes ``capability integrity'', which is a bit confusing.  One
%    could just lose the latter, or (I suppose) split ``provenance
%    validity'' into the case of capability construction in registers,
%    via loads and register
%    operations and the case of capability (de)construction in memory,
%    via good and bad store operations}

%  \psnote{As stated above, ``provenance validity'' involves both the
%    construction and use of capabilities. Think that's ok, but a
%    different slicing of the concepts would be to have it just be
%    construction.   If sticking with ``can be used'', then somehow the
%    text should be elaborated to not forbid use in non-authorising
%    ways, e.g., ``using'' a possibly-non-valid capability by pulling
%    out its address}

\item[Monotonicity] requires that any capability derived from another
  cannot exceed the permissions and bounds of the capability from which it was
  derived (leaving aside sealed capabilities, used for domain transition,
  whose mechanism is not detailed in this report).

% \psnote{That's a bit odd, as a capability is really just a pure
%   value, not a mutable thing.  A better (but still a bit fuzzy)
%   statement would be something like ``Monotonicity ensures that a
%   valid capability can only be constructed from another capability
%   with the same or greater authority''.
% Beyond that, one should talk not just about monotonicity of capability
% construction, but monotonicity of the set of all \emph{reachable
%   capabilities} -- compare with the cheri\_formal\_paper Section IV. }
% \rwnote{This comment partially addressed.}
% \rwnote{This report does not discuss compartmentalisation at all, so is
% uninterested in the other definition of monotonicity.}

\end{description}

At boot time, the architecture provides initial capabilities to the firmware,
allowing data access and instruction fetch across the full address space.
Additionally, all tags are cleared in memory.
Further capabilities can then be derived (in accordance with the monotonicity
property) as they are passed from firmware to boot loader, from boot loader to
hypervisor, from hypervisor to the OS, and from the OS to the application.
At each stage in the derivation chain, bounds and permissions may be
restricted to further limit access.
For example, the OS may assign capabilities for only a limited portion of the
address space to the user software, preventing use of other portions of the
address space.

% These capabilities describe the set of memory access permissions held by each
% software component.

%The initial capabilities are then
%  derived from existing valid capabilities, in accordance with the monotonicity
%  property;
% \pgnnote{This seems cleaner.}
% \rwnote{More clear but less correct -- the initial capabilities are never
%   derived from other capabilities.}

Similarly, capabilities carry with them \textit{intentionality}: when a
process passes a capability as an argument to a system call, the OS kernel can
carefully use only that capability to ensure that it does not access other
process memory that was not intended by the user process -- even though the
kernel may in fact have permission to access the entire address space through
other capabilities it holds.
This is important, as it prevents ``confused deputy'' problems, in which a more
privileged party uses an excess of privilege when acting on behalf of a less
privileged party, performing operations that were not intended to be
authorized.
For example, this prevents the kernel from overflowing the bounds on a
userspace buffer when a pointer to the buffer is passed as a
system-call argument.

The hardware furthermore guarantees that capability tags and capability data are written atomically.
For example, if one thread stores a valid capability and another writes arbitrary data to the same location, it is impossible to observe the arbitrary data with the validity tag set.
% \arnote{I guess the other way around is fine? I.e. capability data but \textbf{without} the tag?}

These architectural properties provide the foundation on which a
capability-based OS, compiler, and runtime can implement C/\cpp{}-language memory
safety.
They have been made precise and have been proved, with machine-checked proof,
to hold for the CHERI-MIPS architecture~\cite{cheri-formal-SP2020}.


\section{CHERI C/\cppInHeader{}}

The architectural-capability type can be used in a variety of ways by
software.
One particularly useful use case is in implementing \textit{CHERI C/\cpp{}}.
In this model, all C/\cpp{} language-visible pointer types, as well as any
implied pointers implementing vtables, return addresses, global variables,
arrays of variadic-function arguments, and so on, are implemented using
capabilities with tight bounds.
This allows the architecture to imbue pointers with protection by virtue of
architectural provenance validity, bounds checking, and permission checking,
protecting pointers from corruption and providing strong spatial memory
safety.

\subsection{The CHERI C/\cppInHeader{} run-time environment}

CHERI C code executes within a capability-aware run-time environment
-- whether ``bare metal'' with a suitable runtime, or in a richer, OS-based
process environment such as CheriABI (see \Cref{sec:cheriabi}),
which ensures that:
\begin{itemize}
  \itemsep0em  % the default large spacing looks weird in this list
  \item capabilities are context switched (if required);
  \item tags are maintained by the OS virtual-memory subsystem (if present);
  \item capabilities are supported in OS control operations such as
    debugging (as needed);
  \item system-call arguments, the
run-time linker, and other aspects of the OS Application Binary Interface
(ABI) utilize capabilities rather than integer pointers; and
  \item the C/\cpp{}-language runtime implements suitable capability preservation
    \\
    (e.g., in \cfunc{memcpy}\kern-2pt) and restriction (e.g., in \cfunc{malloc}\kern-2pt).
\end{itemize}

In CheriBSD, our CHERI-extended version of the open-source FreeBSD operating
system, CheriABI operates as a complete additional OS ABI.
CheriABI is implemented in the style of a 32-bit or 64-bit OS personality, in
that it requires its own set of suitably compiled system libraries and classes.
We have also successfully adapted bare-metal runtimes, such as newlib, and
embedded operating systems, such as FreeRTOS (CheriFreeRTOS) and RTEMS
(CHERI-RTEMS), to support CHERI memory protection.

Outside of the OS and language runtime themselves, CHERI C/\cpp{} require
relatively few source-code-level changes to C/\cpp{}-language software.
We explore those changes in the remainder of this document.

\subsection{Referential, spatial, and temporal safety}

\purecapCOrCpp{} introduce a number of new types of protection not
present in compilation to conventional architectures:

\begin{description}
\item[Referential safety] protects pointers (references) themselves.
  This includes \textit{integrity} (corrupted pointers cannot be dereferenced)
  and \textit{provenance validity} (only pointers derived from valid pointers
  via valid manipulations can be dereferenced).

  When pointers are implemented using architectural capabilities, CHERI's
  capability tags and provenance validity naturally provide this protection.

\item[Spatial safety] ensures that pointers may be used only to access memory
  within bounds of their associated allocation; dually, manipulating an
  out-of-bounds pointer will not grant access to another allocation.

  This is accomplished by adapting various memory allocators, including the run-time
  linker for global variables, the stack allocator, and the heap allocator,
  to set the bounds on the capability implementing a pointer before returning
  it to the caller.
  Due to precision constraints on capability bounds, bounds on returned
  pointers may include additional padding, but will still not permit access to any
  other allocations (see \cref{sec:bounds_alignment}).
  Monotonicity ensures that callers cannot later broaden the bounds to cover
  other allocations.
\end{description}

\noindent
Referential safety and spatial safety are implemented in CheriBSD's
pure-capability CheriABI execution environment and for bare metal in
CheriFreeRTOS and CHERI-RTEMS.

\begin{description}
\item[Temporal safety] prevents a pointer retained after the release of its
  underlying allocation from being used to access its memory if that memory
  has been reused for a fresh allocation (e.g., after a fresh pointer to that
  memory has been returned by a further call to \cfunc{malloc} after the
  current pointer has been passed to \cfunc{free}\kern-2pt).

  Heap temporal safety is accomplished by preventing new pointers being
  returned to a previously allocated region of memory while any prior pointers
  to that memory persist in application-accessible memory.
  Memory will be held in \textit{quarantine} until any prior pointers have
  been revoked; then the memory may be reallocated.
  Architectural capability tags and virtual memory allow intermittent
  \textit{revocation sweeps} to accurately and efficiently locate and
  overwrite any capabilities implementing stale pointers.
  Spatial safety ensures that pointers cannot be used to reference other
  memory, including other freed memory.
\end{description}

%\rwnote{I wonder if we should make a note somewhere about the nature of the
  %overwrite -- e.g., that we anticipate that it will simply clear the tag, but
  %that other implementations are possible?}
% BFG: done below in the Implications of capability revocation for temporal
% safety subsection.

\noindent
Temporal safety is the object of ongoing experiments.
A prototype that guards \emph{heap} allocations has been developed for
CheriABI on CheriBSD, but is not yet integrated with the main development
branch.
We currently have no plans to develop support for temporal memory safety in
CheriFreeRTOS and CHERI-RTEMS, both due to the complexity of the temporal
safety runtime, and also because of CHERI temporal safety's dependence on an
MMU for performance.

%% nwf thinks this might belong in here, lest someone think that the restriction
%% to heap temporal safety is all that we can achieve even in theory.
%
% A blue-sky microkernel operating system project, CheriOS, achieves full spatial
% and temporal safety of its C runtime environment.

\section{Impact on the C/\cppInHeader{} programming model}
Several kinds of changes may be required by programmers; the extent to which
these changes impact a particular library or application will depend
significantly on its idiomatic use of C.
Our experience suggests that low-level system components such as run-time
linkers, debuggers, memory allocators, and language runtimes require a modest
but non-trivial porting effort.
Similarly, support classes that include, for example, custom synchronization
features, may also require moderate adaptation.
Other applications may compile with few or no changes -- especially if they
are already portable across 32-bit and 64-bit platforms and are written in a contemporary C or \cpp{} dialect.
In the following sections, we consider various kinds of programmer-visible
changes required in the CHERI C/\cpp{} programming environment.
In many cases, compiler warnings and errors can be used to identify potential
issues compiling code as \purecapCOrCpp{} (see
\Cref{sec:cheri-compiler-warnings-and-errors}).
\rwnote{Alex: Can we use the word ``most'' instead of ``many''?}

\subsection{Capability-related faults}
\label{sec:faults}

When architectural capability properties are violated, such as by an attempt
to dereference an invalid capability, access memory outside the bounds of a
capability, or perform accesses not authorized by the permissions on a
capability, this typically leads to a hardware exception (trap).
Operating-system kernels are able to catch this exception via a trap handler,
optionally delivering it to the run-time environment via OS-specific
mechanisms.

However, the language-level behavior of CHERI C/\cpp{} is considerably more
subtle: existing undefined-behavior semantics in C are retained.
The compiler is free to assume that loads and stores will not trap (i.e., that
any program is free of undefined behavior), and may optimize under this
assumption, including reordering code.
Architectural traps occur when dynamic loads and stores are attempted, and
reordering could lead to potentially confusing behavior for programmers.

In the CheriABI process environment, the operating system catches the hardware
exception and delivers a \SIGPROT signal to the user process;
further information may be found in \Cref{sec:cheriabi}.
In other environments, such as bare metal or under an embedded OS, behavior is
specific to those environments, as it will depend both on how architectural
exceptions are handled, and how those events are delivered to the C-language
stack.
Fail stop may be appropriate behavior in some environments, and is in fact the
default behavior in CheriABI when \SIGPROT is not handled.

\rwnote{We've opted to use the term ``hardware exception'' throughout, and
  mention ``traps'' only here.  This could cause confusion with respect to \cpp{}
  exceptions\ldots{} but perhaps less so than if we used the word ``exception''
  unadorned.}

\subsection{Pointer provenance validity}
\label{sec:pointer_provenance_validity}

\purecapCOrCpp{} implement pointers using architectural
capabilities, rather than using conventional 32-bit or 64-bit integers.
This allows the provenance validity of language-level pointers to be
protected by the provenance properties of CHERI architectural capabilities:
only pointers implemented using valid capabilities can be dereferenced.
Other types that contain pointers, \cuintptrt and \cintptrt,
%\psnote{It would be better to exhaustively list them (is it just intptr\_t and uintptr\_t?) rather than this vague ``such as''}
%\arnote{There are also cases such as C++11 strongly typed enums that use uintcap\_t as the underlying type, but we really don't need to mention this here. And I'm also not sure if we want to keep allowing that since enums should really be integer values only}
are similarly implemented
using architectural capabilities, so that casts through these types
can retain capability properties.
When a dereference is attempted on a capability without a valid tag --
including load, store, and instruction fetch -- a hardware exception fires
(see \Cref{sec:faults}).

On the whole, the effects of pointer provenance validity are non-disruptive to
C/\cpp{} source code.
However, a number of cases exist in language runtimes and other
(typically less portable) C code in which integers and pointers are
conflated, and this can disrupt provenance validity.
In general, generated code will propagate provenance validity in only two
situations:

\begin{description}
\item[Pointer types] The compiler will generate suitable code to propagate
  the provenance validity of pointers by using capability load and store
  instructions.
  This occurs when using a pointer type (e.g., \cvoidstar) or an
  integer type defined as being able to hold a pointer (e.g.,
  \cintptrt).
  As with attempting to store 64-bit pointers in 32-bit integers on 64-bit
  architectures, passing a pointer through an inappropriate type will lead to
  truncation of metadata (e.g., the validity tag and bounds).
  It is therefore important that a suitable type be used to hold pointers.

  This pattern often occurs where an opaque field exists in a data structure
  -- e.g., a \clongt argument to a callback in older C code -- that
  needs to be changed to use a capability-oblivious type such as \cintptrt.

\psnote{I'm not sure this document has explained the ISA behavior concretely enough for this stuff to really make sense -- the previous description was quite high-level.  Maybe somewhere it should be explicit that registers have tags, that load and store instructions must be via a capability, and that there are both capability and non-capability load and store instructions, with the former preserving tags (both ways) and the latter clearing them?}

\item[Capability-oblivious code] In some portions of the C/\cpp{} runtime and
  com\-piler-generated code, it may not be possible to know whether memory is
  intended to contain a pointer or not -- and yet preserving pointers is
  desirable.
  In those cases, memory accesses must be performed in a way that preserves
  pointer provenance.
  In the C runtime itself, this includes \cfunc{memcpy}\kern-2pt, which must use
  capability load and store instructions to transparently propagate capability
  metadata and tags.

  A useful example of potentially surprising code requiring modification for
  \purecapCOrCpp{} is \cfunc{qsort}\kern-2pt.
  Some C programs assume that \cfunc{qsort} on an array of data structures
  containing pointers will preserve the usability of those pointers.
  As a result, \cfunc{qsort} must be modified to perform memory copies using
  pointer-based types, such as \cintptrt, when size and alignment
  require it.
\end{description}

\subsubsection{Recommended use of C-language types}
\label{sec:recommended-c-types}

As confusion frequently arises about the most appropriate types to use for
integers, pointers, and pointer-related values, we make the following
recommendations:

\begin{description}
\item[\cintt, \cintttt, \clongt, \cintsft,
  \ldots{}] These pure integer types should be used to hold integer values
  that will never be cast to a pointer type without first combining them with
  another pointer value -- e.g., by using them as an array offset.
  Most integers in a C/\cpp{}-language program will be of these types.

\item[\ptraddrt] This is a new integer type introduced by CHERI C and should be used to hold
  addresses.
  \ptraddrt should not be directly cast to a pointer type for
  dereference; instead, it must be combined with an existing valid capability
  to the address space to generate a dereferenceable pointer.
  Typically, this is done using the \ccode{cheri\_address\_set(c, x)} function.

\item[\sizet, \ssizet] These integer types should be used
  to hold the unsigned or signed lengths of regions of address space.
  \arnote{\sizet not necessarily the same as unsigned \ptrdifft.}

\item[\ptrdifft] This integer type describes the difference of indices
  between two pointers to elements of the same array, and should not be used for
any other purpose.
  It can be added to a pointer to obtain a new pointer, but the result will
  be dereferenceable only if the address lies within the bounds of the
  pointer from which it was derived.

  \note{Isn't that last sentence true of any combination?}{nwf}

  Less standards-compliant code sometimes uses \ptrdifft when the
  programmer more likely meant \cintptrt or (less commonly)
  \sizet.
  When porting code, it is worthwhile to audit use of \ptrdifft.

  \note{Should we recommend that \sizet be used to hold lengths of
  allocations and \ptrdifft be used to talk about spans of
  address space (e.g., the offsets between two subobjects of an allocation)?  I feel
  like the recommendations here are not as concrete as I'd like.}{nwf}

\item[\cintptrt, \cuintptrt] These integer types should be
  used to hold values that may be valid pointers if cast back to a pointer
  type.
  When an \cintptrt is assigned an integer value -- e.g., due to
  constant initialization to an integer in the source -- and the result is
  cast to a pointer type, the pointer will be invalid and hence
  non-dereferenceable.
  These types will be used in two cases: (1) Where there is uncertainty as to
  whether the value to be held will be an integer or a pointer -- e.g., for an
  opaque argument to a callback function; or (2) Where it is more convenient
  to place a pointer value in an integer type for the purposes of arithmetic
  (which takes place on the capability's address and in units of bytes, as if
  the pointer had been cast to \texttt{char *}).

  The observable, integer range of a \cuintptrt is the same as
  that of a \ptraddrt (or \ptrdifft for \cintptrt), despite the increased \emph{alignment} and \emph{storage} requirements.

\item[\ccode{intmax\_t}, \ccode{uintmax\_t}] According to the C standard\arnote{7.20.1.5 Greatest-width integer types}, these integer types should be \enquote{capable of representing any value of any (unsigned) integer type}.
  In \purecapCOrCpp{}, they are not provenance-carrying and can represent the integer \emph{range} of \cuintptrt/\cintptrt, but not the capability metadata or tag bit.
  As the observable value of \cuintptrt/\cintptrt is the pointer address range, we believe this choice to be compatible with the C standard.

  Additionally, due to ABI constraints, it would be extremely difficult to change the width of these types from 64 to 129 bits.
  This is also true for other architectures such as x86: despite Clang and GCC supporting an \ccode{\_\_int128} type, \ccode{intmax\_t} remains 64 bits wide.

  We generally do not recommend use of these types in \purecapCOrCpp{}.
  However, the types may be useful in \cfunc{printf} calls (using the \ccode{\%j} format string width modifier) as the \pathname{inttypes.h} \ccode{PRI*} macros can be rather verbose.


\item[\maxalignt] This type is defined in C as \enquote{an object type whose alignment is the greatest fundamental alignment} \arnote{C2x \S{}7.19.2} and this includes capability types for \purecapCOrCpp{}.
% and in \cpp{} as a \enquote{type whose alignment requirement is at least as great as that of every scalar type}\arnote{C++17 \S{}21.2.4p5}
We found that some custom allocators use \ccode{sizeof(long double)} or \ccode{sizeof(uint64\_t)} to align their return values.
While this appears to work on most architectures, in \purecapCOrCpp{} this must be changed to \ccode{alignof(\maxalignt)}\kern-2pt.%
%
\footnote{%
It is important to use \ccode{alignof} instead of \ccode{sizeof} since many
common implementations, such as GCC and FreeBSD, define \maxalignt as a
\ccode{struct} and not a \ccode{union}.}

\item[\ccharstar, \ldots{}] These pointer types are suitable for
  dereference, but in general \psnote{that ``in general'' makes me wonder about the exceptions?}
  \arnote{The only exception I can think of is requiring \cvoidstar due to bad API design (callback parameters, etc).}  should not be cast to or from arbitrary integer
  values.
  Valid pointers are always derived from other valid pointers (including those cast to \cintptrt or \cuintptrt), and cannot be
  constructed using arbitrary integer arithmetic.
\end{description}

It is important to note that \cuintptrt is no longer the same size as
\sizet. This difference may require making some changes to
existing code to use the correct type depending on whether the variable
needs to be able to store a pointer type. In cases where this is not obvious
(such as for a callback argument), we recommend the use of \cuintptrt.
This ensures that provenance is maintained.

\pgnnote{The above section begs questions relating to what is the
  responsibility of programmers and what can be aided or managed by
  compilers.  Ideally, the latter would be preferable to requiring
  programmers to understand things that are possibly beyond their so-called
  experience.}

\subsubsection{Capability alignment in memory}

Because tags apply only to memory locations that are capability-aligned
and capability-sized,
unaligned storage of pointers will either generate a run-time
hardware exception (if a capability-aware load or store is performed), or discard the
tag (if a capability-oblivious memory copy is performed -- e.g., using
\cfunc{memcpy} to copy from an aligned location to an unaligned one).
One example of this is Berkeley DB (BDB) when used as an in-memory
implementation rather than as an on-disk database format.
Even when patched to use \cfunc{memcpy} to copy objects stored as data, it
does not ensure sufficient alignment in its internal storage to preserve tags.
We therefore recommend against using BDB for this purpose.
While unaligned pointer use is uncommon in C programs, as data-structure
layouts are normally designed to keep them strongly aligned for performance
and atomicity reasons, any code depending on unaligned pointers will need
to be changed.

\amnote{Should we mention code that assumes that it is ok to go out of bounds
for optimization purposes? E.g., strcmp loading a word at a time?}
\psnote{yes}

\subsubsection{Single-origin provenance}
\label{sec:ambiguous-provenance}

In the CHERI memory protection model, capabilities are derived from a single other
capability.
However, in C code, expressions may construct a new \cintptrt value from more
than one provenance-carrying parent \cintptrt{} -- for example, by casting both a
pointer and a literal value to \cintptrt{}-s, and then adding them.
\psnote{That literal value wouldn't have a non-empty provenance, so this isn't the best example.   Maybe better to have something like \texttt{p+(q1-q2)} ?}
\psnote{More generally, there is a bit of a mismatch between this and our C provenance treatment of \cintptrt, which there is a plain integer type with no provenance -- but which regains provenance in some cases when cast back to a pointer.  To ponder...}
In that case, the compiler must decide which input capability provides the
capability metadata (bounds, permissions, \ldots{}) to be used in the output
value.
Consider for example the following code:
\begin{lstlisting}[language=C]
void *c1 = (void *)((uintptr_t)input_ptr + 1);
void *c2 = (void *)(1 + (uintptr_t)input_ptr);
uintptr_t offset = 1;
void *c3 = (void *)(offset + (uintptr_t)input_ptr);
\end{lstlisting}

In C with integer pointers, \cvar{c1}, \cvar{c2}, and \cvar{c3} might each be expected to have the
same value as \cvar{input\_ptr}, except with the address incremented by one.
In CHERI C, each expression includes an arithmetic operation between provenance-carrying types.
While not visible in the source code, the constant \cconst{1} is promoted to a capability type, \cuintptrt.
In the current implementation, the compiler will return the expected provenance-carrying result for cases \cvar{c1} and \cvar{c2} but not \cvar{c3}.%
%
\footnote{%
Historically, the CHERI compiler would select the left-hand-most pointer in the expression as the provenance source.
While this model follows a single consistent rule, it can lead to surprising behavior if an expression places the provenance-carrying value to the right-hand-side.
In the example above, the value of \cvar{c1} would be a valid capability, but \cvar{c2} and \cvar{c3} would hold an untagged value (albeit with the expected address).}
%
For \cvar{c1} and \cvar{c2}, the compiler sees that one of the sides is a non-provenance-carrying integer type that was promoted to \cuintptrt and therefore selects the other operand as the provenance source.
It is not feasible to infer the correct provenance source for the third case, so the compiler will emit a warning.%
%
\footnote{%
We could add a data-flow-sensitive analysis to determine whether values are the result of promotion from a non-provenance-carrying type.
However, this would add significant complexity to the compiler and we have not seen many cases where this would have avoided changes to the source code.
\psnote{from a language-design POV, it'd be pretty horrid to have substantial semantics depend on just how smart one's analysis is}
\arnote{I agree. Even the current behavior is quite ugly, but at least it has measurable compatibility benefits.}
}
%
The current behavior for such ambiguous cases is to select the left-hand-side as the provenance source, but we are considering making this an error in the future.
%
The recommended approach to resolve such ambiguous cases is to change the type of one operand to a non-provenance-carrying type such as \sizet.
Alternatively, if the variable declaration cannot be changed, it is also possible to use a cast in the expression itself.
\begin{lstlisting}[language=C]
size_t offset_size_t = 1;
void *c3_good1 = (void *)(offset_size_t + (uintptr_t)input_ptr);

uintptr_t offset_uintptr_t = 1;
void *c3_good2 = (void *)((size_t)offset_uintptr_t + (uintptr_t)input_ptr);
\end{lstlisting}

We also provide a new attribute \ccode{cheri\_no\_provenance} that can be used to annotate variables or fields of type \cintptrt/\cuintptrt where the underlying type cannot be changed:
\begin{lstlisting}[language=C]
struct S {
    uintptr_t maybe_tagged;
    uintptr_t never_tagged __attribute__((cheri_no_provenance));
};
void test(struct S s, uintptr_t ptr) {
    void *x1 = (void *)(s.maybe_tagged + ptr); // ambiguous, currently uses LHS
    void *x2 = (void *)(s.never_tagged + ptr); // not ambiguous, uses RHS
}
\end{lstlisting}
\psnote{This doesn't really explain what \ccode{cheri\_no\_provenance} does?  And what it means when applied to other types?}\arnote{compiler error if it's not \cuintptrt. Will try to improve example later.}

\subsection{Bounds}

CHERI C/\cpp{} pointers are implemented using capabilities that enforce lower and
upper bounds on access.
In the pure-capability run-time environment, those bounds are normally set to
the range of the memory allocation into which the pointer is intended to
point.
Because of capability compression, increased alignment requirements may apply
to larger allocations (see \Cref{sec:bounds_alignment}).

Bounds may be set on pointers returned by multiple system components including
the OS kernel, the run-time linker, compiler-generated code, system libraries,
and other utility functions.
As with violations of provenance validity, out-of-bounds accesses -- including
load, store, and instruction fetch -- trigger a hardware exception (see
\Cref{sec:faults}).

\subsubsection{Bounds from the compiler and linker}

The compiler will arrange that language-level pointers to stack allocations have suitable
bounds, and that the run-time linker will return bounded pointers to global
variables.
Bounds will typically be set based on an explicitly requested allocation size
(e.g., via the size passed to \cfunc{alloca}\kern-2pt) or, for compiler-generated
code or linker-allocated memory, by the C type mechanism (e.g.,
\ccode{sizeof(foo)}\kern-2pt), adjusted for precision requirements arising from
capability compression.
In some cases, such as with global variables allocated in multiple object
files, the actual size of the allocation may not be resolved until run time,
by the run-time linker.
These bounds will typically not cause observable changes in behavior -- other than hardware exceptions when (accidentally) performing an out-of-bounds access.

\subsubsection{Bounds from the heap allocator}

\cfunc{malloc} will set bounds on pointers to new heap allocations.
In typical C use, this is not a problem, as programmers expect to access
addresses only within an allocation.

However, in some uses of C, there may be an expectation that memory access can
occur outside the allocation bounds of the pointer via which memory access
takes place.
For example, if an integer pointer difference \cvar{D} is taken between
pointers to two different allocations (\cvar{B} and \cvar{A}), and later
added to pointer \cvar{A}, the new pointer will have an address
within \cvar{B}, but permit access only to \cvar{A}.
This idiom is most likely to be found with non-trivial uses of \cfunc{realloc} (e.g., cases where multiple pointers into a buffer allocated or reallocated by \cfunc{realloc} need to be updated).
We note that the subtraction of two pointers from different
allocations is undefined behavior in ISO C, and risks mis-optimization from
breaking compiler alias analysis assumptions.
Further, \emph{any} operation on the pointer passed to \cfunc{realloc} is undefined upon
return.  Instead, we suggest that the programmer measure a pointer \cvar{P}'s
offset into an object \cvar{A} \emph{prior to} \cfunc{realloc} and derive new pointers
from the \cfunc{realloc} result \cvar{B} and these offsets (i.e., compute
$\text{\cvar{B}} + (\text{\cvar{P}} - \text{\cvar{A}})$ rather than
$\text{\cvar{P}} + (\text{\cvar{B}} - \text{\cvar{A}})$).%
%
\footnote{%
While it may seem that \cvar{A} remains available after \cfunc{realloc}\kern-2pt, our
revocation sweeps which enforce temporal safety may have atomically replaced
this with a non-pointer value.  The scalar value
$\text{\cvar{D}} = \text{\cvar{P}} - \text{\cvar{A}}$
will naturally be preserved by revocation.}

\subsubsection{Subobject bounds}

\purecapCOrCpp{} also supports automatically restricting the
bounds when a pointer is taken to a subobject -- for example, an array
embedded within another structure that itself has been heap allocated.
This will prevent an overflow on that array from affecting the remainder of
the structure, improving spatial safety.
Subobject bounds are not enabled by default as they may require additional source code changes
for compatibility, but can be enabled using the \mbox{\commandline{-Xclang}} \mbox{\commandline{-cheri-bounds=subobject-safe}} compiler flag.

One example of C code that requires changes for subobject bounds is the \ccode{containerof}
pattern, in which pointer arithmetic on a pointer to a subobject is used to
recover a pointer to the container object -- for example, as seen in the
widely used BSD \pathname{queue.h} linked-list macros or the generic C
hash-table implementation, \pathname{uthash.h}.

In these cases, an opt-out annotation can be applied to a given type, field or variable
that instructs the compiler to not tighten bounds when creating pointers to subobjects.
We currently define three opt-out annotations that can be used to allow
existing code to disable use of subobject bounds:

\paragraph{Completely disable subobject bounds} It is possible to annotate a typedef, record member,
or variable declaration with:

\begin{lstlisting}[language={C}]
__attribute__((cheri_no_subobject_bounds))
\end{lstlisting}

\noindent
to indicate that the compiler should not tighten bounds when taking the address or a \cpp{} reference. In \cpp{}11/C2x mode this can also be spelled as \cxxcode{[[cheri::no\_subobject\_bounds]]}.

\begin{lstlisting}[language={C}]
struct str {
    /*
     * Nul-terminated string array -- pointers taken to this subobject will
     * use the array's bounds, not those of the container structure.
     */
    char               str_array[128];

    /*
     * Linked-list entry element -- because of the additional attribute,
     * pointers taken to this subobject will use the container structure's
     * bounds, not those of the specific field.
     */
    struct list_entry  str_le __attribute__((cheri_no_subobject_bounds));
} str_instance;

void
fn(void)
{
    /* Struct pointer gets bounds of str_instance. */
    struct str *strp = &str_instance;

    /* Character pointer gets bounds of the subobject, not str_instance. */
    char *c = str_instance.str_array;

    /* Struct pointer gets bounds of str_instance, not the subobject. */
    struct list_entry *lep = &str_instance.str_le;
}
\end{lstlisting}

\paragraph{Disable subobject bounds in specific expressions}
It is also possible to opt out of bounds-tightening on a per-expression
granularity by casting to an annotated type:

\begin{lstlisting}[language={C}]
char *foo(struct str *strp) {
    return (&((__attribute__((cheri_no_subobject_bounds))struct str *)
        strp)->str_array);
}
\end{lstlisting}

\paragraph{Use remaining allocation size}
In certain cases, the size of the subobject is not known, but we still know that data
before the field member will not be accessed (e.g., variable size array members
inside structs).
Pre-C99 code will declare such members as fixed-size arrays, which will cause
a hardware exception if the allocation does not grant access to that many bytes.%
%
\footnote{%
If flexible array members are declared using the C99 syntax with empty square
brackets, the compiler will automatically use the remaining allocation size.}
%
To use the remaining allocation size instead of completely disabling bounds
(and thus protecting against buffer underflows) the annotation:

\begin{lstlisting}[language={C}]
__attribute__((cheri_subobject_bounds_use_remaining_size))
\end{lstlisting}

\noindent
can be used.
When targeting \cpp{}11/C2x:

\begin{lstlisting}[language={C++}]
[[cheri::subobject_bounds_use_remaining_size]]
\end{lstlisting}

\noindent
is also supported.
Examples of this pattern include FreeBSD's \ccode{struct dirent}, which uses
\ccode{char d\_name[255]} for an array that is actually of variable size, with
the containing allocation (e.g., of the heap) being sized to allow additional
space for array entries regardless of size in the type definition.
For example:

\begin{lstlisting}[language={C}]
struct message {
    int     m_type;

    /*
     * Variable-length character array -- because of the additional
     * attribute, pointers taken to this subobject will have a lower bound
     * at the first address of the array, but retain an upper bound of the
     * allocation containing the array, rather than 252 bytes higher.
     */
    char    m_data[252]
                 __attribute__((cheri_subobject_bounds_use_remaining_size));
};
\end{lstlisting}

The use of subobject bounds imposes additional compatibility constraints on
existing C and \cpp{} code.
While we have not encountered many issues related to subobject bounds in
existing code, it does slightly increase the porting effort.
%\nwfnote{Already said above:}
%Therefore, this feature is currently not enabled by default and requires a
%compiler flag to be enabled.

\psnote{that seems excessively bold to me}
\psnote{what flag?}
\psnote{curious: what has to change before you think it'd be a good default?}

% \noindent
% \textbf{XXX: Explain how to exempt those pointer-taking snippets.}

\subsubsection{Other sources of bounds}

Bounds may also be set by other parts of the implementation.
For example, the kernel may set bounds on pointers to new memory mappings (see
\Cref{sec:cheriabi}), and the system library may set bounds on pointers
into returned buffers from APIs -- e.g., \cfunc{fgetln}\kern-2pt.
More detailed information on how C/\cpp{} code can set bounds can be found in
\Cref{sec:cheri-apis}.

\subsubsection{Out-of-bounds pointers}
\label{sec:oob}

\note{I feel like this section wants a reference to CHERI Concentrate?}{nwf}

ISO C permits pointers to go only one byte beyond their original
allocation, but widely used code sometimes constructs transient pointer
values that are further out of bounds.
For example, \ccode{for} loops iterating over an array may increment a pointer
into the array by the array entry size before performing an overflow check
that terminates the loop.
This temporarily constructs an out-of-bounds pointer without an out-of-bounds
dereference taking place.
%
\nwfnote{In the straightforward case, tho, that still results in the pointer
being only one past the end of its allocation, doesn't it?}

To support this behavior, capabilities
can hold a range of out-of-bounds addresses while retaining a valid
tag, and CHERI-enabled hardware performs bounds checks only on pointer
use (i.e., dereference), not on pointer manipulation.  Dereferencing
an out-of-bounds pointer will raise a hardware exception (see
\Cref{sec:faults}).  However, an out-of-bounds pointer can be
dereferenced once it has been brought back in bounds, by adjusting the
address or supplying a suitable offset in the dereference.

There is, however, a limit to the range of out-of-bounds addresses a capability can hold.
The capability compression model exploits redundancy between the pointer's address and
its bounds to reduce memory overhead (see
\Cref{sec:cheri-capabilities}).
However, when a pointer goes out of bounds, this redundancy is reduced, and at
some point the bounds can no longer be represented within the capability.
The architecture prohibits manipulations that would produce such
a capability.
Depending on the architecture and context, this may lead to the
tag being cleared, resulting in an invalid capability, or in an immediate
hardware exception being thrown.
Attempting to dereference the invalid capability will fail in the same
manner as a loss of pointer provenance validity (see
\Cref{sec:pointer_provenance_validity}).
\psnote{Comment on whether that should immediately trap instead?}
The range of out-of-bounds addresses permitted for a capability is
a function of the length of the bounded region and the number of bits used for bounds in the capability representation.
With 27 bits of the capability used for bounds, CHERI-MIPS and 64-bit
CHERI-RISC-V provide the following guarantees:

\begin{itemize}
\item A pointer is able to travel at least \sfrac{1}{4} the size of the object, or
  $2$ KiB ($2^{\mathit{floor}(\mathit{bounds\_bits}/2)-2}$), whichever is greater, above its upper bound.

\item It is able to travel at least \sfrac{1}{8} the size of the object, or $1$ KiB ($2^{\mathit{floor}(\mathit{bounds\_bits}/2)-3}$),
  whichever is greater, below its lower bound.
\end{itemize}

In general, programmers should not rely on support for arbitrary out-of-bounds
pointers.  Nevertheless, in practice, we have found that the CHERI capability
compression scheme supports almost all in-the-field out-of-bounds behavior in
widely used software such as FreeBSD, PostgreSQL, and WebKit.

\subsection{Pointer comparison}

In \purecapCOrCpp{}, pointer comparison considers only the
integer address part of a capability.
This means that differences in tag validity, bounds, permissions, and so on,
will not be considered by C operators such as \texttt{==}, \texttt{<}, and \texttt{<=}.
On the whole, this leads to intuitive behavior in systems software, where,
for example, \cfunc{malloc} adjusts bounds on a pointer before returning it to
a caller, and then expects an address-wise comparison to succeed when the
pointer is later returned via a call to \cfunc{free}\kern-2pt.
%
\nwfnote{I don't think I particularly like that example, since the thing \cfunc{free}
is nominally comparing against is the bounded return from \cfunc{malloc}\kern-2pt.}
%
However, this behavior could also lead to potentially confusing results; for
example:

\begin{itemize}
\item If a tag on a pointer is lost due to non-provenance-preserving
  \cfunc{memcpy} (e.g., a \ccode{for} loop copying a sequence of bytes), the
  source and destination pointers will compare as equal even though the
  destination will not be dereferenceable.

\item If a \cfunc{realloc} implementation returns a pointer to the same
  address, but with different bounds, a caller check to see if the passed and
  returned pointers are equal will return \ccode{true} even though an access
  might be permitted via one pointer but not the other.
\end{itemize}

\psnote{I'm curious about the impact on compiler optimisation, where in the scope of \texttt{if (p==q)} compilers will often assume the two are interchangeable.  Comment on that?
 }
 \arnote{The choice between exact vs non-exact equals is made extremely late in code generation, it just chooses between emitting CEq and CExEq.
 Compiler analyses use a stricter definition of equality.
 In clang that should include some cases of taking provenance into account for alias information.}

\noindent
However, practical experience has suggested that the current semantics produce fewer
subtle bugs, and require fewer changes, than having comparison operators take
the tag or other metadata into account.%
%
\footnote{%
The CHERI Clang compiler supports an experimental flag \texttt{-cheri-comparison=exact} that causes capability equality comparisons to also include capability metadata and the tag bit.
\arnote{default behavior=\texttt{-cheri-comparison=address}}
}

\subsection{Implications of capability revocation for temporal safety}

Heap temporal safety utilizes revocation sweeps, which, after some quarantine
period, replace in-register and in-memory capabilities to freed memory with
non-dereferenceable values.
For performance reasons, that replacement may be substantially deferred, or,
if there is little demand for fresh allocations, may never occur.
Pointer value replacement may also permit some instances of
a pointer to continue to be usable for longer than others, but the referenced
memory will not be reallocated or otherwise reused until all instances have been rendered unusable.
This model does permit non-exploitable \textit{use-after-free} of heap memory,
but prohibits exploitable memory aliasing by disallowing \textit{use-after-reallocation}.

A pointer's value after \cfunc{free} is undefined, and so dereferencing it is
undefined behavior.
In practice, however, the value of a \cfunc{free}-d pointer may still be
observed in a number of situations, including in lockless algorithms, which
may compare an allocated pointer to a freed one.

Our systems have a choice of replacement values for revoked pointers; all that
is required for correct temporal safety is that the replacement not authorize
access to memory.
Our prototype implementation clears the tag when replacing, as this
certainly removes authority and possibly simplifies debugging and
non-dereferencing operations, as the original capability bits are left behind.
For example, pointer equality checks that compare only the addresses of the two
pointers (and not their tag values) will continue to work as expected.  With
revocation performed this way, software making explicit use of tags must be
designed to tolerate capability tag clearing by revocation.

Unfortunately, tag-clearing risks type confusion if programmers intend to use
the capability tag to distinguish between integers and pointers in tagged
unions (we have so far generally discouraged this idea, but understand why it
may remain attractive).  Therefore, we have considered other options for
revocation, including \emph{permission}-zeroing (while preserving the
tag) and wholesale replacement with \cconst{NULL} (i.e., the untagged
all~zero value).  These options may be more attractive for some software, and
would have different implications for the C/\cpp{} programming model.

We anticipate that revocation will remain a tag-clearing operation by default,
as tag-clearing removes any risk of needlessly re-examining the capability in
later revocations.  However, it may be possible to allow coarse control over
revocation behavior either per process or by region of the address space.  In
the latter case, \cfunc{mmap} may gain flags specifying which revocation
behavior is desirable for capabilities pointing \emph{into} the mapped region
and/or \cfunc{madvise} may gain flags controlling the revocation behavior of
capabilities \emph{within} a target region.  Which of these or similar
mechanisms provide utility to software and can be offered at reasonable
performance remains an open question.

\subsection{Bitwise operations on capability types}

In most cases bitwise operations -- such as those used to store or clear flags
in the lower bits of pointers to well-aligned allocations -- will result in the expected \cuintptrt value being created.
However, there are some corner cases where the result may be a tagged (but out-of-bounds)
capability when an integer value is expected.
\arnote{TODO: add an example. Maybe the mutex example checking low pointer bits + some alignment checks?}
Dually, bitwise operations may also result in the loss of tags if intermediate results become unrepresentable (recall \Cref{sec:oob}).%
%
\footnote{%
Previous versions of the compiler used the capability offset (address minus base) instead
of the address for arithmetic on \cuintptrt.
This often resulted in unexpected results and therefore we switched to using
the address in \cuintptrt arithmetic instead.
The old offset-based mode may be interesting for garbage-collected C where
addresses are less useful and therefore it can still be enabled by
passing \commandline{-cheri-uintcap=offset}.
However, this may result in significantly reduced compatibility with legacy C code.}
%
Most bitwise operations on \cuintptrt fall into one of three categories for which we provide
higher-level abstractions.

\paragraph{Aligning pointer values}
If the C code is attempting to align a pointer or check the alignment of pointers,
the following compiler builtins should be used instead:
\begin{description}
\item[\ccode{T \_\_builtin\_align\_down(T ptr, size\_t alignment)}\kern-2pt]
  This builtin returns \cvar{ptr} rounded down to the next multiple of \cvar{alignment}.
\item[\ccode{T \_\_builtin\_align\_up(T ptr, size\_t alignment)}\kern-2pt]
  This builtin returns \cvar{ptr} rounded up to the next multiple of \cvar{alignment}.
\item[\ccode{\_Bool \_\_builtin\_is\_aligned(T ptr, size\_t alignment)}\kern-2pt]
  This builtin returns \cconst{true} if \cvar{ptr} is aligned to at least \cvar{alignment} bytes.
\end{description}

\rwnote{It would be nice if we had, and could document here, cheri\_ versions
  of these macros.}
\arnote{Probably best to use the \_\_builtin versions since that also works for upstream clang.}

One advantage of these builtins compared to \cuintptrt arithmetic is that they preserve the
type of the argument and can therefore remove the need for intermediate casts to \cuintptrt.
Moreover, using these builtins allows for improved compiler diagnostics and can result in better code-generation compared to hand-written functions or macros.
We have submitted these builtins as part of the upstream Clang 10.0 release, so they can also be used for code that does not depend on CHERI.
\arnote{Should I include some of the documentation I wrote for upstream LLVM? (\url{https://clang.llvm.org/docs/LanguageExtensions.html\#alignment-builtins})}

\paragraph{Storing additional data in pointers}
\label{sec:low-pointer-bits}
In many cases the minimum alignment of pointer values is known and therefore
programmers assume that the low bits (which will always be zero) can be
used to store additional data.%
%
\footnote{%
CHERI actually provides many more usable bits than a conventional architecture.
In the current implementation of 128-bit CHERI, any bit between\psnote{inclusive?} the least
significant and the 9th least significant bit may be toggled without causing
the tag to be cleared in pointers that point to the beginning of an allocation (i.e., whose \emph{offset} is zero).
\psnote{This is confusing -- not clearing the tag isn't the same as not destroying part of the pointer data...}
If the pointer is strongly aligned, further bits may be toggled without clearing the tag.
%
\nwfnote{But the macros only permit the use of the bottom 5.  We should say that somewhere.}}
%
Unused high pointer bits cannot be used for additional metadata since toggling them causes a large change to the address field, and capabilities that are significantly far out-of-bounds cannot be represented (see \cref{sec:oob}).

The compiler-provided header \ccode{<cheri.h>} provides explicit macros for this
use of bitwise arithmetic on pointers.
The use of these macros is currently optional,%
%
\footnote{%
Until recently, not using these macros could result in subtle bugs at run time since pointer equality comparisons included the tag bit in addition to the address.}
%
but we believe that they can improve readability compared to hand-written bitwise operations.
Additionally, the bitwise-AND operation is ambiguous since it can be used both to clear bits (which should return a provenance-carrying \cuintptrt) and to check bits (which should return an integer value).
In complex nested expressions, these macros can avoid ambiguous provenance sources (see \cref{sec:ambiguous-provenance}) since they show the compiler which intermediate results can carry provenance.

\begin{description}
\item[\ccode{uintptr\_t cheri\_low\_bits\_clear(uintptr\_t ptr, ptraddr\_t mask)}\kern-2pt]
  This function clears the low bits of \cvar{ptr} in the same way as \ccode{ptr \& \textasciitilde{}mask}.
  It returns a new \cuintptrt value that can be used for memory accesses when cast to a pointer.
  \cvar{mask} should be a bitwise-AND mask less than \ccode{\_Alignof(ptr)}\kern-2pt.

\item[\ccode{ptraddr\_t cheri\_low\_bits\_get(uintptr\_t ptr, ptraddr\_t mask)}\kern-2pt]
  This function returns the low bits of \cvar{ptr} in the same way as \ccode{ptr \& mask}.
  It should be used instead of the raw bitwise operation since it can never return
  an unexpectedly tagged value.
  \cvar{mask} should be a bitwise-AND mask less than \ccode{\_Alignof(ptr)}\kern-2pt.

\item[\ccode{uintptr\_t cheri\_low\_bits\_or(uintptr\_t ptr, ptraddr\_t bits)}\kern-2pt]
  This function performs a bitwise-OR of \cvar{ptr} with \cvar{bits}.
  In order to retain compatibility with a non-CHERI architecture, \cvar{bits} should be less than the known alignment of \cvar{ptr}.

\item[\ccode{uintptr\_t cheri\_low\_bits\_set(uintptr\_t ptr, ptraddr\_t mask, ptraddr\_t bits)}\kern-2pt]
  This function sets the low bits of \cvar{ptr} to \cvar{bits} by clearing the low bits in \cvar{mask} first.

\end{description}

\paragraph{Computing hash values}

The compiler will also warn when operators such as modulus or shifts are used on
\cuintptrt. This usually indicates that the pointer is being used as the input to a hash
function or similar computations.
In this case, the programmer should not be using \cuintptrt but instead cast the pointer
to \ptraddrt and perform the arithmetic on this type instead.
This has the advantage that it can be slightly more efficient than \cuintptrt arithmetic on
a split-register file architecture such as CHERI-MIPS.
\jrtcnote{This point is slightly dubious; a sufficiently-smart compiler should
be able to optimize it to the equivalent \ptraddrt form, provided the result
eventually is put in a \ptraddrt or other non-capability type and nothing
consumes the capability metadata for all the intermediate calculations (e.g.,
it's used as an index).}
\arnote{I think I was just trying to point out that it avoids wasting space for variables stored to memory (e.g., on-stack arguments, etc.)}

\subsection{Function prototypes and calling conventions}

CHERI C/\cpp{} distinguishes between integer and pointer types at an
architectural level, which can lead to compatibility problems with older C
programming styles that fail to unambiguously differentiate these types:

\begin{description}
\item[Unprototyped (\textit{K\&R}) functions] Because pointers can no longer
  be loaded and stored without using capability-aware instructions, the
  compiler must know whenever a load or store might operate on a pointer
  value.
  The C-language default of using an integer type for function arguments when
  there is not an appropriate function prototype will cause pointer values to
  be handled improperly; this is also true on LP64 ABIs (e.g., most 64-bit
  POSIX systems).%
  %
  \footnote{%
  The forthcoming ISO C2x standard
  makes function declarations with an empty parameter list equivalent to
  a parameter list consisting of a single \ccode{void}.}
  %
  To avoid these problems, the CHERI Clang compiler emits a warning (\commandline{-Wcheri-prototypes}) by default when a function without a declared prototype is called.
  This warning is less strict than \commandline{-Wstrict-prototypes} and can be
  used to convert \textit{K\&R} functions that may cause problems.%
  %
  \footnote{%
  If the \textit{K\&R} function is defined within the same
  file, the compiler can determine the correct calling convention and will not
  emit a warning.}
  %
  This should not be an issue for C code written in the last 20 years, but
  many core operating-system components can be significantly older.

\item[Variadic arguments] The calling convention for variadic functions
  passes all variadic arguments via the stack and accesses them via an
  appropriately bounded capability.
  This provides memory-protection benefits, but means that vararg functions
  must be declared and called via a correct prototype.

  Some C code assumes that the calling convention of variadic and non-variadic
  functions is sufficiently similar that they may be used interchangeably.
  Historically, this included the FreeBSD kernel's implementation of
  \cfunc{open}\kern-2pt, \cfunc{fcntl}\kern-2pt, and \cfunc{syscall}\kern-2pt.

  \rwnote{I wonder if we need to be more specific with an example here.}\arnote{TODO: Add example such as missing open() mode arguments?}

\end{description}

\subsection{Data-structure and memory-allocation alignment}

CHERI C/\cpp{} have stronger alignment requirements than C/\cpp{} on conventional
architectures.
These requirements arise from two sources: that capabilities themselves must
be aligned at twice the integer architectural pointer width, and that
capability compression constrains the addresses that can be used for bounds
on larger objects.
\amnote{Is it worth mentioning compiler flags to warn on excessive padding?
  In particular, it seems that it is often the case that the ordering of
  struct elements that was devised for 32bit and 64bit architectures does
  not help much to avoid extra padding with capabilities. It more or less
  depends on how much the pointers are scattered in the struct definition.}

\subsubsection{Restrictions in capability locations in memory}
\label{sec:restricted-capability-locations}

CHERI C/\cpp{} constrain how and where pointers can be stored in memory in two
ways:

\begin{description}
\item[Alignment] CHERI's tags are associated with capability-aligned,
  capability-sized locations in physical memory.
  Because of this, all valid pointers must be stored at such locations,
  potentially disrupting code that may use other alignments.

  On the whole, for performance and atomicity reasons, pointers are strongly
  aligned even on non-tagged architectures -- however, when C constructs such
  as \ccode{\_\_packed} are used, unaligned pointers can arise, and will not
  work with CHERI.
  While the compiler and native allocators (stack, heap, \ldots{}) will
  provide sufficient alignment for capability-based pointers, custom
  allocators may align allocations to \ccode{sizeof(intmax\_t)} rather than
  \ccode{alignof(\maxalignt)}\kern-2pt.

\item[Size] CHERI capabilities are twice the size of an integer able to
  describe the full address space.
  On 64-bit systems, this means that CHERI pointers will have a width of 128
  bits -- while maintaining the arithmetic properties of a 64-bit integer
  address.
  C code historically embeds assumptions about pointer size in a number of forms,
  all of which will need to be addressed when porting to CHERI,
  including:

  \begin{itemize}
  \item Assuming that a pointer will fit into the largest integer type.
  \item Assuming that the number of bits in a pointer type is the same
    as the number of bits indexing the address space it can refer to.
  \item Assuming that the number of bits in a pointer type is the same as the
    number of bits suitable for use in performing bit-wise manipulations of
    pointer values.
  \item Assuming that pointers must either be 32 or 64 bits.
  \item Assuming that aligning to \ccode{sizeof(double)} is sufficient to store any type.
  \item Assuming that high bits of the pointer address can be used for
  additional metadata. This is not true on CHERI since toggling high bits of a
  pointer can cause it to be so far out of bounds that it is no longer representable
  due to the compression of pointer bounds. However, it is still possible to use
  the low bits for additional metadata (see \Cref{sec:low-pointer-bits}).
  \end{itemize}
  \rwnote{Should there be more things in this list?}
\end{description}

These portability problems will typically be found due to hardware exceptions
thrown on attempted unaligned accesses of capability values
(see \Cref{sec:faults}).
However, they can also arise in the form of stripped tag bits, leading to
invalid capabilities that cannot be dereferenced, if, for example, pointer
values are copied into inappropriately aligned allocations.

\section{The CheriABI POSIX process environment}
\label{sec:cheriabi}

The CheriABI process environment implements a standard POSIX/UNIX API, but in
some areas there are changes to API semantics (e.g., in the handling of tagged
pointer values and I/O) or new functionality (such as that relating to the
handling of capability-related faults).

\rwnote{Should there be information on a further CHERI C header here, which
  contains OS API bits and bobs?  Or is everything either in cheri.h/cheric.h
  or existing OS headers?}

\subsection{POSIX API changes}

\begin{description}
\item[Writing and reading pointers via files] In the CheriABI process
  environment, only untagged data (not tagged pointers) may be written to or
  read from files.
  If a region of memory containing valid pointers is written to a file, and
  then read back, the pointers in that region will no longer be valid.
  If a file is memory mapped, then pages mapped copy-on-write
  (\cconst{MAP\_PRIVATE}) are able to hold tagged pointers, since they are
  swap-backed rather than file-backed, but pages mapped directly from the
  buffer cache (\cconst{MAP\_SHARED}) are not.

\item[Passing pointers via IPC] In the CheriABI process environment, only
  untagged data, not tagged pointers, may be passed via various forms of
  message-passing Inter-Process Communication (IPC).
  Some existing software takes advantage of a shared address-space layout
  (via \cfunc{fork}\kern-2pt) to pass pointers to elements of shared data structures
  (e.g., entries in dispatch tables).
  This code must be converted to use indexes into tables or other lookup
  mechanisms rather than passing pointers via IPC.

\item[\cfunc{mmap} bounds] In CheriABI, the \cfunc{mmap} system
   call returns a bounded capability to the allocated address space.
   To ensure the capability does not overlap other allocations,
   lengths that would otherwise be unrepresentable are rounded up
   and padded with a new type of guard pages.
   These guard pages fault on access and may not be mapped over.
   They are unmapped when the rest of the mapping is unmapped.

\item[\cfunc{mmap} permissions] The permissions of the capability
   returned by \cfunc{mmap} are determined by a combination of the
   requested page protections and the capability passed as an address hint
   (or fixed address with \cconst{MAP\_FIXED}).
   When using the pattern of requesting a mapping with \cconst{PROT\_NONE}
   and then filling in sections (as is done in run-time linkers, VM host
   environments, etc.), it is necessary to ensure that the initial
   capability has the right permissions.
   The \cvar{prot} argument has been extended to accept additional
   flags indicating the maximum permission the page can have so that a
   linker might request a reservation for a library with the permissions
   \ccode{(PROT\_MAX(PROT\_READ|PROT\_WRITE|PROT\_EXEC) | PROT\_NONE)}\kern-2pt, which
   would return a capability permitting loads, stores, and instruction
   fetch while mapping the pages with no (MMU) permissions.
\end{description}

\subsection{Handling capability-related signals}

When a capability hardware exception fires, the operating system will map it
into the UNIX \SIGPROT signal.
By default, this signal terminates the process, but the signal can be caught
by registering a \SIGPROT handler.
When the signal handler fires, \ccode{siginfo.si\_code} will be set to
describe the cause of the fault; available values, defined in
\pathname{signal.h}, include:

\begin{description}
\item[\cconst{PROT\_CHERI\_BOUNDS}] Capability bounds fault -- an out-of-bounds
  access was attempted.
\item[\cconst{PROT\_CHERI\_PERM}] Capability permission fault -- the attempted
  access exceeded the permissions granted by a capability.
\item[\cconst{PROT\_CHERI\_SEALED}] Capability sealed fault -- dereferencing a
  sealed capability was attempted.
\item[\cconst{PROT\_CHERI\_TAG}] Capability tag fault -- dereferencing an
  invalid capability was attempted.
\end{description}

\section{CHERI compiler warnings and errors}
\label{sec:cheri-compiler-warnings-and-errors}

\arnote{Feed the source code into \url{https://cheri-compiler-explorer.cl.cam.ac.uk/} to get the latest warning text}

The CHERI Clang compiler includes many diagnostic warnings to identify code that is incompatible with \purecapCOrCpp{} or may result in behavioral differences.
In many cases, a successful compilation that does not emit any CHERI-specific warnings will result in a functional spatially-safe program.
However, some incompatibilities (e.g., memory allocators returning insufficiently aligned pointers) cannot yet be diagnosed statically.
This section describes some of the more-commonly seen compiler warnings and provides suggestions on how to change the source code to be compatible with \purecapCOrCpp{}.
All these warnings are enabled when the \commandline{-Wall} compiler flag
is set.

\subsection{Loss of provenance}
%\begin{compilerwarning}
%cast from provenance-free integer type to pointer type will give pointer that can not be dereferenced
%\end{compilerwarning}
This common compiler warning \arnote{that should be an error by default?} is triggered when casting a non-capability type (e.g., \clongt) to a pointer.
As mentioned in \cref{sec:pointer_provenance_validity}, the result of this cast is a \ccode{NULL}-derived capability with the address set to the integer value.
As any \ccode{NULL}-derived capability is untagged, any attempt to dereference it will trap.

Usually, this warning is caused by programmers incorrectly assuming that \clongt is able to store pointers.
The fix for this problem is to change the type of the cast source to a provenance-carrying type such as \cintptrt or \cuintptrt (see \cref{sec:recommended-c-types}):
\begin{clisting}[numbers=left]
char *example_bad(£\vcpgfmark{StartBadParamTy}£long£\vcpgfmark{EndBadParamTy}£ ptr_or_int) {
    return strdup((const char *)ptr_or_int);
}
char *example_good(£\vcpgfmark{StartGoodParamTy}£intptr_t£\vcpgfmark{EndGoodParamTy}£ ptr_or_int) {
  return strdup((const char *)ptr_or_int);
}
\end{clisting}
\TikzListingHighlightStartEnd[red]{BadParamTy}
\TikzListingHighlightStartEnd[green]{GoodParamTy}
\begin{compilerwarning}
<source>:2:17: warning: cast from provenance-free integer type to pointer type will give pointer that can not be dereferenced [-Wcheri-capability-misuse]
  return strdup((const char *)ptr_or_int);
                ^
1 warning generated.
\end{compilerwarning}

\noindent
In some cases, this warning can be a false positive.
For example, it is common for C callback APIs to take a \cvoidstar data argument that is passed to the callback.
If this value is in fact an integer constant, the warning can be silenced by casting to \cuintptrt first:
\begin{clisting}[numbers=left]
void invoke_cb(void (*cb)(void *), void *);
void callback(void *arg);
void false_positive_example(int callback_data) {
    invoke_cb(&callback, (void *)callback_data); // warning
    invoke_cb(&callback, (void *)£\vcpgfmark{StartSilenceProv}£(uintptr_t)£\vcpgfmark{EndSilenceProv}£callback_data); // no warning
}
\end{clisting}
\TikzListingHighlightStartEnd[green]{SilenceProv}
\begin{compilerwarning}
<source>:4:24: warning: cast from provenance-free integer type to pointer type will give pointer that can not be dereferenced [-Wcheri-capability-misuse]
  invoke_cb(&callback, (void *)callback_data); // warning
                       ^
<source>:15:24: warning: cast to 'void *' from smaller integer type 'int' [-Wint-to-void-pointer-cast]
  invoke_cb(&callback, (void *)callback_data); // warning
                       ^
2 warnings generated.
\end{compilerwarning}

\nwfnote{The ``:15:24'' above should also be ``:4:24''?}

\subsection{Ambiguous provenance}
For arithmetic and bitwise binary operations between \cuintptrt/\cintptrt, the compiler can generally infer which side of the expression should be used as the provenance (and bounds) source.
However, as noted in \cref{sec:ambiguous-provenance}, there are cases that are ambiguous as far as the compiler is concerned.

Consider for example a structure that holds a pointer and a small number of flags.
In this case the pointer is known to be aligned to at least 8 bytes, so the programmer uses the lowest 3 bits to store additional data:
\begin{clisting}[numbers=left]
typedef struct { uintptr_t data; } pointer_and_flags;
void set_ptr(pointer_and_flags *p, void *value) {
    p->data = (p->data & (uintptr_t)7) | (uintptr_t)(value);
}
void set_flags(pointer_and_flags *p, unsigned flags) {
    p->data = p->data | (flags & 7);
}
\end{clisting}

\begin{compilerwarning}
<source>:3:40: warning: binary expression on capability types '__uintcap_t' and 'uintptr_t' (aka '__uintcap_t'); it is not clear which should be used as the source of provenance; currently provenance is inherited from the left-hand side [-Wcheri-provenance]
    p->data = (p->data & (uintptr_t)7) | (uintptr_t)(value);
              ~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~~
1 warning generated.
\end{compilerwarning}

\noindent
Unlike the compiler, the programmer knows that inside \cfunc{set\_ptr} capability metadata should always be taken from the \ccode{value} argument.
The suggested fix for this problem is to cast the non-pointer argument to an integer type:
\TikzListingHighlightStartEnd[green]{FixAmbig}
\begin{clisting}[numbers=left]
void set_ptr(pointer_and_flags *p, void *value) {
    p->data = £\vcpgfmark{StartFixAmbig}£(size_t)£\vcpgfmark{EndFixAmbig}£(p->data & (uintptr_t)7) | (uintptr_t)(value);
}
\end{clisting}

\nwfnote{Not use cheri\_low\_bits\_set()?}

\arnote{TODO: this section should have more examples.}

\subsection{Underaligned capabilities}
%\begin{compilerwarning}
%alignment (<N>) of '<type>' is less than the required capability alignment
%\end{compilerwarning}
This warning is triggered when packed structures contain pointers.
As mentioned in \cref{sec:restricted-capability-locations}, pointers must always be aligned to the size of a CHERI capability (16 bytes for a 64-bit architecture).
This warning can be triggered by code that attempts to align pointers to at least 8 bytes (e.g., for compatibility between 32- and 64-bit architectures). For example:
\TikzListingHighlightStartEnd[red]{BadAlignPacked}
\begin{clisting}[numbers=left]
struct AtLeast8ByteAlignedBad {
    void *data;
} __attribute__((packed, £\vcpgfmark{StartBadAlignPacked}£aligned(8)£\vcpgfmark{EndBadAlignPacked}£));
\end{clisting}
\begin{compilerwarning}
<source>:1:8: warning: alignment (8) of 'struct AtLeast8ByteAlignedBad' is less than the required capability alignment (16) [-Wcheri-capability-misuse]
struct AtLeast8ByteAlignedBad {
       ^
<source>:1:8: note: If you are certain that this is correct you can silence the warning by adding __attribute__((annotate("underaligned_capability")))
1 warning generated.
\end{compilerwarning}

\noindent
The simplest fix for this issue is to either increase alignment to be CHERI-compatible, or use a ternary expression to include \ccode{alignof(\cvoidstar)}\kern-2pt:
%
\begin{clisting}[numbers=left]
£\vcpgfmark{StartFixAlign1}£#include <stdalign.h>£\vcpgfmark{EndFixAlign1}£
struct AtLeast8ByteAlignedGood {
    void *data;
} __attribute__((packed,aligned(£\vcpgfmark{StartFixAlign2}£alignof(void *) > 8 ? alignof(void *) : 8£\vcpgfmark{EndFixAlign2}£)));
\end{clisting}
\TikzListingHighlightStartEnd[green]{FixAlign1}
\TikzListingHighlightStartEnd[green]{FixAlign2}
%
In the rare case that creating a potentially underaligned pointer is actually intended, the warning can be silenced by adding an \ccode{annotate("underaligned\_capability")} attribute:
%
\TikzListingHighlightStartEnd[green]{SilenceAlign}
\begin{clisting}[numbers=left]
struct UnderalignPointerIgnoreWarning {
    void *data;
} __attribute__((packed, aligned(4), £\vcpgfmark{StartSilenceAlign}£annotate("underaligned_capability")£\vcpgfmark{EndSilenceAlign}£));
\end{clisting}

\section{C APIs to get and set capability properties}
%\section{C APIs for restricting capability permissions and bounds}
\label{sec:cheri-apis}

\rwnote{I wonder if we should talk more about permissions?  Perhaps not in
  this document, in which case possibly we should talk about them less?}
\amnote{If this is intended as a document to guide porting efforts perhaps
  we should mention them only as background info? If this becomes a summary
  of CHERI programming patterns then we probably want a section that talks
  about permissions as well.}

\purecapCOrCpp{} supports a number of new APIs to get and set capability
properties given a pointer argument.
Although most software does not need to directly manage capability properties,
there are some cases when application code needs to further constrain
permissions or limit bounds associated with pointers.
For example, high-performance applications may contain custom memory
allocators and wish to narrow bounds and permissions on returned pointers
to prevent overflows between their own allocations.

\subsection{CHERI-related header files}

A set of compiler built-in functions provide access to capability properties
of pointers.
Two new header files (distributed as part of the CHERI Clang compiler)
provide access to further CHERI-related programming
interfaces including more human-friendly macro wrappers around the compiler
builtins, and also definitions of key CHERI constants:

\begin{description}
\item[\pathname{cheriintrin.h}] defines interfaces to access and
  modify capability properties.
  It also defines constants for capability permissions that are portable
  across all implementations of CHERI.

\item[\pathname{cheri.h}] provides macros for slightly higher-level operations
  such as the manipulation of low pointer bits (\cref{sec:low-pointer-bits}).
\end{description}

\noindent
When compiling for CheriBSD, the following header provides additional
constants relating to OS use of capabilities -- for example, software-defined
permission bits:

\begin{description}
\item[\pathname{cheri/cheri.h}] defines constants such as those used in the
  capability permission mask.

%\item[\pathname{cheri/cheric.h}] defines interfaces to access and
%  modify capability properties.
\end{description}

\rwnote{This section may need updating once we've converged OS and compiler
  versions of cheri.h, and done any necessary header refactoring.}

\subsection{Retrieving capability properties}

The following APIs allow capability properties to be retrieved from pointers:

\begin{description}
\item[\ccode{ptraddr\_t cheri\_address\_get(void *c)}\kern-2pt] Return the address of the capability \cvar{c}.

\item[\ccode{ptraddr\_t cheri\_base\_get(void *c)}\kern-2pt] Return the lower bound of capability \cvar{c}.

\item[\ccode{size\_t cheri\_length\_get(void *c)}\kern-2pt] Return the length of the bounds for the capability \cvar{c}.
  The base plus the length gives the upper bound on \cvar{c}'s address.

\item[\ccode{size\_t cheri\_offset\_get(void *c)}\kern-2pt] Return the difference between the address and the lower bound of the capability \cvar{c}.

\item[\ccode{size\_t cheri\_perms\_get(void *c)}\kern-2pt] Return the permissions of capability
  \cvar{c}. (See \Cref{sec:capability_permissions}.)

\item[\ccode{\_Bool cheri\_tag\_get(void *c)}\kern-2pt] Return whether capability \cvar{c} has its
  validity tag set.
  \arnote{This returns the raw tag value, cheriintrin.h may also provide \cfunc{cheri\_is\_valid} and \cfunc{cheri\_is\_invalid}}

\end{description}

\subsection{Modifying or restricting capability properties}

The following APIs allow capability properties to be refined on pointers:

\begin{description}
\item[\ccode{void *cheri\_address\_set(void *c, ptraddr\_t a)}\kern-2pt] Return a new capability with the same permissions and bounds as \cvar{c} with the address set to \cvar{a}.
This can be useful to re-derive a valid pointer from an address.

\cfunc{cheri\_address\_set} is able to set an address \cvar{a} that is
outside of the current bounds of \cvar{c}.  The resulting capability
is treated as an out-of-bounds pointer as described in \Cref{sec:oob}.
However, if the address \cvar{a} is not representable in the current
bounds of \cvar{c} due to capability compression,
\cfunc{cheri\_address\_set} returns a capability without the tag bit set.

%  This macro wraps the compiler built-in
%  \cfunc{\_\_builtin\_cheri\_address\_set}.
\item[\ccode{void *cheri\_bounds\_set(void *c, size\_t x)}\kern-2pt] Narrow the bounds of capability
  \cvar{c} so that the lower bound is the current address (which may
  have been increased relative to \cvar{c}'s original lower bound), and its
  upper bound is suitable for a length of \cvar{x}.

  Note that the effective bounds of the returned capability may be
  wider than the range [\ccode{cheri\_address\_get(c)}\kern-2pt,
  \ccode{cheri\_address\_get(c) + x}) due to capability compression (see
  \Cref{sec:bounds_alignment}), but they will always be a subset of
  the original bounds. % of \cvar{c}.


\item[\ccode{void *cheri\_bounds\_set\_exact(void *c, size\_t x)}\kern-2pt] Narrow the bounds of capability
  \cvar{c} so that the lower bound is the current address, and its
  upper bound is \ccode{cheri\_address\_get(c) + x}.
  This is similar to \cfunc{cheri\_bounds\_set} but will raise a hardware exception if the resulting capability is not precisely representable instead of rounding the bounds.

\nwfnote{No mention of cheri\_bounds\_set\_exact?}

\item[\ccode{void *cheri\_perms\_and(void *c, size\_t x)}\kern-2pt] Perform a bitwise-AND of capability
  \cvar{c}'s permissions and the value \cvar{x}, returning the new
  capability (see \Cref{sec:capability_permissions}).

%  This macro wraps the compiler built-in
%  \cfunc{\_\_builtin\_cheri\_perms\_and}.

\item[\ccode{void *cheri\_tag\_clear(void *c)}\kern-2pt] Clear the tag on \cvar{c}, returning the
  new capability.

\end{description}

% \note{Are the references to the \ccode{\_\_builtin\_} forms useful?  Do we
% want to encourage their use or the \pathname{cheric.h} macros?}{nwf}

\subsection{Capability permissions}
\label{sec:capability_permissions}

A number of capability permissions are available for use; only those relating
to CHERI memory protection are enumerated here:

\begin{description}
\item[\cconst{CHERI\_PERM\_EXECUTE}] Authorize instruction fetch via this
   capability.

\item[\cconst{CHERI\_PERM\_LOAD}] Authorize data load via this capability.

\item[\cconst{CHERI\_PERM\_LOAD\_CAP}] Authorize capability load via this
  capability.
  If the permission is not present, the tag on the loaded value
  will be silently cleared.

\item[\cconst{CHERI\_PERM\_STORE}] Authorize data store via this capability.

\item[\cconst{CHERI\_PERM\_STORE\_CAP}] Authorize capability store via this
  capability.
  If the permission is not present, and the tag on the stored capability is
  valid, then a hardware exception will be thrown.
\end{description}

\noindent
In addition to architectural permissions, CHERI capabilities have
software-defined permissions.
CheriBSD defines the following additional memory-protection-related
permission:

\begin{description}
\item[\cconst{CHERI\_PERM\_CHERIABI\_VMMAP}] A CheriABI-specific user
  permission that the kernel uses to authorize modifications to
  virtual-memory mappings.
  If the permission is not present, system calls that alter the contents
  or the presentation of memory mappings will reject the request.
  As this is a CheriBSD-specific permission, it is not defined in \pathname{cheriintrin.h} and requires inclusion of \pathname{cheri/cheri.h}.
\end{description}

\subsection{Bounds alignment due to compression}
\label{sec:bounds_alignment}

Bounds imprecisions may require a memory allocator to increase the alignment
of an allocation, or increase padding on an allocation, to prevent bounds from
spanning more than one object.
When the length of an object exceeds $2^{floor(bounds\_bits/2)-1}$ (i.e., $4$ KiB for CHERI-MIPS and 64-bit CHERI-RISC-V), additional alignment requirements
apply to the lower and upper bounds.
The alignment required for allocations exceeding the minimum representable range ($4$ KiB for CHERI-MIPS and 64-bit CHERI-RISC-V) is $2^{E+3}$ bytes, where
$E$ is determined from the length, $l$, by
$E = 52 - \textrm{CountLeadingZeros}(l[64:floor(bounds\_bits/2)])$.\arnote{Is this too much detail?}
%\jrtcnote{Do we want to clarify that this is a 65-bit length? One would naively
%expect it to be 64-bit and thus be off by one in all calculations. We should
%probably also steer people towards CRRL/CRAM regardless (and add cheri\_foo
%APIs for them).}
%\arnote{65-bit length is probably too much detail. But CRRL/CRAM now documented}

Correctly computing the rounded size and minimum alignment for a given
allocation is non-trivial and may require many instructions to compute,
especially in the context of fast allocators such as the stack allocator.
Moreover, the architectural constants used for bounds precision differ across
architectures or their variations, and so alignment constraints also vary.
For example, the number of bits available for bounds differs between 32-bit and
64-bit CHERI-RISC-V, and also between 64-bit CHERI-RISC-V and Morello.

To avoid overly specific software knowledge of alignment requirements, and also to allow efficient calculation of alignment constraints during (for example) stack allocation, the CHERI ISA provides instructions that allow determining precisely representable allocations.
These instructions can be generated using compiler builtins that are provided by \pathname{cheriintrin.h}:
\begin{description}
\item[\ccode{size\_t cheri\_representable\_length(size\_t len)}\kern-2pt] returns the length that a capability would have after using \ccode{cheri\_bounds\_set} to set the length to \ccode{len} (assuming appropriate alignment of the base).

\item[\ccode{size\_t cheri\_representable\_alignment\_mask(size\_t len)}\kern-2pt] returns a bitmask that can be used to align an address downwards such that it is sufficiently aligned to create a precisely bounded capability.
\end{description}

\noindent
The precisely representable base address can be computed using:
\begin{lstlisting}[language={C}]
base = base & cheri_representable_alignment_mask(len);
\end{lstlisting}
When allocating from a contiguous buffer, the base needs to be aligned upwards instead of downwards.
This can be done with the following code:
\begin{clisting}[frame=none]
size_t required_alignment(size_t len) {
    return ~cheri_representable_alignment_mask(len) + 1;
}
/* A contiguous region from which allocations are carved in order. */
struct Buffer {
    void *data;        /* start of the region */
    size_t allocated;  /* offset from data of the end of the last allocation */
};
/*
 * Carve the next allocation of at least len bytes out of buf and return
 * a capability whose bounds are set exactly to the rounded allocation.
 */
void *allocate_next(struct Buffer *buf, size_t len) {
    /* Cast first: arithmetic on void * is not valid ISO C. */
    char *result = (char *)buf->data + buf->allocated;
    /* Align the base upwards so that it is precisely representable. */
    result = __builtin_align_up(result, required_alignment(len));
    /* Round the length up to one that is precisely representable. */
    size_t rounded_len = cheri_representable_length(len);
    /* The next allocation starts after this one, padding included. */
    buf->allocated = (result + rounded_len) - (char *)buf->data;
    return cheri_bounds_set_exact(result, rounded_len);
}

\end{clisting}

\noindent
Software written to use these compiler builtins, rather than encoding alignment
requirements directly, is more likely to be portable between CHERI-MIPS,
CHERI-RISC-V, and Morello.

\subsection{Implications for memory-allocator design}

One use case of these APIs is high-performance applications that contain custom memory
allocators and wish to narrow the bounds of returned pointers.
Two kinds of modifications are typically required:

\begin{description}
\item[Changes to alignment to allow for capabilities and bounds]
  Changes relating to alignment fall into two categories.
  First, those required to allow pointers to be stored within allocations,
  which requires that allocations be aligned to the pointer width (128 bits
  on 64-bit architectures).
  Second, further alignment changes will be required to ensure that bounds can
  be represented precisely.
  This requires suitably aligning both the bottom and top bounds to exclude
  any other live allocations, as described in
  \Cref{sec:bounds_alignment}.\arnote{May want to switch order of sections?}

\item[Reaching allocation metadata on \cfunc{free}\kern-2pt]
  It is often the case that allocators utilize the value of the pointer passed
  to their custom \cfunc{free} function to locate corresponding metadata --
  for example, by always placing that metadata immediately before the
  allocation, which would be outside of the allocation's bounds.
  Therefore, some additional work may be required to derive a pointer to the
  allocation's metadata via another global capability, rather than the one
  that has been passed to \cfunc{free}\kern-2pt.
\end{description}

These two concerns may interact: When a custom allocator places metadata at
the beginning of the allocation, care must be taken that the resulting pointer
is still sufficiently aligned.
While porting programs to run on CHERI, we found multiple sub-allocators
that placed 8 bytes of metadata at the start of the memory returned by
\cfunc{malloc}\kern-2pt.
This causes the resulting pointer to no longer be sufficiently aligned to
store capabilities without faulting or stripping tag bits.
\nwfnote{Does CHERI ISAv7 still fault in any of these scenarios?}

Note that it is also possible to use the above APIs to validate inputs to
\cfunc{free}\kern-2pt, which is useful when the consumer of \cfunc{free} is, for example,
an untrusted compartment or a component of a web browser that might be
influenced by an attacker. In such cases, \cfunc{free} should validate that the
passed-in capability is tagged, is in-bounds, and points to a legitimate,
still-allocated allocation.  For allocators engaged in revocation for temporal
safety, concurrent revocation opens the door to TOCTTOU races within
\cfunc{free}\kern-2pt; additional care must be taken to prevent a double-\cfunc{free}
using a stale pointer from freeing an object allocated after revocation.

%\section{Potential performance impact}
%
%Pure-capability code performs very similarly to non-capability-based code on
%the same architecture, as most compiler-generated constructions are identical.
%A small number of additional instructions will be used around pointers taken
%to stack allocations, or when making new heap allocations, to set up bounds
%for the returned pointer -- but these typically have negligible cost.
%
%The primary overhead for pure-capability code is therefore the increase in
%pointer size, which can impact data-cache efficiency.
%Performance overhead therefore tends to correspond to the density of pointer
%loads and stores in an application's dynamic memory access pattern.
%Relatively pointer-light programs often have overhead that is sub-1\% -- for
%example, in stream or image processing.
%Programs with more pointer-dense access patterns, such as language runtimes
%and compilers, may see more significant overheads, in the range of 10\%.
%\textbf{XXX: Turn these into more real numbers.}

\section{Further reading}
\label{sec:further_reading}

The primary reference for the CHERI Instruction-Set Architecture (ISA) is the
ISA specification; at the time of writing, the most recent version is CHERI
ISAv7~\cite{UCAM-CL-TR-927}:

\smallskip
\noindent
\url{https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-927.pdf}
\smallskip

\noindent
Our technical report, \textit{An Introduction to CHERI}, provides a high-level
overview of the CHERI architecture, ISA modeling, hardware implementations,
and software stack~\cite{UCAM-CL-TR-941}:

\smallskip
\noindent
\url{https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-941.pdf}
\smallskip

\noindent
We published a paper on idiomatic C and spatial memory protection at ASPLOS
2015~\cite{ChisnallCPDP11}:

\smallskip
\noindent
\url{https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201503-asplos2015-cheri-cmachine.pdf}
\smallskip

\noindent
We published a paper on CheriABI and the adaptation of a complete OS userspace
and application suite to a pure-capability process environment at ASPLOS
2019~\cite{davis2019:cheriabi}:

\smallskip
\noindent
\url{https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201904-asplos-cheriabi.pdf}
\smallskip

\noindent
We also released an extended technical-report version of this paper that
includes greater implementation detail~\cite{UCAM-CL-TR-932}:

\smallskip
\noindent
\url{https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-932.pdf}
\smallskip

\noindent
We published a paper on CHERI and temporal memory safety for the heap at
Oakland 2020~\cite{filardo:cornucopia}:

\smallskip
\noindent
\url{https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/2020oakland-cornucopia.pdf}
\smallskip

\noindent
We published a paper on C-language pointer provenance, and the implications
for software design, at POPL 2019; CHERI C was a case study in the practical
enforcement of capability provenance validity~\cite{cerberus-popl2019}:

\smallskip
\noindent
\url{https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201901-popl-cerberus.pdf}
\smallskip

%\textbf{XXX: Any other pointers?}

\section{Acknowledgements}

We gratefully acknowledge the helpful feedback from our colleagues, including
Hesham Almatary,
Ruben Ayrapetyan, Silviu Baranga, Jacob Bramley, Rod Chapman, Paul Gotch, Al Grant,
Brett Gutstein, Alfredo Mazzinghi, Alan Mycroft, and Lee Smith.
This work was supported by the Defense Advanced Research Projects Agency (DARPA) and the Air Force Research Laboratory (AFRL), under contracts
FA8750-10-C-0237 (``CTSRD'') and HR0011-18-C-0016 (``ECATS'').
The views, opinions, and/or findings contained in this report are those of the authors and should not be interpreted as representing the official views or policies of the Department of Defense or the U.S. Government.
This work was supported in part by the Innovate UK project Digital Security by
Design (DSbD) Technology Platform Prototype, 105694.
This project has received funding from the European Research Council
(ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 789108), ERC Advanced Grant ELVER.
We also acknowledge the EPSRC REMS Programme Grant (EP/K008528/1), Arm Limited,
HP Enterprise, and Google, Inc.
Approved for Public Release, Distribution Unlimited.

\printbibliography

\end{document}