-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcheri-c-programming.tex
2084 lines (1776 loc) · 104 KB
/
cheri-c-programming.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
\documentclass[12pt,twoside,openright,a4paper]{article}
%\documentclass[12pt,twoside,openright,usletter]{article}
% !TeX spellcheck = en_US
%\documentclass[11pt]{article}
% UK date format in bibliography:
\usepackage[british]{babel}
\usepackage[inner=25mm,outer=25mm,top=20mm,bottom=20mm]{geometry}
%\usepackage[UKenglish]{isodate}%UK date endian
\usepackage[headings]{fullpage}
\usepackage[hidelinks]{hyperref}
% Bibliography:
\usepackage[utf8]{inputenc}
\usepackage{csquotes,xpatch}% recommended
% list up to 99 names instead of the default 3
\usepackage[backend=biber,bibencoding=utf8,style=numeric,maxnames=99,backref=false,sortcites,datamodel=thesis]{biblatex}
\addbibresource{cheri.bib}
\AtEveryBibitem{%
% Don't print ISBN, ISSN, or URL dates
\clearfield{issn}%
\clearfield{isbn}%
\clearfield{urldate}%
\clearfield{urlyear}%
}
\usepackage{bytefield}
\usepackage{color}
\usepackage[scaled=0.8]{DejaVuSansMono}
\usepackage[T1]{fontenc}
\usepackage{listings}
\usepackage{mdframed} % To avoid linebreaks in lstlistings
\lstnewenvironment{clisting}[1][]{\endgraf\noindent\minipage{\linewidth}\lstset{language={C},breaklines=true,frame=L,#1}}{\endminipage\endgraf}
\lstnewenvironment{compilerwarning}[1][]{\endgraf\noindent\minipage{\linewidth}\lstset{language={},breaklines=true,basicstyle=\scriptsize\ttfamily\bfseries,frame=L,#1}}{\endminipage\endgraf}
\usepackage{subcaption}
\usepackage{times}
\usepackage{url}
\usepackage[svgnames]{xcolor}
\definecolor{lightgray}{gray}{0.8}
\usepackage{xspace}
\usepackage{xfrac}
\usepackage[nameinlink,noabbrev,capitalise]{cleveref}
% drawing over lstlistings (code stolen from nwf)
\usepackage{tikz}
\usetikzlibrary{decorations.pathreplacing}
\usetikzlibrary{fit}
\usetikzlibrary{tikzmark}
\usetikzlibrary{calc}
\usetikzlibrary{patterns}
\newcommand*{\vcpgfmark}[1]{\ensuremath{\vcenter{\hbox{\pgfmark{#1}}}}}
% GBP symbol should be safe since it's easy to enter (at least on a UK keyboard) and won't be in any valid lstlistings
\lstset{escapechar=£} % Note: ensure this doesn't occur in any of the code
\newcommand{\TikzListingHighlight}[3][]{\tikz[overlay,remember picture]{\draw[\ifstrempty{#1}{yellow}{#1}, line width=10pt,opacity=0.3](#2) -- (#3);}}
\newcommand*{\TikzListingHighlightStartEnd}[2][]{\tikz[overlay,remember picture]{\draw[\ifstrempty{#1}{yellow}{#1}, line width=10pt,opacity=0.3](pic cs:Start#2) -- (pic cs:End#2);}}
\renewcommand{\UrlFont}{\ttfamily\small}
\newcommand{\baselineboxformatting}[1]{%
% Measure size of contents
\sbox0{#1}%
% Use the difference between the contents' height and the bitbox's height,
% clamped to [-.44\baselineskip, 0], as our minimum depth.
\setlength{\skip0}{\ht0 - \height}%
\ifdim\skip0>0pt%
\setlength{\skip0}{0}%
\else%
\ifdim\skip0<-.44\baselineskip%
\setlength{\skip0}{-.44\baselineskip}%
\fi%
\fi%
\centering\rule[\skip0]{0pt}{\height}#1}
\bytefieldsetup{boxformatting=\baselineboxformatting}
\lstset{basicstyle=\footnotesize\ttfamily}
%\newcommand{\ccode}[1]{\lstinline[language={C}]{#1}}
%\newcommand{\cxxcode}[1]{\lstinline[language={C++}]{#1}}
\newcommand{\ccode}[1]{{\small\ttfamily{#1}}}
\newcommand{\cxxcode}[1]{{\ccode{#1}}}
\newcommand{\cconst}[1]{{\ccode{#1}}}
\newcommand{\cfunc}[1]{{\ccode{#1()}}}
\newcommand{\cvar}[1]{{\ccode{#1}}}
\newcommand{\pathname}[1]{{\ccode{#1}}}
\newcommand{\commandline}[1]{{\ccode{#1}}}
\newcommand{\ptrdifft}{{\ccode{ptrdiff\_t}}\xspace}
\newcommand{\maxalignt}{{\ccode{max\_align\_t}}\xspace}
\newcommand{\sizet}{{\ccode{size\_t}}\xspace}
\newcommand{\ssizet}{{\ccode{ssize\_t}}\xspace}
\newcommand{\ptraddrt}{{\ccode{ptraddr\_t}}\xspace}
\newcommand{\cuintptrt}{{\ccode{uintptr\_t}}\xspace}
\newcommand{\cintptrt}{{\ccode{intptr\_t}}\xspace}
\newcommand{\ccharstar}{{\ccode{char *}}\xspace}
\newcommand{\cvoidstar}{{\ccode{void *}}\xspace}
\newcommand{\clongt}{{\ccode{long}}\xspace}
\newcommand{\cintt}{{\ccode{int}}\xspace}
\newcommand{\cintttt}{{\ccode{int32\_t}}\xspace}
\newcommand{\cintsft}{{\ccode{int64\_t}}\xspace}
\newcommand{\SIGPROT}{{\ccode{SIGPROT}}\xspace}
\newcommand{\note}[2]{{\color{blue}[ Note: #1 - #2]}}
\usepackage{xstring}
\IfSubStr*{\jobname}{final}{
\renewcommand{\note}[2]{\relax\ifhmode\unskip\fi}
}{
% show comments by default
}
\newcommand{\arnote}[1]{\note{#1}{Alex R.}}
\newcommand{\bdnote}[1]{\note{#1}{Brooks D.}}
\newcommand{\rwnote}[1]{\note{#1}{Robert W.}}
\newcommand{\amnote}[1]{\note{#1}{Alfredo M.}}
\newcommand{\psnote}[1]{\note{#1}{Peter S.}}
\newcommand{\pgnnote}[1]{\note{#1}{Peter N.}}
\newcommand{\jrtcnote}[1]{\note{#1}{Jess C.}}
\newcommand{\hmnote}[1]{\note{#1}{Hesham A.}}
\newcommand{\nwfnote}[1]{\note{#1}{nwf}}
% typeset C++ sensibly
\usepackage{relsize}
\newcommand*{\cpp}[1][]{C\textsmaller[2]{\nolinebreak[4]\hspace{-.05em}\raisebox{.45ex}{\textbf{++}}}}
% And another macro for sensible PDF metadata:
\newcommand*{\cppInHeader}[1][]{\texorpdfstring{\cpp{}}{C++}}
\newcommand*{\purecapCOrCpp}[1]{CHERI C/\cpp{}}
\hyphenation{Free-BSD}
\hyphenation{Free-RTOS}
\hyphenation{Cheri-BSD}
\hyphenation{Cheri-Free-RTOS}
\hyphenation{Cheri-ABI}
\hyphenation{Web-Kit}
\hyphenation{Postgre-SQL}
\title{CHERI C/\cppInHeader{} Programming Guide \\ (DRAFT)}
\author{Robert N. M. Watson$^*$, Alexander Richardson$^*$,
Brooks Davis$^\dagger$, \\
John Baldwin$^\ddagger$, David Chisnall$^\S$, Jessica Clarke$^*$,
Nathaniel Filardo$^*$, \\
Simon W. Moore$^*$, Edward Napierala$^*$, Peter Sewell$^*$, and \\
Peter G. Neumann$^\dagger$ \\
\\
$^*$University of Cambridge, $^\dagger$SRI International, \\
$^\ddagger$Ararat River Consulting, LLC and $^\S$Microsoft Research}
\begin{document}
\sloppy
%% CL tech-report format provides its own cover page. Comment for final
%% version.
%\maketitle
%% CL tech-report format requires page numbering to start at 3. Uncomment for
%% final version.
\setcounter{page}{3}
%%
%
% Keep Abstract in sync with the Introduction.
%
\newcommand{\abstracttext}{
This document is a brief introduction to the \purecapCOrCpp{}
programming languages, which employ CHERI's architectural capability
primitive to implement C/\cpp{}-language memory safety.
We explain the principles underlying these language variants, and their
grounding in CHERI's multiple architectural instantiations:
CHERI-MIPS, CHERI-RISC-V, and Arm's Morello.
We describe the most commonly encountered differences between these
dialects and C/\cpp{} on conventional architectures, and where existing
software may require minor changes.
We document new compiler warnings and errors that may be experienced compiling
code with the CHERI Clang/LLVM compiler, and suggest how they may be addressed
through typically minor source-code changes.
We explain how modest language extensions allow selected software, such
as memory allocators, to further refine permissions and bounds on pointers.
This guidance is based on our experience adapting the FreeBSD operating-system
userspace, and applications such as PostgreSQL and WebKit, to run in a
CHERI C/\cpp{} capability-based programming environment.
We conclude by recommending further reading.
\psnote{should this mention CheriFreeRTOS and CHERI-RTEMS?}
}
\begin{abstract}
\abstracttext
\end{abstract}
\newpage
\setcounter{tocdepth}{2}
\tableofcontents
\newpage
\section{Introduction}
%
% Keep Abstract in sync with the Introduction.
%
\abstracttext{}
\subsection{Definitions}
CHERI Clang/LLVM and LLD implement the following new language,
code-generation, and linkage models:
\begin{description}
\item[CHERI C/\cpp{}] are C/\cpp{}-language dialects tuned to
requirements arising from implementing all pointers using CHERI capabilities.
This includes all explicit pointers (i.e., those declared by the programmer)
and all implied pointers (e.g., those used to access local and global
variables).
For example, they diverge from C/\cpp{} implementations on conventional
architectures by preventing pointers passed through integer types other
than \cuintptrt and \cintptrt{} from being dereferenced.
New Application Programming Interfaces (APIs) provide access to capability
features of pointers, including getting and setting their bounds, required
by selected software such as memory allocators.
The vast majority of C/\cpp{} source code we have encountered requires
little or no modification to be compiled as CHERI C/\cpp{}.
\item[Pure-capability machine code] is compiled code (or hand-written
assembly) that utilizes CHERI capabilities for all memory accesses --
including loads, stores, and instruction fetches -- rather than integer
addresses.
Capabilities are used to implement pointers explicitly described in the
source program, and also to implement implied pointers in the C execution
environment, such as those used for control flow.
Pure-capability machine code is not binary compatible with
capability-unaware code using integer pointers, not least due to the
different size of the pointer data type.
%Pure-capability code will most frequently be used to implement CHERI C,
%although could also be used for other purposes (e.g., non-CHERI C).
\end{description}
While the focus of this document is \purecapCOrCpp{}, CHERI is an
architectural feature able to support other software use cases including
other C/\cpp{}-language mappings into its features.
Another mapping is hybrid C/\cpp{}, in which only selected pointers are
implemented using capabilities, with the remainder implemented using integers.
We have primarily used hybrid C in systems software that bridges between
environments executing pure-capability machine code and those running largely
or entirely non-CHERI-aware machine code.
For example, a largely CHERI-unaware CheriBSD kernel can host pure-capability
processes using its CheriABI wrapper implemented in hybrid C
(see \Cref{sec:cheriabi}).
Hybrid machine code has stronger binary compatibility, but weaker protection,
than pure-capability machine code.
We do not consider hybrid C further in this document.
\section{Background}
CHERI extends conventional processor Instruction-Set Architectures (ISAs) with
support for \textit{architectural capabilities}.
One important use for this new hardware data type is in the implementation
of safer C/\cpp{} pointers and the code or data they point at.
Our technical report, \textit{An Introduction to CHERI}, provides a more
detailed
overview of the CHERI architecture, ISA modeling, hardware implementations,
and software stack~\cite{UCAM-CL-TR-941}.
\subsection{CHERI capabilities}
\label{sec:cheri-capabilities}
\begin{figure}[b]
\hspace{2.5cm}
% Tag
\begin{subfigure}[t!]{0.1\textwidth}
\begin{bytefield}[bitwidth=3pt]{1}
% \bitheader[endianness=big]{~,~} \\
\begin{leftwordgroup}{1-bit tag}
\bitbox{1}{}
\end{leftwordgroup}
\end{bytefield}
\end{subfigure}
% Capability
\begin{subfigure}[t!]{0.1\textwidth}
\begin{bytefield}[bitwidth=3pt]{64}
\bitheader[endianness=big]{0,63} \\
\begin{rightwordgroup}{128-bit \\ in-memory \\ capability}
\bitbox{16}{perms} & \bitbox{3}{\color{lightgray}\rule{\width}{\height}} & \bitbox{15}{otype} & \bitbox{30}{bounds} \\
\bitbox[lrb]{64}{64-bit~address}
\end{rightwordgroup}
\end{bytefield}
\end{subfigure}
\caption{128-bit CHERI Concentrate capability representation used in
64-bit CHERI-MIPS and 64-bit CHERI-RISC-V: 64-bit address
and metadata in addressable memory; and 1-bit out-of-band tag.}
\label{figure:cheri-capability-representation}
\end{figure}
CHERI capabilities are twice the width of the native integer pointer type of
the baseline architecture: there are 128-bit capabilities on 64-bit platforms,
and 64-bit capabilities on 32-bit platforms.
Each capability consists of an integer (virtual) address of the natural size for
the architecture (e.g., 32 or 64 bit), and also additional metadata that is
compressed in order to fit in the remaining 32 or 64 bits of the capability
(see \Cref{figure:cheri-capability-representation} for an example; details
vary across underlying architectures and word sizes).
In addition, they are associated with a 1-bit validity ``tag'' whose value is
maintained in registers and memory by the architecture, but not part of
addressable memory.
Each element of the additional metadata and tag of the capability contributes
to the protection model:
\begin{description}
\item[Validity tag] The tag tracks the validity of a capability.
If invalid, the capability cannot be used for load, store, instruction
fetch, or other operations.
It is still possible to extract fields from an invalid capability,
including its address.
\item[Bounds] The lower and upper bounds are addresses restricting the
portion of the address space within which the capability can be used for
load, store, and instruction fetch.
%
Setting a capability's address (i.e., where it points) within
bounds will retain the capability's validity tag. Setting addresses out of
bounds is subject to the precision limits of the bounds compression model
(see below and \cref{sec:oob}); broadly speaking, setting addresses ``near''
the capability's bounds will preserve the validity tag. (These out-of-bounds
capabilities continue to authorize access only to memory within bounds.)
\item[Permissions] The permissions mask controls how the capability can be
used -- for example, by authorizing the loading and storing of data and/or
capabilities.
\item[Object type] If this value is not equal to the unsealed object type, the capability is ``sealed'' and
cannot be modified or dereferenced, but can be used to implement opaque
pointer types.
This feature is not described further in this document, as it is primarily
used to implement software compartmentalization rather than object-level
memory protection.\arnote{Should we mention that code pointers are sealed (sentries)?}
\end{description}
When stored in memory, valid capabilities must be naturally aligned -- i.e., at
64-bit or 128-bit boundaries, depending on capability size -- as that is the
granularity at which in-memory tags are maintained.
Partial or complete overwrites with data, rather than a complete overwrite
with a valid capability, lead to the in-memory tag being cleared, preventing
corrupted capabilities from later being dereferenced.
In order to reduce the memory footprint of capabilities, capability
compression is used to reduce the overhead of bounds so that the full
capability, including address, permissions, and bounds, fits within 64 or
128 bits (plus the 1-bit out-of-band tag).
Bounds compression takes advantage of redundancy between the address
and the bounds, which occurs because a pointer typically falls within (or
close to) its associated allocation, and because allocations are typically
well aligned.
The compression scheme uses a floating-point representation, allowing high-precision bounds for small
objects, but requiring stronger alignment and padding for larger allocations
(see \cref{sec:bounds_alignment}).
\subsection{Architectural rules for capability use}
The architecture enforces several important security properties on changes to
this metadata:
\begin{description}
\item[Provenance validity] ensures that capabilities can be used -- for
load, store, instruction fetch, etc. -- only if they are derived via valid
transformations of valid capabilities.
This property holds for capabilities in both registers and memory.
% \item[Capability integrity] prevents direct in-memory manipulation of
% capabilities. (Although this property is subsumed
% under the previous property, it seems worth stating on its own.)
% \pgnnote{Does that added sentence work?}
% \rwnote{I'm not really sure that that helps.}
% \psnote{As they are stated above, ``provenance validity''
% subsumes ``capability integrity'', which is a bit confusing. One
% could just lose the latter, or (I suppose) split ``provenance
% validity'' into the case of capability construction in registers,
% via loads and register
% operations and the case of capability (de)construction in memory,
% via good and bad store operations}
% \psnote{As stated above, ``provenance validity'' involves both the
% construction and use of capabilities. Think that's ok, but a
% different slicing of the concepts would be to have it just be
% construction. If sticking with ``can be used'', then somehow the
% text should be elaborated to not forbid use in non-authorising
% ways, e.g., ``using'' a possibly-non-valid capability by pulling
% out its address}
\item[Monotonicity] requires that any capability derived from another
cannot exceed the permissions and bounds of the capability from which it was
derived (leaving aside sealed capabilities, used for domain transition,
whose mechanism is not detailed in this report).
% \psnote{That's a bit odd, as a capability is really just a pure
% value, not a mutable thing. A better (but still a bit fuzzy)
% statement would be something like ``Monotonicity ensures that a
% valid capability can only be constructed from another capability
% with the same or greater authority''.
% Beyond that, one should talk not just about monotonicity of capability
% construction, but monotonicity of the set of all \emph{reachable
% capabilities} -- compare with the cheri\_formal\_paper Section IV. }
% \rwnote{This comment partially addressed.}
% \rwnote{This report does not discuss compartmentalisation at all, so is
% uninterested in the other definition of monotonicity.}
\end{description}
At boot time, the architecture provides initial capabilities to the firmware,
allowing data access and instruction fetch across the full address space.
Additionally, all tags are cleared in memory.
Further capabilities can then be derived (in accordance with the monotonicity
property) as they are passed from firmware to boot loader, from boot loader to
hypervisor, from hypervisor to the OS, and from the OS to the application.
At each stage in the derivation chain, bounds and permissions may be
restricted to further limit access.
For example, the OS may assign capabilities for only a limited portion of the
address space to the user software, preventing use of other portions of the
address space.
% These capabilities describe the set of memory access permissions held by each
% software component.
%The initial capabilities are then
% derived from existing valid capabilities, in accordance with the monotonicity
% property;
% \pgnnote{This seems cleaner.}
% \rwnote{More clear but less correct -- the initial capabilities are never
% derived from other capabilities.}
Similarly, capabilities carry with them \textit{intentionality}: when a
process passes a capability as an argument to a system call, the OS kernel can
carefully use only that capability to ensure that it does not access other
process memory that was not intended by the user process -- even though the
kernel may in fact have permission to access the entire address space through
other capabilities it holds.
This is important, as it prevents ``confused deputy'' problems, in which a more
privileged party uses an excess of privilege when acting on behalf of a less
privileged party, performing operations that were not intended to be
authorized.
For example, this prevents the kernel from overflowing the bounds on a
userspace buffer when a pointer to the buffer is passed as a
system-call argument.
The hardware furthermore guarantees that capability tags and capability data are written atomically.
For example, if one thread stores a valid capability and another writes arbitrary data to the same location, it is impossible to observe the arbitrary data with the validity tag set.
% \arnote{I guess the other way around is fine? I.e. capability data but \textbf{without} the tag?}
These architectural properties provide the foundation on which a
capability-based OS, compiler, and runtime can implement C/\cpp{}-language memory
safety.
They have been made precise and have been proved, with machine-checked proof,
to hold for the CHERI-MIPS architecture~\cite{cheri-formal-SP2020}.
\section{CHERI C/\cppInHeader{}}
The architectural-capability type can be used in a variety of ways by
software.
One particularly useful use case is in implementing \textit{CHERI C/\cpp{}}.
In this model, all C/\cpp{} language-visible pointer types, as well as any
implied pointers implementing vtables, return addresses, global variables,
arrays of variadic-function arguments, and so on, are implemented using
capabilities with tight bounds.
This allows the architecture to imbue pointers with protection by virtue of
architectural provenance validity, bounds checking, and permission checking,
protecting pointers from corruption and providing strong spatial memory
safety.
\subsection{The CHERI C/\cppInHeader{} run-time environment}
CHERI C code executes within a capability-aware run-time environment
-- whether ``bare metal'' with a suitable runtime, or in a richer, OS-based
process environment such as CheriABI (see \Cref{sec:cheriabi}),
which ensures that:
\begin{itemize}
\itemsep0em % the default large spacing looks weird in this list
\item capabilities are context switched (if required);
\item tags are maintained by the OS virtual-memory subsystem (if present);
\item capabilities are supported in OS control operations such as
debugging (as needed);
\item system-call arguments, the
run-time linker, and other aspects of the OS Application Binary Interface
(ABI) utilize capabilities rather than integer pointers; and
\item the C/\cpp{}-language runtime implements suitable capability preservation
\\
(e.g., in \cfunc{memcpy}\kern-2pt) and restriction (e.g., in \cfunc{malloc}\kern-2pt).
\end{itemize}
In CheriBSD, our CHERI-extended version of the open-source FreeBSD operating
system, CheriABI operates as a complete additional OS ABI.
CheriABI is implemented in the style of a 32-bit or 64-bit OS personality, in
that it requires its own set of suitably compiled system libraries and classes.
We have also successfully adapted bare-metal runtimes, such as newlib, and
embedded operating systems, such as FreeRTOS (CheriFreeRTOS) and RTEMS
(CHERI-RTEMS), to support CHERI memory protection.
Outside of the OS and language runtime themselves, CHERI C/\cpp{} require
relatively few source-code-level changes to C/\cpp{}-language software.
We explore those changes in the remainder of this document.
\subsection{Referential, spatial, and temporal safety}
\purecapCOrCpp{} introduce a number of new types of protection not
present in compilation to conventional architectures:
\begin{description}
\item[Referential safety] protects pointers (references) themselves.
This includes \textit{integrity} (corrupted pointers cannot be dereferenced)
and \textit{provenance validity} (only pointers derived from valid pointers
via valid manipulations can be dereferenced).
When pointers are implemented using architectural capabilities, CHERI's
capability tags and provenance validity naturally provide this protection.
\item[Spatial safety] ensures that pointers may be used only to access memory
within bounds of their associated allocation; dually, manipulating an
out-of-bounds pointer will not grant access to another allocation.
This is accomplished by adapting various memory allocators, including the run-time
linker for global variables, the stack allocator, and the heap allocator,
to set the bounds on the capability implementing a pointer before returning
it to the caller.
Due to precision constraints on capability bounds, bounds on returned
pointers may include additional padding, but will still not permit access to any
other allocations (see \cref{sec:bounds_alignment}).
Monotonicity ensures that callers cannot later broaden the bounds to cover
other allocations.
\end{description}
\noindent
Referential safety and spatial safety are implemented in CheriBSD's
pure-capability CheriABI execution environment and for bare-metal in
CheriFreeRTOS and CHERI-RTEMS.
\begin{description}
\item[Temporal safety] prevents a pointer retained after the release of its
underlying allocation from being used to access its memory if that memory
has been reused for a fresh allocation (e.g., after a fresh pointer to that
memory has been returned by a further call to \cfunc{malloc} after the
current pointer has been passed to \cfunc{free}\kern-2pt).
Heap temporal safety is accomplished by preventing new pointers being
returned to a previously allocated region of memory while any prior pointers
to that memory persist in application-accessible memory.
Memory will be held in \textit{quarantine} until any prior pointers have
been revoked; then the memory may be reallocated.
Architectural capability tags and virtual memory allow intermittent
\textit{revocation sweeps} to accurately and efficiently locate and
overwrite any capabilities implementing stale pointers.
Spatial safety ensures that pointers cannot be used to reference other
memory, including other freed memory.
\end{description}
%\rwnote{I wonder if we should make a note somewhere about the nature of the
%overwrite -- e.g., that we anticipate that it will simply clear the tag, but
%that other implementations are possible?}
% BFG: done below in the Implications of capability revocation for temporal
% safety subsection.
\noindent
Temporal safety is the object of ongoing experiments.
A prototype that guards \emph{heap} allocations has been developed for
CheriABI on CheriBSD, but is not yet integrated with the main development
branch.
We currently have no plans to develop support for temporal memory safety in
CheriFreeRTOS and CHERI-RTEMS, both due to the complexity of the temporal
safety runtime, and also because of CHERI temporal safety's dependence on an
MMU for performance.
%% nwf thinks this might belong in here, lest someone think that the restriction
%% to heap temporal safety is all that we can achieve even in theory.
%
% A blue-sky microkernel operating system project, CheriOS, achieves full spatial
% and temporal safety of its C runtime environment.
\section{Impact on the C/\cppInHeader{} programming model}
Several kinds of changes may be required by programmers; the extent to which
these changes impact a particular library or application will depend
significantly on its idiomatic use of C.
Our experience suggests that low-level system components such as run-time
linkers, debuggers, memory allocators, and language runtimes require a modest
but non-trivial porting effort.
Similarly, support classes that include, for example, custom synchronization
features, may also require moderate adaptation.
Other applications may compile with few or no changes -- especially if they
are already portable across 32-bit and 64-bit platforms and are written in a contemporary C or \cpp{} dialect.
In the following sections, we consider various kinds of programmer-visible
changes required in the CHERI C/\cpp{} programming environment.
In many cases, compiler warnings and errors can be used to identify potential
issues compiling code as \purecapCOrCpp{} (see
\Cref{sec:cheri-compiler-warnings-and-errors}).
\rwnote{Alex: Can we use the word ``most'' instead of ``many''?}
\subsection{Capability-related faults}
\label{sec:faults}
When architectural capability properties are violated, such as by an attempt
to dereference an invalid capability, access memory outside the bounds of a
capability, or perform accesses not authorized by the permissions on a
capability, this typically leads to a hardware exception (trap).
Operating-system kernels are able to catch this exception via a trap handler,
optionally delivering it to the run-time environment via OS-specific
mechanisms.
However, the language-level behavior of CHERI C/\cpp{} is considerably more
subtle: existing undefined behavior semantics in C are retained.
The compiler is free to assume that loads and stores will not trap (i.e., that
any program is free of undefined behavior), and may optimize under this
assumption, including reordering code.
Architectural traps occur when dynamic loads and stores are attempted, and
reordering could lead to potentially confusing behavior for programmers.
In the CheriABI process environment, the operating system catches the hardware
exception and delivers a \SIGPROT signal to the user process;
further information may be found in \Cref{sec:cheriabi}.
In other environments, such as bare metal or under an embedded OS, behavior is
specific to those environments, as it will depend both on how architectural
exceptions are handled, and how those events are delivered to the C-language
stack.
Fail stop may be appropriate behavior in some environments, and is in fact the
default behavior in CheriABI when \SIGPROT is not handled.
\rwnote{We've opted to use the term ``hardware exception'' throughout, and
mention ``traps'' only here. This could cause confusion with respect to \cpp{}
exceptions -- but perhaps less so than if we used the word ``exception''
unadorned.}
\subsection{Pointer provenance validity}
\label{sec:pointer_provenance_validity}
\purecapCOrCpp{} implement pointers using architectural
capabilities, rather than using conventional 32-bit or 64-bit integers.
This allows the provenance validity of language-level pointers to be
protected by the provenance properties of CHERI architectural capabilities:
only pointers implemented using valid capabilities can be dereferenced.
Other types that contain pointers, \cuintptrt and \cintptrt,
%\psnote{It would be better to exhaustively list them (is it just intptr\_t and uintptr\_t?) rather than this vague ``such as''}
%\arnote{There are also cases such as C++11 strongly typed enums that use uintcap\_t as the underlying type, but we really don't need to mention this here. And I'm also not sure if we want to keep allowing that since enums should really be integer values only}
are similarly implemented
using architectural capabilities, so that casts through these types
can retain capability properties.
When a dereference is attempted on a capability without a valid tag --
including load, store, and instruction fetch -- a hardware exception fires
(see \Cref{sec:faults}).
On the whole, the effects of pointer provenance validity are non-disruptive to
C/\cpp{} source code.
However, a number of cases exist in language runtimes and other
(typically less portable) C code that conflate integers and pointers that can
disrupt provenance validity.
In general, generated code will propagate provenance validity in only two
situations:
\begin{description}
\item[Pointer types] The compiler will generate suitable code to propagate
the provenance validity of pointers by using capability load and store
instructions.
This occurs when using a pointer type (e.g., \cvoidstar) or an
integer type defined as being able to hold a pointer (e.g.,
\cintptrt).
As with attempting to store 64-bit pointers in 32-bit integers on 64-bit
architectures, passing a pointer through an inappropriate type will lead to
truncation of metadata (e.g., the validity tag and bounds).
It is therefore important that a suitable type be used to hold pointers.
This pattern often occurs where an opaque field exists in a data structure
-- e.g., a \clongt argument to a callback in older C code -- that
needs to be changed to use a capability-oblivious type such as \cintptrt.
\psnote{I'm not sure this document has explained the ISA behavior concretely enough for this stuff to really make sense -- the previous description was quite high-level. Maybe somewhere it should be explicit that registers have tags, that load and store instructions must be via a capability, and that there are both capability and non-capability load and store instructions, with the former preserving tags (both ways) and the latter clearing them?}
\item[Capability-oblivious code] In some portions of the C/\cpp{} runtime and
com\-piler-generated code, it may not be possible to know whether memory is
intended to contain a pointer or not -- and yet preserving pointers is
desirable.
In those cases, memory accesses must be performed in a way that preserves
pointer provenance.
In the C runtime itself, this includes \cfunc{memcpy}\kern-2pt, which must use
capability load and store instructions to transparently propagate capability
metadata and tags.
A useful example of potentially surprising code requiring modification for
\purecapCOrCpp{} is \cfunc{qsort}\kern-2pt.
Some C programs assume that \cfunc{qsort} on an array of data structures
containing pointers will preserve the usability of those pointers.
As a result, \cfunc{qsort} must be modified to perform memory copies using
pointer-based types, such as \cintptrt, when size and alignment
require it.
\end{description}
\subsubsection{Recommended use of C-language types}
\label{sec:recommended-c-types}
As confusion frequently arises about the most appropriate types to use for
integers, pointers, and pointer-related values, we make the following
recommendations:
\begin{description}
\item[\cintt, \cintttt, \clongt, \cintsft,
\ldots{}] These pure integer types should be used to hold integer values
that will never be cast to a pointer type without first combining them with
another pointer value -- e.g., by using them as an array offset.
Most integers in a C/\cpp{}-language program will be of these types.
\item[\ptraddrt] This is a new integer type introduced by CHERI C and should be used to hold
addresses.
\ptraddrt should not be directly cast to a pointer type for
dereference; instead, it must be combined with an existing valid capability
to the address space to generate a dereferenceable pointer.
Typically, this is done using the \ccode{cheri\_address\_set(c, x)} function.
\item[\sizet, \ssizet] These integer types should be used
to hold the unsigned or signed lengths of regions of address space.
\arnote{\sizet is not necessarily the same as unsigned \ptrdifft.}
\item[\ptrdifft] This integer type describes the difference of indices
between two pointers to elements of the same array, and should not be used for
any other purpose.
It can be added to a pointer to obtain a new pointer, but the result will
be dereferenceable only if the address lies within the bounds of the
pointer from which it was derived.
\note{Isn't that last sentence true of any combination?}{nwf}
Less standards-compliant code sometimes uses \ptrdifft when the
programmer more likely meant \cintptrt or (less commonly)
\sizet.
When porting code, it is worthwhile to audit use of \ptrdifft.
\note{Should we recommend that \sizet be used to hold lengths of
allocations and \ptrdifft be used to talk about spans of
address space (e.g., the offsets between two subobjects of an allocation)? I feel
like the recommendations here are not as concrete as I'd like.}{nwf}
\item[\cintptrt, \cuintptrt] These integer types should be
used to hold values that may be valid pointers if cast back to a pointer
type.
When an \cintptrt is assigned an integer value -- e.g., due to
constant initialization to an integer in the source -- and the result is
cast to a pointer type, the pointer will be invalid and hence
non-dereferenceable.
These types will be used in two cases: (1) Where there is uncertainty as to
whether the value to be held will be an integer or a pointer -- e.g., for an
opaque argument to a callback function; or (2) Where it is more convenient
to place a pointer value in an integer type for the purposes of arithmetic
(which takes place on the capability's address and in units of bytes, as if
the pointer had been cast to \texttt{char *}).
The observable, integer range of a \cuintptrt is the same as
that of a \ptraddrt (or \ptrdifft for \cintptrt), despite the increased \emph{alignment} and \emph{storage} requirements.
\item[\ccode{intmax\_t}, \ccode{uintmax\_t}] According to the C standard\arnote{7.20.1.5 Greatest-width integer types}, these integer types should be \enquote{capable of representing any value of any (unsigned) integer type}.
In \purecapCOrCpp{}, they are not provenance-carrying and can represent the integer \emph{range} of \cuintptrt/\cintptrt, but not the capability metadata or tag bit.
As the observable value of \cuintptrt/\cintptrt is the pointer address range, we believe this choice to be compatible with the C standard.
Additionally, due to ABI constraints, it would be extremely difficult to change the width of these types from 64 to 129 bits.
This is also true for other architectures such as x86: despite Clang and GCC supporting an \ccode{\_\_int128} type, \ccode{intmax\_t} remains 64 bits wide.
We generally do not recommend use of these types in \purecapCOrCpp{}.
However, the types may be useful in \cfunc{printf} calls (using the \ccode{\%j} format string width modifier) as the \pathname{inttypes.h} \ccode{PRI*} macros can be rather verbose.
\item[\maxalignt] This type is defined in C as \enquote{an object type whose alignment is the greatest fundamental alignment} \arnote{C2x \S{}7.19.2} and this includes capability types for \purecapCOrCpp{}.
% and in \cpp{} as a \enquote{type whose alignment requirement is at least as great as that of every scalar type}\arnote{C++17 \S{}21.2.4p5}
We found that some custom allocators use \ccode{sizeof(long double)} or \ccode{sizeof(uint64\_t)} to align their return values.
While this appears to work on most architectures, in \purecapCOrCpp{} this must be changed to \ccode{alignof(\maxalignt)}\kern-2pt.%
%
\footnote{%
It is important to use \ccode{alignof} instead of \ccode{sizeof} since many
common implementations, such as GCC and FreeBSD, define \maxalignt as a
\ccode{struct} and not a \ccode{union}.}
\item[\ccharstar, \ldots{}] These pointer types are suitable for
dereference, but in general \psnote{that ``in general'' makes me wonder about the exceptions?}
\arnote{The only exception I can think of is requiring \cvoidstar due to bad API design (callback parameters, etc).} should not be cast to or from arbitrary integer
values.
Valid pointers are always derived from other valid pointers (including those cast to \cintptrt or \cuintptrt), and cannot be
constructed using arbitrary integer arithmetic.
\end{description}
It is important to note that \cuintptrt is no longer the same size as
\sizet. This difference may require making some changes to
existing code to use the correct type depending on whether the variable
needs to be able to store a pointer type. In cases where this is not obvious
(such as for a callback argument), we recommend the use of \cuintptrt.
This ensures that provenance is maintained.
\pgnnote{The above section begs questions relating to what is the
responsibility of programmers and what can be aided or managed by
compilers. Ideally, the latter would be preferable to requiring
programmers to understand things that are possibly beyond their so-called
experience.}
\subsubsection{Capability alignment in memory}
Because tags apply only to memory locations that are capability-aligned
and capability-sized,
unaligned storage of pointers will either generate a run-time
hardware exception (if a capability-aware load or store is performed), or discard the
tag (if a capability-oblivious memory copy is performed -- e.g., using
\cfunc{memcpy} to copy from an aligned location to an unaligned one).
One example of this is Berkeley DB (BDB) when used as an in-memory
implementation rather than as an on-disk database format.
Even when patched to use \cfunc{memcpy} to copy objects stored as data, it
does not ensure sufficient alignment in its internal storage to preserve tags.
We therefore recommend against using BDB for this purpose.
While unaligned pointer use is uncommon in C programs, as data-structure
layouts are normally designed to keep them strongly aligned for performance
and atomicity reasons, any code depending on unaligned pointers will need
to be changed.
\amnote{Should we mention code that assumes that it is ok to go out of bounds
for optimization purposes? E.g., strcmp loading a word at a time?}
\psnote{yes}
\subsubsection{Single-origin provenance}
\label{sec:ambiguous-provenance}
In the CHERI memory protection model, capabilities are derived from a single other
capability.
However, in C code, expressions may construct a new \cintptrt value from more
than one provenance-carrying parent \cintptrt{} -- for example, by casting both a
pointer and a literal value to \cintptrt{}-s, and then adding them.
\psnote{That literal value wouldn't have a non-empty provenance, so this isn't the best example. Maybe better to have something like \texttt{p+(q1-q2)} ?}
\psnote{More generally, there is a bit of a mismatch between this and our C provenance treatment of \cintptrt, which there is a plain integer type with no provenance -- but which regains provenance in some cases when cast back to a pointer. To ponder...}
In that case, the compiler must decide which input capability provides the
capability metadata (bounds, permissions, \ldots{}) to be used in the output
value.
Consider for example the following code:
\begin{lstlisting}[language=C]
void *c1 = (void *)((uintptr_t)input_ptr + 1);
void *c2 = (void *)(1 + (uintptr_t)input_ptr);
uintptr_t offset = 1;
void *c3 = (void *)(offset + (uintptr_t)input_ptr);
\end{lstlisting}
In C with integer pointers, \cvar{c1}, \cvar{c2}, and \cvar{c3} might be expected to have the
same value as \cvar{input\_ptr}, except with the address incremented by one.
In CHERI C, each expression includes an arithmetic operation between provenance-carrying types.
While not visible in the source code, the constant \cconst{1} is promoted to a capability type, \cuintptrt.
In the current implementation, the compiler will return the expected provenance-carrying result for cases \cvar{c1} and \cvar{c2} but not \cvar{c3}.%
%
\footnote{%
Historically, the CHERI compiler would select the left-hand-most pointer in the expression as the provenance source.
While this model follows a single consistent rule, it can lead to surprising behavior if an expression places the provenance-carrying value on the right-hand side.
In the example above, the value of \cvar{c1} would be a valid capability, but \cvar{c2} and \cvar{c3} would hold an untagged value (albeit with the expected address).}
%
For \cvar{c1} and \cvar{c2}, the compiler sees that one of the sides is a non-provenance-carrying integer type that was promoted to \cuintptrt and therefore selects the other operand as the provenance source.
It is not feasible to infer the correct provenance source for the third case, so the compiler will emit a warning.%
%
\footnote{%
We could add a data-flow-sensitive analysis to determine whether values are the result of promotion from a non-provenance-carrying type.
However, this would add significant complexity to the compiler and we have not seen many cases where this would have avoided changes to the source code.
\psnote{from a language-design POV, it'd be pretty horrid to have substantial semantics depend on just how smart one's analysis is}
\arnote{I agree. Even the current behavior is quite ugly, but at least it has measurable compatibility benefits.}
}
%
The current behavior for such ambiguous cases is to select the left-hand-side as the provenance source, but we are considering making this an error in the future.
%
The recommended approach to resolve such ambiguous cases is to change the type of one operand to a non-provenance-carrying type such as \sizet.
Alternatively, if the variable declaration cannot be changed, it is also possible to use a cast in the expression itself.
\begin{lstlisting}[language=C]
size_t offset_size_t = 1;
void *c3_good1 = (void *)(offset_size_t + (uintptr_t)input_ptr);
uintptr_t offset_uintptr_t = 1;
void *c3_good2 = (void *)((size_t)offset_uintptr_t + (uintptr_t)input_ptr);
\end{lstlisting}
We also provide a new attribute \ccode{cheri\_no\_provenance} that can be used to annotate variables or fields of type \cintptrt/\cuintptrt where the underlying type cannot be changed:
\begin{lstlisting}[language=C]
struct S {
uintptr_t maybe_tagged;
uintptr_t never_tagged __attribute__((cheri_no_provenance));
};
void test(struct S s, uintptr_t ptr) {
void *x1 = (void *)(s.maybe_tagged + ptr); // ambiguous, currently uses LHS
void *x2 = (void *)(s.never_tagged + ptr); // not ambiguous, uses RHS
}
\end{lstlisting}
\psnote{This doesn't really explain what \ccode{cheri\_no\_provenance} does? And what it means when applied to other types?}\arnote{compiler error if it's not \cuintptrt. Will try to improve example later.}
\subsection{Bounds}
CHERI C/\cpp{} pointers are implemented using capabilities that enforce lower and
upper bounds on access.
In the pure-capability run-time environment, those bounds are normally set to
the range of the memory allocation into which the pointer is intended to
point.
Because of capability compression, increased alignment requirements may apply
to larger allocations (see \Cref{sec:bounds_alignment}).
Bounds may be set on pointers returned by multiple system components including
the OS kernel, the run-time linker, compiler-generated code, system libraries,
and other utility functions.
As with violations of provenance validity, out-of-bounds accesses -- including
load, store, and instruction fetch -- trigger a hardware exception (see
\Cref{sec:faults}).
\subsubsection{Bounds from the compiler and linker}
The compiler will arrange that language-level pointers to stack allocations have suitable
bounds, and that the run-time linker will return bounded pointers to global
variables.
Bounds will typically be set based on an explicitly requested allocation size
(e.g., via the size passed to \cfunc{alloca}\kern-2pt) or, for compiler-generated
code or linker-allocated memory, by the C type mechanism (e.g.,
\ccode{sizeof(foo)}\kern-2pt), adjusted for precision requirements arising from
capability compression.
In some cases, such as with global variables allocated in multiple object
files, the actual size of the allocation may not be resolved until run time,
by the run-time linker.
These bounds will typically not cause observable changes in behavior -- other than hardware exceptions when (accidentally) performing an out-of-bounds access.
\subsubsection{Bounds from the heap allocator}
\cfunc{malloc} will set bounds on pointers to new heap allocations.
In typical C use, this is not a problem, as programmers expect to access
addresses only within an allocation.
However, in some uses of C, there may be an expectation that memory access can
occur outside the allocation bounds of the pointer via which memory access
takes place.
For example, if an integer pointer difference \cvar{D} is taken between
pointers to two different allocations (\cvar{B} and \cvar{A}), and later
added to pointer \cvar{A}, the new pointer will have an address
within \cvar{B}, but permit access only to \cvar{A}.
This idiom is most likely to be found with non-trivial uses of \cfunc{realloc} (e.g., cases where multiple pointers into a buffer allocated or reallocated by \cfunc{realloc} need to be updated).
We note that the subtraction of two pointers from different
allocations is undefined behavior in ISO C, and risks mis-optimization from
breaking compiler alias analysis assumptions.
Further, \emph{any} operation on the pointer passed to \cfunc{realloc} is undefined upon
return. Instead, we suggest that the programmer measure a pointer \cvar{P}'s
offset into an object \cvar{A} \emph{prior to} \cfunc{realloc} and derive new pointers
from the \cfunc{realloc} result \cvar{B} and these offsets (i.e., compute
$\text{\cvar{B}} + (\text{\cvar{P}} - \text{\cvar{A}})$ rather than
$\text{\cvar{P}} + (\text{\cvar{B}} - \text{\cvar{A}})$).%
%
\footnote{%
While it may seem that \cvar{A} remains available after \cfunc{realloc}\kern-2pt, our
revocation sweeps which enforce temporal safety may have atomically replaced
this with a non-pointer value. The scalar value
$\text{\cvar{D}} = \text{\cvar{P}} - \text{\cvar{A}}$
will naturally be preserved by revocation.}
\subsubsection{Subobject bounds}
\purecapCOrCpp{} also supports automatically restricting the
bounds when a pointer is taken to a subobject -- for example, an array
embedded within another structure that itself has been heap allocated.
This will prevent an overflow on that array from affecting the remainder of
the structure, improving spatial safety.
Subobject bounds are not enabled by default as they may require additional source code changes
for compatibility, but can be enabled using the \mbox{\commandline{-Xclang}} \mbox{\commandline{-cheri-bounds=subobject-safe}} compiler flag.
One example of C code that requires changes for subobject bounds is the \ccode{containerof}
pattern, in which pointer arithmetic on a pointer to a subobject is used to
recover a pointer to the container object -- for example, as seen in the
widely used BSD \pathname{queue.h} linked-list macros or the generic C
hash-table implementation, \pathname{uthash.h}.
In these cases, an opt-out annotation can be applied to a given type, field or variable
that instructs the compiler to not tighten bounds when creating pointers to subobjects.
We currently define three opt-out annotations that can be used to allow
existing code to disable use of subobject bounds:
\paragraph{Completely disable subobject bounds} It is possible to annotate a typedef, record member,
or variable declaration with:
\begin{lstlisting}[language={C}]
__attribute__((cheri_no_subobject_bounds))
\end{lstlisting}
\noindent
to indicate that the compiler should not tighten bounds when taking the address or a \cpp{} reference. In \cpp{}11/C2x mode this can also be spelled as \cxxcode{[[cheri::no\_subobject\_bounds]]}.
\begin{lstlisting}[language={C}]
struct str {
/*
* Nul-terminated string array -- pointers taken to this subobject will
* use the array's bounds, not those of the container structure.
*/
char str_array[128];
/*
* Linked-list entry element -- because of the additional attribute,
* pointers taken to this subobject will use the container structure's
* bounds, not those of the specific field.
*/
struct list_entry str_le __attribute__((cheri_no_subobject_bounds));
} str_instance;
void
fn(void)
{
/* Struct pointer gets bounds of str_instance. */
struct str *strp = &str_instance;