sources.bib

% Journal article (IEEE TDSC, 2004): basic concepts and taxonomy of
% dependable and secure computing.
@article{1335465,
  author   = {Avizienis, A. and Laprie, J.-C. and Randell, B. and Landwehr, C.},
  title    = {Basic concepts and taxonomy of dependable and secure computing},
  journal  = {IEEE Transactions on Dependable and Secure Computing},
  year     = {2004},
  volume   = {1},
  number   = {1},
  pages    = {11--33},
  keywords = {Taxonomy;Availability;Fault tolerance;Safety;Maintenance;Communication system security;Uncertainty;Standardization;Books;Dependability;security;trust;faults;errors;failures;vulnerabilities;attacks;fault tolerance;fault removal;fault forecasting},
  doi      = {10.1109/TDSC.2004.2},
}

% Journal article (IEEE Transactions on Reliability, 2002): control-flow
% checking by software signatures.
@article{994926,
  author   = {Oh, N. and Shirvani, P. P. and McCluskey, E. J.},
  title    = {Control-flow checking by software signatures},
  journal  = {IEEE Transactions on Reliability},
  year     = {2002},
  volume   = {51},
  number   = {1},
  pages    = {111--122},
  keywords = {Monitoring;Runtime;Error correction;Hardware;Multitasking;Central Processing Unit;Process control;Operating systems;Fault detection;Satellites},
  doi      = {10.1109/24.994926},
}

%Transient and intermittent faults as defined in [2] play a
major role in undermining the reliability of digital
systems. It is estimated that they occur 10 to 30 times more
frequently than permanent faults [18]. When testing for
transient and intermittent faults, the system must be tested in
its operational environment and concurrent with its execution
of the application task. This type of testing is referred to as
concurrent testing or on-line monitoring.%
% Journal article (IEEE Transactions on Computers, 1987): processor control
% flow monitoring using signatured instruction streams.
% NOTE(review): the export carried surnames only; the initials below were
% added from the published byline -- confirm against the DOI record.
@article{1676899,
  author   = {Schuette, M. A. and Shen, J. P.},
  title    = {Processor Control Flow Monitoring Using Signatured Instruction Streams},
  journal  = {IEEE Transactions on Computers},
  year     = {1987},
  volume   = {C-36},
  number   = {3},
  pages    = {264--276},
  keywords = {Control flow monitoring;error detection coverage and latency;fault insertion experiments;roving monitoring;signature analysis;signatured instruction streams;transient and intermittent faults},
  doi      = {10.1109/TC.1987.1676899},
}

%Transient faults (also known as soft errors), unlike
manufacturing or design faults, do not occur consistently.
Instead, these intermittent faults are caused by external
events, such as energetic particles striking the chip. These
events do not cause permanent physical damage to the
processor, but can alter signal transfers or stored values and
thus cause incorrect program execution.%

%Transient faults have caused significant failures. In
2000, Sun Microsystems acknowledged that cosmic rays
interfered with cache memories and caused crashes in
server systems at major customer sites, including America
Online, eBay, and dozens of others [3].%
% Conference paper (International Symposium on Code Generation and
% Optimization, 2005): SWIFT, software implemented fault tolerance.
@inproceedings{1402092,
  author    = {Reis, G. A. and Chang, J. and Vachharajani, N. and Rangan, R. and August, D. I.},
  title     = {{SWIFT}: software implemented fault tolerance},
  booktitle = {International Symposium on Code Generation and Optimization},
  year      = {2005},
  pages     = {243--254},
  keywords  = {Fault tolerance;Process design;Voltage;Noise reduction;Noise level;Clocks;Power system reliability;Hardware;Resource management;Redundancy},
  doi       = {10.1109/CGO.2005.34},
}

% NASA NTRS document (2000): tutorial on software fault tolerance.
@misc{nasa:sft,
  author = {Torres-Pomales, Wilfredo},
  title  = {Software Fault Tolerance: A Tutorial},
  year   = {2000},
  note   = {Work of the US Gov. Public Use Permitted.},
  url    = {https://ntrs.nasa.gov/citations/20000120144},
}

% NASA NTRS document (2023): historical aerospace software errors,
% categorized to influence fault tolerance.
@misc{nasa:stats,
  author = {Prokop, Lorraine},
  title  = {Historical Aerospace Software Errors Categorized to Influence Fault Tolerance},
  year   = {2023},
  note   = {Work of the US Gov. Public Use Permitted.},
  url    = {https://ntrs.nasa.gov/citations/20230001295},
}

% NASA NTRS document (1990): multiversion software reliability through
% fault-avoidance and fault-tolerance.
@misc{nasa:mvsr,
  author = {Vouk, Mladen A. and McAllister, David F.},
  title  = {Multiversion software reliability through fault-avoidance and fault-tolerance},
  year   = {1990},
  note   = {Work of the US Gov. Public Use Permitted.},
  url    = {https://ntrs.nasa.gov/citations/19930012458},
}

% NASA NTRS document (1991): software fault tolerance using data diversity.
@misc{nasa:datadiversity,
  author = {Knight, John C.},
  title  = {Software fault tolerance using data diversity},
  year   = {1991},
  note   = {Work of the US Gov. Public Use Permitted.},
  url    = {https://ntrs.nasa.gov/citations/19910016332},
}

% Conference paper (Eighteenth International Symposium on Fault-Tolerant
% Computing, 1988): unified hardware/software fault tolerance architecture.
@inproceedings{5326,
  author    = {Lala, J. H. and Alger, L. S.},
  title     = {Hardware and software fault tolerance: a unified architectural approach},
  booktitle = {The Eighteenth International Symposium on Fault-Tolerant Computing. Digest of Papers},
  year      = {1988},
  pages     = {240--245},
  keywords  = {Hardware;Fault tolerance;Computer architecture;Fault tolerant systems;Computer errors;Application software;Protection;Aerospace control;Reliability;Real time systems},
  doi       = {10.1109/FTCS.1988.5326},
}

% Journal article (IEEE Transactions on Reliability, 1997): software fault
% tolerance via t/(n-1)-variant programming.
@article{589928,
  author   = {Xu, Jie and Randell, B.},
  title    = {Software fault tolerance: {$t/(n-1)$}-variant programming},
  journal  = {IEEE Transactions on Reliability},
  year     = {1997},
  volume   = {46},
  number   = {1},
  pages    = {60--68},
  keywords = {Fault tolerance;Software safety;Hardware;Fault tolerant systems;Testing;Fault diagnosis;Redundancy;Nuclear magnetic resonance;Application software;Software design},
  doi      = {10.1109/24.589928},
}

% Journal article (Journal of Physics: Conference Series, 2021): choosing the
% optimal set of versions of N-version software with ant colony optimization.
% The `pages` value is the article number used by this venue.
@article{Aljarbouh_2021,
  author    = {Aljarbouh, A.},
  title     = {Selection of the optimal set of versions of {N-version} software using the ant colony optimization},
  journal   = {Journal of Physics: Conference Series},
  year      = {2021},
  month     = nov,
  volume    = {2094},
  number    = {3},
  pages     = {032026},
  publisher = {IOP Publishing},
  doi       = {10.1088/1742-6596/2094/3/032026},
  url       = {https://dx.doi.org/10.1088/1742-6596/2094/3/032026},
  abstract  = {The article discusses the problem of ensuring a high level of software reliability. A software reliability high level can be ensured using N-version programming. N-version software is distinguished by the software components redundancy, designed to solve one problem using different methods. Software redundancy requires additional resources. The challenge is thus to increase the software reliability, while at the same time minimising the resources used. In this article, ant colony optimization is used to solve the problem of choosing the optimal set of versions of N-version software.},
}