From 6b148d59a068d97cfe709f70638ec8965fbc0991 Mon Sep 17 00:00:00 2001 From: Yanglin Zhang <56758695+lucky9-cyou@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:42:07 +0800 Subject: [PATCH 1/3] feat: add clifford simulator(#1) * [feat]: init add qfvm clifford * [feat]: add simulate circuit using clifford * [fix]: fix compile bugs * [feat]: init add python * [feat]: add naive final measure * style: format code * fix: ms aligned malloc * fix: apple align malloc * ci: update macos os version to latest * ci: update macos os version to latest * ci: update eigen version * ci: update macos version min version * ci: update macos version min * ci: update pyproject * ci: update pyproject * feat: fix bugs, add clifford test --- .github/workflows/wheels.yml | 4 +- CMakeLists.txt | 10 +- pyproject.toml | 1 + quafu/results/results.py | 9 +- quafu/simulators/simulator.py | 13 + src/qfvm/qfvm.cpp | 87 ++++ src/qfvm/simulator.hpp | 46 ++ src/qfvm_clifford/bit.h | 55 +++ src/qfvm_clifford/bit_word.h | 512 ++++++++++++++++++++++ src/qfvm_clifford/circuit.h | 179 ++++++++ src/qfvm_clifford/clifford_simulator.h | 185 ++++++++ src/qfvm_clifford/gate_list.h | 246 +++++++++++ src/qfvm_clifford/gate_macro.h | 65 +++ src/qfvm_clifford/packed_bit_word.h | 350 +++++++++++++++ src/qfvm_clifford/packed_bit_word_slice.h | 307 +++++++++++++ src/qfvm_clifford/pauli.h | 176 ++++++++ src/qfvm_clifford/pauli_slice.h | 145 ++++++ src/qfvm_clifford/span_ref.h | 331 ++++++++++++++ src/qfvm_clifford/table.h | 347 +++++++++++++++ src/qfvm_clifford/tableau.h | 434 ++++++++++++++++++ src/qfvm_clifford/utils.h | 40 ++ tests/quafu/simulator/basis_test.py | 79 ++++ 22 files changed, 3614 insertions(+), 7 deletions(-) create mode 100644 src/qfvm_clifford/bit.h create mode 100644 src/qfvm_clifford/bit_word.h create mode 100644 src/qfvm_clifford/circuit.h create mode 100644 src/qfvm_clifford/clifford_simulator.h create mode 100644 src/qfvm_clifford/gate_list.h create mode 100644 src/qfvm_clifford/gate_macro.h 
create mode 100644 src/qfvm_clifford/packed_bit_word.h create mode 100644 src/qfvm_clifford/packed_bit_word_slice.h create mode 100644 src/qfvm_clifford/pauli.h create mode 100644 src/qfvm_clifford/pauli_slice.h create mode 100644 src/qfvm_clifford/span_ref.h create mode 100644 src/qfvm_clifford/table.h create mode 100644 src/qfvm_clifford/tableau.h create mode 100644 src/qfvm_clifford/utils.h diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 860c3ec..032a0a9 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -27,9 +27,9 @@ jobs: - os-arch: win_amd64 os: windows-2019 - os-arch: macosx_x86_64 - os: macos-11 + os: macos-13 - os-arch: macosx_arm64 - os: macos-11 + os: macos-13 runs-on: ${{ matrix.os }} env: diff --git a/CMakeLists.txt b/CMakeLists.txt index ee1910a..9595c4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,8 +9,9 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CUDA_ARCHITECTURES 70;75;80;90) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) if(SKBUILD) execute_process( @@ -46,7 +47,7 @@ include(ExternalProject) ExternalProject_Add(Eigen3 PREFIX ${EIGEN3_ROOT} GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git - GIT_TAG 3.3.9 + GIT_TAG 3.4 CONFIGURE_COMMAND "" BUILD_COMMAND "" @@ -56,10 +57,11 @@ ExternalProject_Add(Eigen3 ) list (APPEND PRJ_INCLUDE_DIRS ${EIGEN3_INCLUDE_DIR}) - find_package(pybind11 CONFIG) list (APPEND PRJ_INCLUDE_DIRS ${PYBIND11_INCLUDE_DIR}) +set(MACOS_VERSION_MIN 10.14) + #SIMD if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "AMD64" OR CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "amd64") if(MSVC) @@ -74,7 +76,7 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_HOST_SYSTEM_PROCESSOR endif() endif() -list (APPEND PRJ_INCLUDE_DIRS src/qfvm) +list (APPEND PRJ_INCLUDE_DIRS src/qfvm src/qfvm_clifford) pybind11_add_module(${PROJECT_NAME} MODULE 
src/${PROJECT_NAME}/${PROJECT_NAME}.cpp) add_dependencies(${PROJECT_NAME} Eigen3) #must add dependence for ninja target_compile_options(${PROJECT_NAME} PUBLIC ${PRJ_COMPILE_OPTIONS}) diff --git a/pyproject.toml b/pyproject.toml index 6679926..1b10e0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ archs = ["x86_64"] [tool.cibuildwheel.macos] +environment = {MACOSX_DEPLOYMENT_TARGET = "13.6"} archs = ["x86_64", "arm64"] repair-wheel-command = [ diff --git a/quafu/results/results.py b/quafu/results/results.py index 1bcc211..d913d55 100644 --- a/quafu/results/results.py +++ b/quafu/results/results.py @@ -91,7 +91,11 @@ class SimuResult(Result): """ def __init__(self, input, input_form, count_dict: dict = None): - self.num = int(np.log2(input.shape[0])) + if input_form != "count_dict": + self.num = int(np.log2(input.shape[0])) + else: + # input is num qubits + self.num = input if input_form == "density_matrix": self.rho = np.array(input) self.probabilities = np.diag(input) @@ -99,6 +103,9 @@ def __init__(self, input, input_form, count_dict: dict = None): self.probabilities = input elif input_form == "state_vector": self.state_vector = input + elif input_form == "count_dict": + # do nothing, only count dict + pass # come form c++ simulator # TODO: add count for py_simu if count_dict is not None: diff --git a/quafu/simulators/simulator.py b/quafu/simulators/simulator.py index e4a6a10..4be6e19 100644 --- a/quafu/simulators/simulator.py +++ b/quafu/simulators/simulator.py @@ -32,6 +32,7 @@ def simulate( shots: int = 100, use_gpu: bool = False, use_custatevec: bool = False, + use_clifford: bool = False, ) -> SimuResult: """Simulate quantum circuit Args: @@ -86,6 +87,7 @@ def simulate( if use_gpu: if qc.executable_on_backend == False: raise QuafuError("classical operation only support for `qfvm_qasm`") + if use_custatevec: try: from .qfvm import simulate_circuit_custate @@ -101,6 +103,14 @@ def simulate( else: count_dict, psi = simulate_circuit(qc, psi, 
shots) + elif simulator == "qfvm_clifford": + try: + from .qfvm import simulate_circuit_clifford + except ImportError: + raise QuafuError("you are not using the clifford version of pyquafu") + + count_dict = simulate_circuit_clifford(qc, shots) + elif simulator == "py_simu": if qc.executable_on_backend == False: raise QuafuError("classical operation only support for `qfvm_qasm`") @@ -129,6 +139,9 @@ def simulate( elif output == "state_vector": return SimuResult(psi, output, count_dict) + elif output == "count_dict": + return SimuResult(max(qc.used_qubits) + 1, output, count_dict) + else: raise ValueError( "output should in be 'density_matrix', 'probabilities', or 'state_vector'" diff --git a/src/qfvm/qfvm.cpp b/src/qfvm/qfvm.cpp index f3abd10..913262d 100644 --- a/src/qfvm/qfvm.cpp +++ b/src/qfvm/qfvm.cpp @@ -3,6 +3,8 @@ #include #include #include +#include + #ifdef _USE_GPU #include #endif @@ -11,6 +13,12 @@ #include #endif +#ifdef USE_SIMD +constexpr size_t _word_size = 256; +#else +constexpr size_t _word_size = 64; +#endif + namespace py = pybind11; template @@ -115,6 +123,82 @@ simulate_circuit(py::object const& pycircuit, return std::make_pair(outcount, np_inputstate); } +std::map simulate_circuit_clifford(py::object const& pycircuit, + const int& shots) { + + auto circuit = Circuit(pycircuit); + + // If measure all at the end, simulate once + uint actual_shots = shots; + + // qbit, cbit + vector> measures = circuit.measure_vec(); + std::map cbit_measured; + for (auto& pair : measures) { + cbit_measured[pair.second] = true; + } + + // Store outcome's count + std::map outcount; + + circuit_simulator<_word_size> cs(circuit.qubit_num()); + + for (uint i = 0; i < actual_shots; i++) { + + simulate(circuit, cs); + uint outcome = 0; + + if (!circuit.final_measure()) { + // qubit, cbit, measure result + auto measure_results = cs.current_measurement_record(); + + // make sure the order is the same with other simulators + std::sort( + measure_results.begin(), 
measure_results.end(), + [](auto& a, auto& b) { return std::get<1>(a) < std::get<1>(b); }); + + for (auto& measure_result : measure_results) { + outcome *= 2; + outcome += std::get<2>(measure_result); + } + + } else if (circuit.final_measure() && !measures.empty()) { + for (auto& measure : measures) { + cs.do_circuit_instruction( + {"measure", std::vector{measure.first}, + std::vector{static_cast(measure.second)}}); + } + + // qubit, cbit, measure result + auto measure_results = cs.current_measurement_record(); + + // make sure the order is the same with other simulators + std::sort( + measure_results.begin(), measure_results.end(), + [](auto& a, auto& b) { return std::get<1>(a) < std::get<1>(b); }); + + for (auto& measure_result : measure_results) { + outcome *= 2; + outcome += std::get<2>(measure_result); + } + } + + if (measures.empty()) { + continue; + } + + if (outcount.find(outcome) != outcount.end()) + outcount[outcome]++; + else + outcount[outcome] = 1; + + cs.reset_tableau(); + cs.sim_record.clear(); + } + + return outcount; +} + #ifdef _USE_GPU py::object simulate_circuit_gpu(py::object const& pycircuit, py::array_t>& np_inputstate) { @@ -164,6 +248,9 @@ PYBIND11_MODULE(qfvm, m) { py::arg("circuit"), py::arg("inputstate") = py::array_t>(0), py::arg("shots")); + m.def("simulate_circuit_clifford", &simulate_circuit_clifford, + "Simulate with circuit using clifford", py::arg("circuit"), + py::arg("shots")); #ifdef _USE_GPU m.def("simulate_circuit_gpu", &simulate_circuit_gpu, "Simulate with circuit", diff --git a/src/qfvm/simulator.hpp b/src/qfvm/simulator.hpp index 1e2663d..9e88fb6 100644 --- a/src/qfvm/simulator.hpp +++ b/src/qfvm/simulator.hpp @@ -1,7 +1,12 @@ #pragma once #include "circuit.hpp" +#include "clifford_simulator.h" +#include "qasm.hpp" #include "statevector.hpp" +#include "types.hpp" +#include +#include void apply_op(QuantumOperator& op, StateVector& state) { bool matched = false; @@ -114,3 +119,44 @@ void simulate(Circuit const& circuit, 
StateVector& state) { apply_op(op, state); } } + +template +void apply_measure(circuit_simulator& cs, const vector& qbits, + const vector& cbits) { + for (size_t i = 0; i < qbits.size(); i++) { + cs.do_circuit_instruction( + {"measure", std::vector{qbits[i]}, + std::vector{static_cast(cbits[i])}}); + } +} + +template +void apply_op(QuantumOperator& op, circuit_simulator& cs) { + // TODO: support args + switch (OPMAP[op.name()]) { + case Opname::measure: + apply_measure(cs, op.qbits(), op.cbits()); + break; + case Opname::reset: + for (auto qubit : op.qbits()) { + cs.do_circuit_instruction( + {"reset", std::vector{static_cast(qubit)}}); + } + break; + default: + auto qubits = op.positions(); + cs.do_circuit_instruction( + {op.name(), std::vector(qubits.begin(), qubits.end())}); + } +} + +template +void simulate(Circuit const& circuit, circuit_simulator& cs) { + // skip measure and handle it in qfvm.cpp + bool skip_measure = circuit.final_measure(); + for (auto op : circuit.instructions()) { + if (skip_measure == true && op.name() == "measure") + continue; + apply_op(op, cs); + } +} diff --git a/src/qfvm_clifford/bit.h b/src/qfvm_clifford/bit.h new file mode 100644 index 0000000..1be3467 --- /dev/null +++ b/src/qfvm_clifford/bit.h @@ -0,0 +1,55 @@ +#ifndef BIT_H_ +#define BIT_H_ + +#include +#include + +// bit in byte +struct bit { + uint8_t* byte; + uint8_t byte_index; + + bit(void* ptr, size_t offset) + : byte(((uint8_t*)ptr + (offset / 8))), byte_index(offset & 7) {} + + // copy assignment for bit in byte + inline bit& operator=(bool value) { + // make bit be 0 + *byte &= ~((uint8_t)1 << byte_index); + // assignment + *byte |= uint8_t(value) << byte_index; + return *this; + } + + inline bit& operator=(const bit& other) { + *this = bool(other); + return *this; + } + + // bit operator + inline bit& operator^=(bool value) { + *byte ^= uint8_t(value) << byte_index; + return *this; + } + + inline bit& operator&=(bool value) { + *byte &= (uint8_t(value) << byte_index) | 
~(uint8_t(1) << byte_index); + return *this; + } + + inline bit& operator|=(bool value) { + *byte |= uint8_t(value) << byte_index; + return *this; + } + + // conversion operator + inline operator bool() const { return (*byte >> byte_index) & 1; } + + void swap(bit other) { + bool b = bool(other); + other = bool(*this); + *this = b; + } +}; + +#endif diff --git a/src/qfvm_clifford/bit_word.h b/src/qfvm_clifford/bit_word.h new file mode 100644 index 0000000..e832964 --- /dev/null +++ b/src/qfvm_clifford/bit_word.h @@ -0,0 +1,512 @@ +#ifndef TABLEAU_WORD_H_ +#define TABLEAU_WORD_H_ + +#include "utils.h" +#include +#include +#include +#include + +#ifdef USE_SIMD +#ifdef _MSC_VER +#include +#else +#include +#endif +#endif + +#include +#include + +// A bit_word is a bag of bits, which can be operated by individual CPU +// instructions. +// This template is necessary due to the varying interfaces between native types +// such as uint64_t and intrinsics like __m256i across different architectures +// and operating systems. In certain contexts, operators can be used on __m256i +// values (e.g. a ^= b), while in others this is not possible. The bitword +// template implementations establish a standard set of methods that are +// essential for Clifford's operation, allowing the same code to be compiled +// using either 256-bit or 64-bit registers depending on what is appropriate. 
+template struct bit_word; + +/* =================================================== */ +/* ================ bit_word operation =============== */ + +template +inline bool operator==(const bit_word& left, + const bit_word& right) { + return left.to_u64_array() == right.to_u64_array(); +} + +template +inline bool operator!=(const bit_word& left, + const bit_word& right) { + return !(left == right); +} + +template +inline bool operator<(const bit_word& left, + const bit_word& right) { + auto array1 = left.to_u64_array(); + auto array2 = right.to_u64_array(); + + for (size_t i = 0; i < array1.size(); ++i) { + if (array1[i] != array2[i]) { + return array1[i] < array2[i]; + } + } + return false; +} + +template +inline bool operator==(const bit_word& left, int right) { + return left == bit_word(right); +} + +template +inline bool operator!=(const bit_word& left, int right) { + return left != right; +} + +template +inline bool operator==(const bit_word& left, uint64_t right) { + return left == bit_word(right); +} + +template +inline bool operator!=(const bit_word& left, uint64_t right) { + return left != right; +} + +template +inline bool operator==(const bit_word& left, int64_t right) { + return left == bit_word(right); +} + +template +inline bool operator!=(const bit_word& left, int64_t right) { + return left != right; +} + +// output 1 for bit 1, . 
for bit 0 +template +std::ostream& operator<<(std::ostream& os, const bit_word& word) { + os << "bit_word<" << word_size << ">{"; + auto array1 = word.to_u64_array(); + for (size_t i = 0; i < array1.size(); ++i) { + for (size_t j = 0; j < 64; ++j) { + if ((i | j) && (j & 7) == 0) { + os << ' '; + } + // ".1" is a char array + os << ".1"[(array1[i] >> j) & 1]; + } + } + os << "}"; + return os; +} + +template +inline bit_word operator<<(const bit_word& word, + int offset) { + return word.shift(offset); +} + +template +inline bit_word operator>>(const bit_word& word, + int offset) { + return word.shift(-offset); +} + +template +inline bit_word operator<<=(bit_word& word, int offset) { + return word = word.shift(offset); +} + +template +inline bit_word operator>>=(bit_word& word, int offset) { + return word = word.shift(-offset); +} + +template +inline bit_word operator<<(const bit_word& word, + uint64_t offset) { + return word.shift((int)offset); +} + +template +inline bit_word operator>>(const bit_word& word, + uint64_t offset) { + return word.shift(-(int)offset); +} + +template +inline bit_word operator<<=(bit_word& word, + uint64_t offset) { + return word = word.shift((int)offset); +} + +template +inline bit_word operator>>=(bit_word& word, + uint64_t offset) { + return word = word.shift(-(int)offset); +} + +template +inline bit_word operator&(const bit_word& word, + uint64_t mask) { + return word & bit_word(mask); +} + +template +inline bit_word operator|(const bit_word& word, + uint64_t mask) { + return word | bit_word(mask); +} + +template +inline bit_word operator^(const bit_word& word, + uint64_t mask) { + return word ^ bit_word(mask); +} + +template +inline bit_word operator&(const bit_word& word, + int64_t mask) { + return word & bit_word(mask); +} + +template +inline bit_word operator|(const bit_word& word, + int64_t mask) { + return word | bit_word(mask); +} + +template +inline bit_word operator^(const bit_word& word, + int64_t mask) { + return word ^ 
bit_word(mask); +} + +template +inline bit_word operator&(const bit_word& word, + int mask) { + return word & bit_word(mask); +} + +template +inline bit_word operator|(const bit_word& word, + int mask) { + return word | bit_word(mask); +} + +template +inline bit_word operator^(const bit_word& word, + int mask) { + return word ^ bit_word(mask); +} + +/* =================================================== */ +/* ================ 64 bit version =================== */ +template <> struct bit_word<64> { + constexpr static size_t WORD_SIZE = 64; + constexpr static size_t BIT_POW = 6; + + union { + uint8_t u8[8]; + uint64_t u64[1]; + }; + + inline constexpr bit_word<64>() : u64{} {} + inline constexpr bit_word<64>(uint64_t v) : u64{v} {} + inline constexpr bit_word<64>(int64_t v) : u64{(uint64_t)v} {} + inline constexpr bit_word<64>(int v) : u64{(uint64_t)v} {} + + inline operator bool() const { return bool(u64[0]); } + inline operator int64_t() const { return int64_t(u64[0]); } + inline operator int() const { return int64_t(*this); } + inline operator uint64_t() const { return u64[0]; } + + inline bit_word<64>& operator^=(const bit_word<64>& other) { + u64[0] ^= other.u64[0]; + return *this; + } + + inline bit_word<64>& operator&=(const bit_word<64>& other) { + u64[0] &= other.u64[0]; + return *this; + } + + inline bit_word<64>& operator|=(const bit_word<64>& other) { + u64[0] |= other.u64[0]; + return *this; + } + + inline bit_word<64> operator^(const bit_word<64>& other) const { + return bit_word<64>(u64[0] ^ other.u64[0]); + } + + inline bit_word<64> operator&(const bit_word<64>& other) const { + return bit_word<64>(u64[0] & other.u64[0]); + } + + inline bit_word<64> operator|(const bit_word<64>& other) const { + return bit_word<64>(u64[0] | other.u64[0]); + } + + inline bit_word<64> andnot(const bit_word<64>& other) const { + return bit_word<64>(~u64[0] & other.u64[0]); + } + + // convert bit word to string + operator std::string() const { + std::stringstream ss; + 
ss << *this; + return ss.str(); + } + + std::array to_u64_array() const { + return std::array{u64[0]}; + } + + inline bit_word<64> shift(int offset) const { + auto array1 = u64[0]; + if (64 <= offset || -64 >= offset) { + return 0; + } else if (offset > 0) { + return bit_word<64>(array1 << offset); + } else { + return bit_word<64>(array1 >> -offset); + } + } + + inline uint16_t count() const { return count_uint64_bits(u64[0]); } + + static void* aligned_malloc(size_t bits) { return malloc(bits); } + + static void aligned_free(void* ptr) { free(ptr); } + + template + static void inplace_transpose_64_step(uint64_t* data, size_t stride) { + for (size_t k = 0; k < 64; k++) { + if (k & shift) + continue; + uint64_t& x = data[stride * k]; + uint64_t& y = data[stride * (k + shift)]; + uint64_t a = x & mask; + uint64_t b = x & ~mask; + uint64_t c = y & mask; + uint64_t d = y & ~mask; + x = a | (c << shift); + y = (b >> shift) | d; + } + } + + static void inplace_transpose_square(bit_word<64>* block_start, + size_t stride) { + inplace_transpose_64_step<0x5555555555555555ull, 1>((uint64_t*)block_start, + stride); + inplace_transpose_64_step<0x3333333333333333ull, 2>((uint64_t*)block_start, + stride); + inplace_transpose_64_step<0x0F0F0F0F0F0F0F0Full, 4>((uint64_t*)block_start, + stride); + inplace_transpose_64_step<0x00FF00FF00FF00FFull, 8>((uint64_t*)block_start, + stride); + inplace_transpose_64_step<0x0000FFFF0000FFFFull, 16>((uint64_t*)block_start, + stride); + inplace_transpose_64_step<0x00000000FFFFFFFFull, 32>((uint64_t*)block_start, + stride); + } +}; + +#ifdef USE_SIMD +/* =================================================== */ +/* ================ 256 bit version ================== */ +template <> struct bit_word<256> { + constexpr static size_t WORD_SIZE = 256; + constexpr static size_t BIT_POW = 8; + + union { + uint8_t u8[32]; + uint64_t u64[4]; + __m256i m256; + }; + + inline constexpr bit_word<256>() : m256(__m256i{}) {} + inline constexpr bit_word<256>(__m256i 
v) : m256(v) {} + inline bit_word<256>(uint64_t v) : m256{_mm256_set_epi64x(0, 0, 0, v)} {} + inline bit_word<256>(int64_t v) + : m256{_mm256_set_epi64x(-(v < 0), -(v < 0), -(v < 0), v)} {} + inline bit_word<256>(int v) + : m256{_mm256_set_epi64x(-(v < 0), -(v < 0), -(v < 0), v)} {} + + inline operator bool() const { + return bool(u64[0] | u64[1] | u64[2] | u64[3]); + } + inline operator int64_t() const { + auto words = to_u64_array(); + // x86 is little endian default + int64_t result = int64_t(words[0]); + uint64_t expected = result < 0 ? uint64_t(-1) : uint64_t(0); + if (words[1] != expected || words[2] != expected || words[3] != expected) { + throw std::runtime_error("int64_t overflow"); + } + return result; + } + inline operator int() const { return int64_t(*this); } + inline operator uint64_t() const { + if (u64[1] || u64[2] || u64[3]) { + throw std::runtime_error("uint64_t overflow"); + } + return u64[0]; + } + + inline bit_word<256>& operator^=(const bit_word<256>& other) { + m256 = _mm256_xor_si256(m256, other.m256); + return *this; + } + + inline bit_word<256>& operator&=(const bit_word<256>& other) { + m256 = _mm256_and_si256(m256, other.m256); + return *this; + } + + inline bit_word<256>& operator|=(const bit_word<256>& other) { + m256 = _mm256_or_si256(m256, other.m256); + return *this; + } + + inline bit_word<256> operator^(const bit_word<256>& other) const { + return bit_word<256>(_mm256_xor_si256(m256, other.m256)); + } + + inline bit_word<256> operator&(const bit_word<256>& other) const { + return bit_word<256>(_mm256_and_si256(m256, other.m256)); + } + + inline bit_word<256> operator|(const bit_word<256>& other) const { + return bit_word<256>(_mm256_or_si256(m256, other.m256)); + } + + inline bit_word<256> andnot(const bit_word<256>& other) const { + return bit_word<256>(_mm256_andnot_si256(m256, other.m256)); + } + + // convert bit word to string + operator std::string() const { + std::stringstream ss; + ss << *this; + return ss.str(); + } + + 
std::array to_u64_array() const { + return std::array{u64[0], u64[1], u64[2], u64[3]}; + } + + inline bit_word<256> shift(int offset) const { + auto array = to_u64_array(); + while (offset <= -64) { + array[0] = array[1]; + array[1] = array[2]; + array[2] = array[3]; + array[3] = 0; + offset += 64; + } + + while (offset >= 64) { + array[3] = array[2]; + array[2] = array[1]; + array[1] = array[0]; + array[0] = 0; + offset -= 64; + } + + __m256i low2high; + __m256i high2low; + if (offset < 0) { + low2high = _mm256_set_epi64x(0, array[3], array[2], array[1]); + high2low = _mm256_set_epi64x(array[3], array[2], array[1], array[0]); + offset += 64; + } else { + low2high = _mm256_set_epi64x(array[3], array[2], array[1], array[0]); + high2low = _mm256_set_epi64x(array[2], array[1], array[0], 0); + } + + uint64_t mask = (uint64_t{1} << offset) - 1; + low2high = _mm256_slli_epi64(low2high, offset); + high2low = _mm256_srli_epi64(high2low, 64 - offset); + // for offset < 0, only w[1] in lower 64 bits is used + low2high = _mm256_and_si256(low2high, _mm256_set1_epi64x(~mask)); + // for offset > 0, only w[0] in upper 64 bits is used + high2low = _mm256_and_si256(high2low, _mm256_set1_epi64x(mask)); + return _mm256_or_si256(low2high, high2low); + } + + inline uint16_t count() const { + return count_uint64_bits(u64[0]) + count_uint64_bits(u64[1]) + + count_uint64_bits(u64[2]) + count_uint64_bits(u64[3]); + } + + static void* aligned_malloc(size_t bits) { + return _mm_malloc(bits, sizeof(__m256i)); + } + + static void aligned_free(void* ptr) { _mm_free(ptr); } + + template + static void inplace_transpose_256_step(__m256i mask, __m256i* data, + size_t stride) { + for (std::size_t k = 0; k < 256; k++) { + if (k & shift) + continue; + + __m256i& x = data[stride * k]; + __m256i& y = data[stride * (k + shift)]; + __m256i a = _mm256_and_si256(x, mask); + __m256i b = _mm256_andnot_si256(mask, x); + __m256i c = _mm256_and_si256(y, mask); + __m256i d = _mm256_andnot_si256(mask, y); + + x = 
_mm256_or_si256(a, _mm256_slli_epi64(c, shift)); + y = _mm256_or_si256(_mm256_srli_epi64(b, shift), d); + } + } + + static void inplace_transpose_64_and_128_step(bit_word<256>* data, + size_t stride) { + uint64_t* u64_ptr = (uint64_t*)data; + stride <<= 2; + for (std::size_t k = 0; k < 64; k++) { + std::swap(u64_ptr[stride * (k + 64 * 0) + 1], + u64_ptr[stride * (k + 64 * 1) + 0]); + std::swap(u64_ptr[stride * (k + 64 * 0) + 2], + u64_ptr[stride * (k + 64 * 2) + 0]); + std::swap(u64_ptr[stride * (k + 64 * 0) + 3], + u64_ptr[stride * (k + 64 * 3) + 0]); + std::swap(u64_ptr[stride * (k + 64 * 1) + 2], + u64_ptr[stride * (k + 64 * 2) + 1]); + std::swap(u64_ptr[stride * (k + 64 * 1) + 3], + u64_ptr[stride * (k + 64 * 3) + 1]); + std::swap(u64_ptr[stride * (k + 64 * 2) + 3], + u64_ptr[stride * (k + 64 * 3) + 2]); + } + } + + static void inplace_transpose_square(bit_word<256>* data, size_t stride) { + inplace_transpose_256_step<1>(_mm256_set1_epi8(0x55), (__m256i*)data, + stride); + inplace_transpose_256_step<2>(_mm256_set1_epi8(0x33), (__m256i*)data, + stride); + inplace_transpose_256_step<4>(_mm256_set1_epi8(0x0F), (__m256i*)data, + stride); + inplace_transpose_256_step<8>(_mm256_set1_epi16(0x00FF), (__m256i*)data, + stride); + inplace_transpose_256_step<16>(_mm256_set1_epi32(0x0000FFFF), + (__m256i*)data, stride); + inplace_transpose_256_step<32>(_mm256_set1_epi64x(0x00000000FFFFFFFFull), + (__m256i*)data, stride); + } +}; +#endif + +#endif diff --git a/src/qfvm_clifford/circuit.h b/src/qfvm_clifford/circuit.h new file mode 100644 index 0000000..958bad6 --- /dev/null +++ b/src/qfvm_clifford/circuit.h @@ -0,0 +1,179 @@ +#ifndef CIRCUIT_H_ +#define CIRCUIT_H_ + +#include "span_ref.h" +#include +#include +#include +#include + +// gate instruction, include gate name, target qubits and arguments +struct circuit_instruction { + std::string gate; + span_ref targets; + span_ref args; + + circuit_instruction() = delete; + circuit_instruction(std::string gate, span_ref targets, 
+ span_ref args = {}) + : gate(gate), targets(targets), args(args) {} + + // TODO: add validation for instruction + void validate() {} +}; + +inline std::ostream& operator<<(std::ostream& os, + const circuit_instruction& instr) { + os << instr.gate << " " + << "targets: "; + for (auto& target : instr.targets) { + os << target << " "; + } + + if (instr.args.size() > 0) { + os << "args: "; + for (auto& arg : instr.args) { + os << arg << " "; + } + } + os << "\n"; + return os; +} + +// quantum circuit, include a list of gate instructions, and buffer for targets +// and args +struct quantum_circuit { + std::vector instr_list; + monotonic_buffer targets_buf; + monotonic_buffer args_buf; + + quantum_circuit() : instr_list(), targets_buf(), args_buf() {} + quantum_circuit(const std::string& gate, const std::vector& targets, + const std::vector& args = {}) + : instr_list(), targets_buf(), args_buf() { + _append(gate, targets, args); + } + + // copy constructor + quantum_circuit(const quantum_circuit& other) + : instr_list(other.instr_list), + targets_buf(other.targets_buf.total_allocated()), + args_buf(other.args_buf.total_allocated()) { + + // take copy of targets and args + for (auto& instr : instr_list) { + instr.targets = targets_buf.take_copy(instr.targets); + instr.args = args_buf.take_copy(instr.args); + } + }; + + // move constructor + quantum_circuit(quantum_circuit&& other) noexcept + : instr_list(std::move(other.instr_list)), + targets_buf(std::move(other.targets_buf)), + args_buf(std::move(other.args_buf)) {} + + // copy assignment + quantum_circuit& operator=(const quantum_circuit& other) { + instr_list = other.instr_list; + targets_buf = monotonic_buffer(other.targets_buf.total_allocated()); + args_buf = monotonic_buffer(other.args_buf.total_allocated()); + for (auto& instr : instr_list) { + instr.targets = targets_buf.take_copy(instr.targets); + instr.args = args_buf.take_copy(instr.args); + } + return *this; + } + + // move assignment + quantum_circuit& 
operator=(quantum_circuit&& other) noexcept { + instr_list = std::move(other.instr_list); + targets_buf = std::move(other.targets_buf); + args_buf = std::move(other.args_buf); + return *this; + } + + // concatenate two quantum circuits + quantum_circuit& operator+=(const quantum_circuit& other) { + span_ref other_instrs(other.instr_list); + + if (&other == this) { + instr_list.insert(instr_list.end(), other_instrs.begin(), + other_instrs.end()); + return *this; + } + + for (auto& instr : other_instrs) { + auto instr_targets = targets_buf.take_copy(instr.targets); + auto instr_args = args_buf.take_copy(instr.args); + instr_list.push_back({instr.gate, instr_targets, instr_args}); + } + + return *this; + } + + // concatenate two quantum circuits + quantum_circuit operator+(const quantum_circuit& other) { + quantum_circuit result(*this); + result += other; + return result; + } + + friend std::ostream& operator<<(std::ostream& os, const quantum_circuit& qc) { + for (auto& instr : qc.instr_list) { + os << instr; + } + return os; + } + + operator std::string() const { return str(); } + + std::string str() const { + std::stringstream ss; + ss << *this; + return ss.str(); + } + + quantum_circuit& append(const std::string& gate, + const std::vector& targets, + const std::vector& args = {}) { + + _append(gate, targets, args); + + return *this; + } + + quantum_circuit& _append(const std::string& gate, + span_ref targets, + span_ref args) { + circuit_instruction instr(gate, targets, args); + instr.validate(); + + // TODO: fuse some instr + instr.targets = targets_buf.take_copy(targets); + instr.args = args_buf.take_copy(args); + instr_list.push_back(instr); + + return *this; + } + + size_t max_qubit() const { + size_t max_qubit = 0; + for (auto& instr : instr_list) { + for (auto& target : instr.targets) { + max_qubit = std::max(max_qubit, target); + } + } + + return max_qubit; + } + + template + void for_each_circuit_instruction(const func& callback) const { + for (auto& instr 
: instr_list) { + callback(instr); + } + } +}; + +#endif diff --git a/src/qfvm_clifford/clifford_simulator.h b/src/qfvm_clifford/clifford_simulator.h new file mode 100644 index 0000000..cc01cf6 --- /dev/null +++ b/src/qfvm_clifford/clifford_simulator.h @@ -0,0 +1,185 @@ +#ifndef SIMULATOR_H_ +#define SIMULATOR_H_ + +#include "circuit.h" +#include "span_ref.h" +#include "table.h" +#include "tableau.h" +#include +#include +#include +#include + +// strore measurement result +struct measurement_record { + using result = std::tuple; + std::vector storage; + + void record(size_t qubit, size_t cbit, bool result) { + storage.push_back({qubit, cbit, result}); + } + auto operator[](size_t index) { return storage[index]; } + auto const operator[](size_t index) const { return storage[index]; } + auto begin() { return storage.begin(); } + auto end() { return storage.end(); } + auto begin() const { return storage.begin(); } + auto end() const { return storage.end(); } + auto cbegin() const { return storage.cbegin(); } + auto cend() const { return storage.cend(); } + auto size() const { return storage.size(); } + void clear() { storage.clear(); } +}; + +// quantum circuit simulator, include original tableau, measurement record and a +// random number generator +template struct circuit_simulator { + + tableau sim_tableau; + measurement_record sim_record; + std::mt19937_64 rng; + + // constructor + // The number of qubits is specified by num_qubits. And the random number + // generator is used to generate random numbers for error gate and + // measurement. 
+ explicit circuit_simulator(size_t num_qubits, size_t seed = 42) + : sim_tableau(num_qubits), sim_record(), rng() { + rng.seed(seed); + } + + // do a quantum gate except measurement gate + template + result unpack_vector(auto f, tableau& t, auto& vec, + std::index_sequence) { + + // TODO: maybe can be better + return std::get(f)(t, vec[S]...); + } + + template + result unpack_vector(auto f, tableau& t, auto& vec) { + if (vec.size() != vec_size) + throw std::runtime_error("wrong number of qubits for gate"); + return unpack_vector(f, t, vec, + std::make_index_sequence()); + } + + // do a measurement gate + template + result unpack_vector(auto f, tableau& t, auto& vec, + std::mt19937_64& rng, std::index_sequence) { + + // TODO: maybe can be better + return std::get(f)(t, rng, vec[S]...); + } + + template + result unpack_vector(auto f, tableau& t, auto& vec, + std::mt19937_64& rng) { + if (vec.size() != vec_size) + throw std::runtime_error("wrong number of qubits for gate"); + return unpack_vector(f, t, vec, rng, std::make_index_sequence()); + } + + // do a quantum circuit, check the max qubit of the quantum circuit, if it is + // larger than the number of qubits of the tableau, expand the tableau + void do_circuit(const quantum_circuit& qc) { + if (qc.max_qubit() >= sim_tableau.num_qubits) { + sim_tableau.expand(qc.max_qubit(), 1.0); + std::cerr << "WARNING: expanding tableau to " << qc.max_qubit() + << " qubits\n"; + } + + // according quantum circuit instruction type to do the instruction + qc.for_each_circuit_instruction([&](const circuit_instruction& ci) { + if (!gate_map.contains(ci.gate + "_gate")) { + throw std::runtime_error("unknown gate"); + } + + auto pair = gate_map[ci.gate + "_gate"]; + if (SINGLE_QUBIT_GATE == pair.first) { + unpack_vector<1>(pair.second, sim_tableau, ci.targets); + } else if (TWO_QUBIT_GATE == pair.first) { + unpack_vector<2>(pair.second, sim_tableau, ci.targets); + } else if (COLLAPSING_GATE == pair.first) { + auto record = + 
unpack_vector<1>(pair.second, sim_tableau, ci.targets, rng); + if (record.has_value()) + sim_record.record(ci.targets[0], static_cast(ci.args[0]), + record.value()); + } else if (ERROR_QUBIT_GATE == pair.first) { + std::bernoulli_distribution d(ci.args[0]); + if (d(rng)) + unpack_vector<1>(pair.second, sim_tableau, ci.targets); + } else { + throw std::runtime_error("unknown gate"); + } + }); + } + + // do a quantum circuit instruction + void do_circuit_instruction(const circuit_instruction& ci) { + + if (!gate_map.contains(ci.gate + "_gate")) { + throw std::runtime_error("unknown gate"); + } + + auto pair = gate_map[ci.gate + "_gate"]; + + if (SINGLE_QUBIT_GATE == pair.first) { + unpack_vector<1>(pair.second, sim_tableau, ci.targets); + } else if (TWO_QUBIT_GATE == pair.first) { + unpack_vector<2>(pair.second, sim_tableau, ci.targets); + } else if (COLLAPSING_GATE == pair.first) { + auto record = unpack_vector<1>(pair.second, sim_tableau, ci.targets, rng); + if (record.has_value()) + sim_record.record(ci.targets[0], static_cast(ci.args[0]), + record.value()); + } else if (ERROR_QUBIT_GATE == pair.first) { + std::bernoulli_distribution d(ci.args[0]); + if (d(rng)) + unpack_vector<1>(pair.second, sim_tableau, ci.targets); + } else { + throw std::runtime_error("unknown gate"); + } + } + + // sample the quantum circuit, after each iteration, reset the tableau to + // identity + void sample(const quantum_circuit& qc, size_t num_samples) { + for (size_t i = 0; i < num_samples; i++) { + reset_tableau(); + do_circuit(qc); + } + } + + // reset the tableau to identity + void reset_tableau() { sim_tableau.reset(); } + + // z-basis reset + void reset(size_t qubit) { sim_tableau.reset(rng, qubit); } + + // // x-basis reset + // void reset_x(size_t qubit) { sim_tableau.reset_x(rng, qubit); } + + // // y-basis reset + // void reset_y(size_t qubit) { sim_tableau.reset_y(rng, qubit); } + + // measurement record size + size_t record_size() { return sim_record.size(); } + + // get 
the measurement record + auto current_measurement_record() const { return sim_record; } + + auto measure_all() { + measurement_record mr; + for (size_t i = 0; i < sim_tableau.num_qubits; i++) { + auto record = sim_tableau.m_gate(rng, i); + if (record.has_value()) + mr.record(record.value()); + } + return mr; + } +}; + +#endif diff --git a/src/qfvm_clifford/gate_list.h b/src/qfvm_clifford/gate_list.h new file mode 100644 index 0000000..3d74a18 --- /dev/null +++ b/src/qfvm_clifford/gate_list.h @@ -0,0 +1,246 @@ +#include "gate_macro.h" +#include "tableau.h" +#include + +SINGLE_QUBIT_GATE(h, { + t.distabilizer[qubit].swap(t.stabilizer[qubit]); + return {}; +}) + +SINGLE_QUBIT_GATE(i, { return {}; }) + +SINGLE_QUBIT_GATE(x, { + t.stabilizer[qubit].sign ^= true; + return {}; +}) + +SINGLE_QUBIT_GATE(y, { + t.distabilizer[qubit].sign ^= true; + t.stabilizer[qubit].sign ^= true; + return {}; +}) + +SINGLE_QUBIT_GATE(z, { + t.distabilizer[qubit].sign ^= true; + return {}; +}) + +ERROR_QUBIT_GATE(x_error, { + t.stabilizer[qubit].sign ^= true; + return {}; +}) + +ERROR_QUBIT_GATE(y_error, { + t.distabilizer[qubit].sign ^= true; + t.stabilizer[qubit].sign ^= true; + return {}; +}) + +ERROR_QUBIT_GATE(z_error, { + t.distabilizer[qubit].sign ^= true; + return {}; +}) + +SINGLE_QUBIT_GATE(h_yz, { + t.stabilizer[qubit].mul_ignore_anti_commute(t.distabilizer[qubit]); + t.z_gate(qubit); + return {}; +}) + +SINGLE_QUBIT_GATE(s_dag, { + t.distabilizer[qubit].mul_ignore_anti_commute(t.stabilizer[qubit]); + return {}; +}) + +SINGLE_QUBIT_GATE(s, { + t.s_dag_gate(qubit); + t.z_gate(qubit); + return {}; +}) + +// control, target +TWO_QUBIT_GATE(cnot, { + t.stabilizer[qubit2] *= t.stabilizer[qubit1]; + t.distabilizer[qubit1] *= t.distabilizer[qubit2]; + return {}; +}) + +// control, target +TWO_QUBIT_GATE(cx, { + t.stabilizer[qubit2] *= t.stabilizer[qubit1]; + t.distabilizer[qubit1] *= t.distabilizer[qubit2]; + return {}; +}) + +TWO_QUBIT_GATE(swap, { + 
t.stabilizer[qubit1].swap(t.stabilizer[qubit2]); + t.distabilizer[qubit1].swap(t.distabilizer[qubit2]); + return {}; +}) + +// Z-basis measurement. Projects each target qubit into |0> or |1> and reports +// its value (false=|0>, true=|1>). +COLLAPSING_GATE(m, { + if (!t.is_deterministic_z(qubit)) { + + tableau_trans t_trans(t); + t.collapse_qubit_along_z(t_trans, qubit, rng); + } + + return t.stabilizer.signs[qubit]; +}) + +// Z-basis measurement. Projects each target qubit into |0> or |1> and reports +// its value (false=|0>, true=|1>). +COLLAPSING_GATE(measure, { + if (!t.is_deterministic_z(qubit)) { + + tableau_trans t_trans(t); + t.collapse_qubit_along_z(t_trans, qubit, rng); + } + + return t.stabilizer.signs[qubit]; +}) + +// Z-basis measurement. Projects each target qubit into |0> or |1> and reports +// its value (false=|0>, true=|1>). +COLLAPSING_GATE(mz, { + if (!t.is_deterministic_z(qubit)) { + + tableau_trans t_trans(t); + t.collapse_qubit_along_z(t_trans, qubit, rng); + } + + return t.stabilizer.signs[qubit]; +}) + +// Y-basis measurement. Projects each target qubit into |i> or |-i> and reports +// its value (false=|i>, true=|-i>). +COLLAPSING_GATE(my, { + if (!t.is_deterministic_y(qubit)) { + + t.h_gate(qubit); + tableau_trans t_trans(t); + t.collapse_qubit_along_z(t_trans, qubit, rng); + t.h_gate(qubit); + } + + return t.eval_y_obs(qubit).sign; +}) + +// X-basis measurement. Projects each target qubit into |+> or |-> and reports +// its value (false=|+>, true=|->). +COLLAPSING_GATE(mx, { + if (!t.is_deterministic_x(qubit)) { + + t.h_yz_gate(qubit); + tableau_trans t_trans(t); + t.collapse_qubit_along_z(t_trans, qubit, rng); + t.h_yz_gate(qubit); + } + + return t.distabilizer.signs[qubit]; +}) + +// Z-basis reset. Forces each target qubit into the |0> state by silently +// measuring it in the Z basis and applying an X gate if it ended up in the |1> +// state. +COLLAPSING_GATE(r, { + // Collapse the qubits to be reset. 
+ if (!t.is_deterministic_z(qubit)) { + + tableau_trans t_trans(t); + t.collapse_qubit_along_z(t_trans, qubit, rng); + } + + // Force the collapsed qubits into the ground state. + t.distabilizer.signs[qubit] = false; + t.stabilizer.signs[qubit] = false; + + return {}; +}) + +// Z-basis reset. Forces each target qubit into the |0> state by silently +// measuring it in the Z basis and applying an X gate if it ended up in the |1> +// state. +COLLAPSING_GATE(reset, { + // Collapse the qubits to be reset. + if (!t.is_deterministic_z(qubit)) { + + tableau_trans t_trans(t); + t.collapse_qubit_along_z(t_trans, qubit, rng); + } + + // Force the collapsed qubits into the ground state. + t.distabilizer.signs[qubit] = false; + t.stabilizer.signs[qubit] = false; + + return {}; +}) + +// // Z-basis reset. Forces each target qubit into the |0> state by silently +// // measuring it in the Z basis and applying an X gate if it ended up in the +// |1> +// // state. +// COLLAPSING_GATE(rz, { +// // Collapse the qubits to be reset. +// if (!t.is_deterministic_z(qubit)) { + +// tableau_trans t_trans(t); +// t.collapse_qubit_along_z(t_trans, qubit, rng); +// } + +// // Force the collapsed qubits into the ground state. +// t.distabilizer.signs[qubit] = false; +// t.stabilizer.signs[qubit] = false; + +// return {}; +// }) + +// // X-basis reset. Forces each target qubit into the |+> state by silently +// // measuring it in the X basis and applying a Z gate if it ended up in the +// |-> +// // state. +// COLLAPSING_GATE(rx, { +// // Collapse the qubits to be reset. +// if (!t.is_deterministic_x(qubit)) { + +// t.h_yz_gate(qubit); +// tableau_trans t_trans(t); +// t.collapse_qubit_along_z(t_trans, qubit, rng); +// t.h_yz_gate(qubit); +// } + +// // Force the collapsed qubits into the ground state. +// t.distabilizer.signs[qubit] = false; +// t.stabilizer.signs[qubit] = false; + +// return {}; +// }) + +// // Y-basis reset. 
Forces each target qubit into the |i> state by silently +// // measuring it in the Y basis and applying an X gate if it ended up in the +// |-i> +// // state. +// COLLAPSING_GATE(ry, { +// // Collapse the qubits to be reset. +// if (!t.is_deterministic_y(qubit)) { + +// t.h_gate(qubit); +// tableau_trans t_trans(t); +// t.collapse_qubit_along_z(t_trans, qubit, rng); +// t.h_gate(qubit); +// } + +// // Force the collapsed qubits into the ground state. +// t.distabilizer.signs[qubit] = false; +// t.stabilizer.signs[qubit] = false; +// t.stabilizer.signs[qubit] ^= t.eval_y_obs(qubit).sign; + +// return {}; +// }) + +#undef SINGLE_QUBIT_GATE +#undef TWO_QUBIT_GATE +#undef COLLAPSING_GATE +#undef ERROR_QUBIT_GATE diff --git a/src/qfvm_clifford/gate_macro.h b/src/qfvm_clifford/gate_macro.h new file mode 100644 index 0000000..4544bea --- /dev/null +++ b/src/qfvm_clifford/gate_macro.h @@ -0,0 +1,65 @@ +#include + +#ifdef FUNCTION_REGISTRATION +#define SINGLE_QUBIT_GATE(GATE_NAME, ...) \ + template \ + result GATE_NAME##_gate(tableau& t, const size_t qubit) \ + __VA_ARGS__ + +#define TWO_QUBIT_GATE(GATE_NAME, ...) \ + template \ + result GATE_NAME##_gate(tableau& t, const size_t qubit1, \ + const size_t qubit2) __VA_ARGS__ + +#define COLLAPSING_GATE(GATE_NAME, ...) \ + template \ + result GATE_NAME##_gate(tableau& t, std::mt19937_64& rng, \ + const size_t qubit) __VA_ARGS__ + +#define ERROR_QUBIT_GATE(GATE_NAME, ...) \ + template \ + result GATE_NAME##_gate(tableau& t, const size_t qubit) \ + __VA_ARGS__ +#endif + +#ifdef STRUCT_FUNCTION_REGISTRATION +#define SINGLE_QUBIT_GATE(GATE_NAME, ...) \ + result GATE_NAME##_gate(const size_t qubit) { \ + tableau& t = *this; \ + __VA_ARGS__ \ + } + +#define TWO_QUBIT_GATE(GATE_NAME, ...) \ + result GATE_NAME##_gate(const size_t qubit1, const size_t qubit2) { \ + tableau& t = *this; \ + __VA_ARGS__ \ + } + +#define COLLAPSING_GATE(GATE_NAME, ...) 
\ + result GATE_NAME##_gate(std::mt19937_64& rng, const size_t qubit) { \ + tableau& t = *this; \ + __VA_ARGS__ \ + } + +#define ERROR_QUBIT_GATE(GATE_NAME, ...) \ + result GATE_NAME##_gate(const size_t qubit) { \ + tableau& t = *this; \ + __VA_ARGS__ \ + } +#endif + +#ifdef GATE_MAP_REGISTRATION +#define SINGLE_QUBIT_GATE(GATE_NAME, ...) \ + {STRINGIZE(GATE_NAME##_gate), \ + {SINGLE_QUBIT_GATE, GATE_NAME##_gate}}, + +#define TWO_QUBIT_GATE(GATE_NAME, ...) \ + {STRINGIZE(GATE_NAME##_gate), {TWO_QUBIT_GATE, GATE_NAME##_gate}}, + +#define COLLAPSING_GATE(GATE_NAME, ...) \ + {STRINGIZE(GATE_NAME##_gate), {COLLAPSING_GATE, GATE_NAME##_gate}}, + +#define ERROR_QUBIT_GATE(GATE_NAME, ...) \ + {STRINGIZE(GATE_NAME##_gate), \ + {ERROR_QUBIT_GATE, GATE_NAME##_gate}}, +#endif diff --git a/src/qfvm_clifford/packed_bit_word.h b/src/qfvm_clifford/packed_bit_word.h new file mode 100644 index 0000000..5cb3fab --- /dev/null +++ b/src/qfvm_clifford/packed_bit_word.h @@ -0,0 +1,350 @@ +#ifndef PACKED_BIT_WORD_H_ +#define PACKED_BIT_WORD_H_ + +#include "bit.h" +#include "bit_word.h" +#include "packed_bit_word_slice.h" +#include +#include +#include +#include +#include +#include + +// Pack bit_words, allocated with alignment and padding enabling SIMD +// instructions. Due to padding, the smallest tableaus are 256 bits. For +// performance tableau_element does not store "intendend" size. Only store the +// padded size. 
+template struct packed_bit_word { + size_t num_bit_words; + union { + uint8_t* u8; + uint64_t* u64; + bit_word* bw; + }; + + explicit packed_bit_word(size_t num_bits) + : num_bit_words(bits_to_word_padded(num_bits)), + u64(malloc_aligned_padded(num_bits)) {} + + ~packed_bit_word() { + if (nullptr != u64) { + bit_word::aligned_free(u64); + u64 = nullptr; + num_bit_words = 0; + } + } + + // copy constructor + packed_bit_word(const packed_bit_word& other) + : num_bit_words(other.num_bit_words), + u64(malloc_aligned_padded(other.num_bit_words * word_size)) { + memcpy(u8, other.u8, other.num_bit_words * word_size / 8); + } + + packed_bit_word(const packed_bit_word_slice& other) + : num_bit_words(other.num_bit_words), + u64(malloc_aligned_padded(other.num_bit_words * word_size)) { + memcpy(u8, other.u8, other.num_bit_words * word_size / 8); + } + + // move constructor, is not allowed to throw generally + packed_bit_word(packed_bit_word&& other) noexcept + : num_bit_words(other.num_bit_words), u64(other.u64) { + other.u64 = nullptr; + other.num_bit_words = 0; + } + + // copy assignment, deep copy + packed_bit_word& operator=(const packed_bit_word& other) { + return *this = packed_bit_word_slice(other); + } + + packed_bit_word& + operator=(const packed_bit_word_slice& other) { + if (num_bit_words == other.num_bit_words) { + // avoid re-allocating memory + packed_bit_word_slice(*this) = other; + return *this; + } + + this->~packed_bit_word(); + new (this) packed_bit_word(other); + return *this; + } + + // move assignment + packed_bit_word& operator=(const packed_bit_word&& other) noexcept { + this->~packed_bit_word(); + new (this) packed_bit_word(std::move(other)); + return *this; + } + + // equality + bool operator==(const packed_bit_word& other) const { + return num_bit_words == other.num_bit_words && + memcmp(bw, other.bw, word_size * num_bit_words / 8) == 0; + } + + bool operator==(const packed_bit_word_slice& other) const { + return num_bit_words == 
other.num_bit_words && + memcmp(bw, other.bw, word_size * num_bit_words / 8) == 0; + } + + // inequality + bool operator!=(const packed_bit_word& other) const { + return !(*this == other); + } + + bool operator!=(const packed_bit_word_slice& other) const { + return !(*this == other); + } + + // convert packed_bit_word to packed_bit_word_slice + operator packed_bit_word_slice() { + return packed_bit_word_slice(bw, num_bit_words); + } + + operator const packed_bit_word_slice() const { + return packed_bit_word_slice(bw, num_bit_words); + } + + // index operation + bit operator[](size_t index) { return bit(u64, index); } + const bit operator[](size_t index) const { return bit(u64, index); } + + // assignment + packed_bit_word& + operator^=(const packed_bit_word& other) { + packed_bit_word_slice(*this) ^= + packed_bit_word_slice(other); + return *this; + } + + packed_bit_word + operator&=(const packed_bit_word& other) { + packed_bit_word_slice(*this) &= + packed_bit_word_slice(other); + return *this; + } + + packed_bit_word + operator|=(const packed_bit_word& other) { + packed_bit_word_slice(*this) |= + packed_bit_word_slice(other); + return *this; + } + + packed_bit_word& + operator+=(const packed_bit_word& other) { + size_t num_u64 = (num_bit_words * word_size) >> 6; + for (size_t i = 0; i < num_u64 - 1; ++i) { + u64[i] += other.u64[i]; + // carry + u64[i + 1] += (u64[i] < other.u64[i]); + } + u64[num_u64 - 1] += other.u64[num_u64 - 1]; + return *this; + } + + // compare operation + bool operator<(const packed_bit_word& other) const { + return packed_bit_word_slice(*this) < + packed_bit_word_slice(other); + } + + // shift operator + packed_bit_word& operator>>=(int offset) { + packed_bit_word_slice(*this) >>= offset; + return *this; + } + + packed_bit_word& operator<<=(int offset) { + packed_bit_word_slice(*this) <<= offset; + return *this; + } + + // convert packed_bit_word to string + operator std::string() const { + std::stringstream ss; + ss << *this; + return 
ss.str(); + } + + // swap two packed_bit_word + void swap(packed_bit_word other) { + packed_bit_word_slice(*this).swap( + packed_bit_word_slice(other)); + } + + // slice operator, offset and num_bit_words should be the number of bit words + inline packed_bit_word_slice slice(size_t offset, + size_t num_bit_words) { + return packed_bit_word_slice(bw + offset, num_bit_words); + } + + // slice operator, offset and num_bit_words should be the number of bit words + inline const packed_bit_word_slice + slice(size_t offset, size_t num_bit_words) const { + return packed_bit_word_slice(bw + offset, num_bit_words); + } + + // determine the list of bit_word is not all zero + bool is_not_all_zero() const { + return packed_bit_word_slice(*this).is_not_all_zero(); + } + + // generate random packed_bit_word + static packed_bit_word random(size_t num_bits, + std::mt19937_64& rng) { + packed_bit_word result(num_bits); + result.randomize(num_bits, rng); + return result; + } + + // if num_bits != this.num_bit_words * word_size, this function will change + // data from low to high + void randomize(size_t num_bits, std::mt19937_64& rng) { + auto num_u64 = num_bits >> 6; + for (size_t k = 0; k < num_u64; ++k) + u64[k] = rng(); + + auto remaining_bits = num_bits & 63; + if (remaining_bits) { + auto mask = (uint64_t(1) << remaining_bits) - 1; + u64[num_u64] &= ~mask; + u64[num_u64] |= rng() & mask; + } + } + + void truncated_overwrite_from(packed_bit_word_slice& other, + size_t num_bits) { + size_t num_u8 = num_bits >> 3; + memcpy(u8, other.u8, num_u8); + auto remaining_bits = num_bits & 7; + if (remaining_bits) { + auto mask = (uint8_t(1) << remaining_bits) - 1; + u8[num_u8] &= ~mask; + u8[num_u8] |= other.u8[num_u8] & mask; + } + } + + // count the number of 1s + size_t count() { return packed_bit_word_slice(*this).count(); } + + // malloc aligned padded + uint64_t* malloc_aligned_padded(size_t bits) { + size_t num_u8 = bits_to_bits_padded(bits); + void* result = 
bit_word::aligned_malloc(num_u8); + memset(result, 0, num_u8); + return reinterpret_cast(result); + } +}; + +// bit operations +template +packed_bit_word +operator^(const packed_bit_word_slice& left, + const packed_bit_word_slice& right) { + assert(left.num_bit_words == right.num_bit_words); + packed_bit_word result(left.num_bit_words); + packed_bit_word_slice(result).for_each_word( + left, right, [](auto& a, auto& b, auto& c) { a = b ^ c; }); + return result; +} + +template +packed_bit_word operator^(const packed_bit_word& left, + const packed_bit_word& right) { + return packed_bit_word_slice(left) ^ + packed_bit_word_slice(right); +} + +template +packed_bit_word +operator^(const packed_bit_word& left, + const packed_bit_word_slice& right) { + return packed_bit_word_slice(left) ^ right; +} + +template +packed_bit_word +operator^(const packed_bit_word_slice& left, + const packed_bit_word& right) { + return left ^ packed_bit_word_slice(right); +} + +template +packed_bit_word +operator&(const packed_bit_word_slice& left, + const packed_bit_word_slice& right) { + assert(left.num_bit_words == right.num_bit_words); + packed_bit_word result(left.num_bit_words); + packed_bit_word_slice(result).for_each_word( + left, right, [](auto& a, auto& b, auto& c) { a = b & c; }); + return result; +} + +template +packed_bit_word operator&(const packed_bit_word& left, + const packed_bit_word& right) { + return packed_bit_word_slice(left) & + packed_bit_word_slice(right); +} + +template +packed_bit_word +operator&(const packed_bit_word& left, + const packed_bit_word_slice& right) { + return packed_bit_word_slice(left) & right; +} + +template +packed_bit_word +operator&(const packed_bit_word_slice& left, + const packed_bit_word& right) { + return left & packed_bit_word_slice(right); +} + +template +packed_bit_word +operator|(const packed_bit_word_slice& left, + const packed_bit_word_slice& right) { + assert(left.num_bit_words == right.num_bit_words); + packed_bit_word 
result(left.num_bit_words); + packed_bit_word_slice(result).for_each_word( + left, right, [](auto& a, auto& b, auto& c) { a = b | c; }); + return result; +} + +template +packed_bit_word operator|(const packed_bit_word& left, + const packed_bit_word& right) { + return packed_bit_word_slice(left) | + packed_bit_word_slice(right); +} + +template +packed_bit_word +operator|(const packed_bit_word& left, + const packed_bit_word_slice& right) { + return packed_bit_word_slice(left) | right; +} + +template +packed_bit_word +operator|(const packed_bit_word_slice& left, + const packed_bit_word& right) { + return left | packed_bit_word_slice(right); +} + +template +std::ostream& operator<<(std::ostream& os, + const packed_bit_word word) { + for (size_t i = 0; i < word.num_bit_words * word_size; ++i) + os << "_1"[word[i]]; + + return os; +} + +#endif diff --git a/src/qfvm_clifford/packed_bit_word_slice.h b/src/qfvm_clifford/packed_bit_word_slice.h new file mode 100644 index 0000000..6041225 --- /dev/null +++ b/src/qfvm_clifford/packed_bit_word_slice.h @@ -0,0 +1,307 @@ +#ifndef PACKED_BIT_WORD_SLICE_H_ +#define PACKED_BIT_WORD_SLICE_H_ + +#include "bit.h" +#include "bit_word.h" +#include "utils.h" +#include +#include +#include +#include +#include +#include + +// reference to a slice of a packed bit word +template struct packed_bit_word_slice { + const size_t num_bit_words; + + union { + uint8_t* u8; + uint64_t* u64; + bit_word* bw; + }; + + packed_bit_word_slice(bit_word* bw, size_t num_bit_words) + : num_bit_words(num_bit_words), bw(bw) {} + + // assignment operator + packed_bit_word_slice& + operator=(const packed_bit_word_slice& other) { + memcpy(u8, other.u8, num_bit_words * word_size / 8); + return *this; + } + + packed_bit_word_slice& + operator^=(const packed_bit_word_slice& other) { + for_each_word(other, [](auto& a, auto& b) { a ^= b; }); + return *this; + } + + packed_bit_word_slice& + operator&=(const packed_bit_word_slice& other) { + for_each_word(other, [](auto& a, 
auto& b) { a &= b; }); + return *this; + } + + packed_bit_word_slice& + operator|=(const packed_bit_word_slice& other) { + for_each_word(other, [](auto& a, auto& b) { a |= b; }); + return *this; + } + + packed_bit_word_slice& + operator+=(const packed_bit_word_slice& other) { + size_t num_u64 = (num_bit_words * word_size) >> 6; + for (size_t i = 0; i < num_u64 - 1; ++i) { + u64[i] += other.u64[i]; + // carry + u64[i + 1] += (u64[i] < other.u64[i]); + } + u64[num_u64 - 1] += other.u64[num_u64 - 1]; + return *this; + } + + packed_bit_word_slice& operator>>=(int offset) { + uint64_t incoming_word; + uint64_t current_word; + + if (0 == offset) + return *this; + + // move right every u64 by offset + while (64 <= offset) { + incoming_word = 0; + for (int w = ((num_bit_words * word_size) >> 6) - 1; w >= 0; w--) { + current_word = u64[w]; + u64[w] = incoming_word; + incoming_word = current_word; + } + offset -= 64; + } + + if (0 == offset) + return *this; + + incoming_word = 0; + for (int w = ((num_bit_words * word_size) >> 6) - 1; w >= 0; w--) { + current_word = u64[w]; + // move right + u64[w] >>= offset; + // add high bits + u64[w] |= incoming_word << (64 - offset); + // update next incoming word + incoming_word = current_word & ((uint64_t(1) << offset) - 1); + } + + return *this; + } + + packed_bit_word_slice& operator<<=(int offset) { + uint64_t incoming_word; + uint64_t current_word; + + if (0 == offset) + return *this; + + // move left every u64 by offset + while (64 <= offset) { + incoming_word = 0; + for (uint64_t w = 0; w < (num_bit_words * word_size) >> 6; w++) { + current_word = u64[w]; + u64[w] = incoming_word; + incoming_word = current_word; + } + offset -= 64; + } + + if (0 == offset) + return *this; + + incoming_word = 0; + for (uint64_t w = 0; w < (num_bit_words * word_size) >> 6; w++) { + current_word = u64[w]; + // move left + u64[w] <<= offset; + // add low bits + u64[w] |= incoming_word; + // update next incoming word + incoming_word = current_word >> 
(64 - offset); + } + + return *this; + } + + // equality operator + bool operator==(const packed_bit_word_slice& other) const { + return num_bit_words == other.num_bit_words && + 0 == memcmp(bw, other.bw, num_bit_words * word_size / 8); + } + + bool operator!=(const packed_bit_word_slice& other) const { + return !(*this == other); + } + + // index operator + inline bit operator[](size_t index) { return bit(u8, index); } + + inline const bit operator[](size_t index) const { return bit(u8, index); } + + // convert packed bit word slice to string + operator std::string() const { + std::stringstream ss; + ss << *this; + return ss.str(); + } + + void swap(packed_bit_word_slice other) { + for_each_word(other, [](auto& a, auto& b) { std::swap(a, b); }); + } + + // slice operator, offset and num_bit_words should be the number of bit words + inline packed_bit_word_slice slice(size_t offset, + size_t num_bit_words) { + return packed_bit_word_slice(bw + offset, num_bit_words); + } + + inline const packed_bit_word_slice + slice(size_t offset, size_t num_bit_words) const { + return packed_bit_word_slice(bw + offset, num_bit_words); + } + + // determine the list of bit_word is not all zero + bool is_not_all_zero() const { + bit_word res{}; + for_each_word([&res](auto& a) { res |= a; }); + return bool(res); + } + + // if num_bits != this.num_bit_words * word_size, this function will change + // data from low to high + void randomize(size_t num_bits, std::mt19937_64& rng) { + auto num_u64 = num_bits >> 6; + for (size_t k = 0; k < num_u64; ++k) + u64[k] = rng(); + + auto remaining_bits = num_bits & 63; + if (remaining_bits) { + auto mask = (uint64_t(1) << remaining_bits) - 1; + u64[num_u64] &= ~mask; + u64[num_u64] |= rng() & mask; + } + } + + // write the truncated data from other to this + void truncated_overwrite_from(packed_bit_word_slice other, + size_t num_bits) { + size_t num_u8 = num_bits >> 3; + memcpy(u8, other.u8, num_u8); + auto remaining_bits = num_bits & 7; + if 
(remaining_bits) { + auto mask = (uint8_t(1) << remaining_bits) - 1; + u8[num_u8] &= ~mask; + u8[num_u8] |= other.u8[num_u8] & mask; + } + } + + // count the number of 1s + template size_t count() const { + auto end = u64 + (num_bit_words * word_size >> 6); + size_t result = 0; + for (const uint64_t* p = u64; p != end; p++) { + result += count_uint64_bits(*p); + } + return result; + } + + template inline void for_each_word(func f) const { + auto* bw_start = bw; + auto* bw_end = bw + num_bit_words; + while (bw_start != bw_end) { + f(*bw_start); + ++bw_start; + } + } + + template + inline void for_each_word(packed_bit_word_slice other, + func f) const { + auto* bw_start = bw; + auto* bw_end = bw + num_bit_words; + auto* other_bw_start = other.bw; + while (bw_start != bw_end) { + f(*bw_start, *other_bw_start); + ++bw_start; + ++other_bw_start; + } + } + + template + inline void for_each_word(packed_bit_word_slice other1, + packed_bit_word_slice other2, + func f) const { + auto* bw_start = bw; + auto* bw_end = bw + num_bit_words; + auto* other1_bw_start = other1.bw; + auto* other2_bw_start = other2.bw; + while (bw_start != bw_end) { + f(*bw_start, *other1_bw_start, *other2_bw_start); + ++bw_start; + ++other1_bw_start; + ++other2_bw_start; + } + } + + template + inline void for_each_word(packed_bit_word_slice other1, + packed_bit_word_slice other2, + packed_bit_word_slice other3, + func f) const { + auto* bw_start = bw; + auto* bw_end = bw + num_bit_words; + auto* other1_bw_start = other1.bw; + auto* other2_bw_start = other2.bw; + auto* other3_bw_start = other3.bw; + while (bw_start != bw_end) { + f(*bw_start, *other1_bw_start, *other2_bw_start, *other3_bw_start); + ++bw_start; + ++other1_bw_start; + ++other2_bw_start; + ++other3_bw_start; + } + } + + template + inline void for_each_word(packed_bit_word_slice other1, + packed_bit_word_slice other2, + packed_bit_word_slice other3, + packed_bit_word_slice other4, + func f) const { + auto* bw_start = bw; + auto* bw_end = 
bw + num_bit_words; + auto* other1_bw_start = other1.bw; + auto* other2_bw_start = other2.bw; + auto* other3_bw_start = other3.bw; + auto* other4_bw_start = other4.bw; + while (bw_start != bw_end) { + f(*bw_start, *other1_bw_start, *other2_bw_start, *other3_bw_start, + *other4_bw_start); + ++bw_start; + ++other1_bw_start; + ++other2_bw_start; + ++other3_bw_start; + ++other4_bw_start; + } + } +}; + +// output operator for packed bit word slice +template +std::ostream& operator<<(std::ostream& os, + const packed_bit_word_slice& word) { + for (size_t i = 0; i < word.num_bit_words * word_size; ++i) + os << "_1"[word[i]]; + + return os; +} + +#endif diff --git a/src/qfvm_clifford/pauli.h b/src/qfvm_clifford/pauli.h new file mode 100644 index 0000000..4e7604a --- /dev/null +++ b/src/qfvm_clifford/pauli.h @@ -0,0 +1,176 @@ +#ifndef PAULI_H_ +#define PAULI_H_ + +#include "bit.h" +#include "packed_bit_word.h" +#include "pauli_slice.h" +#include "table.h" +#include +#include +#include +#include +#include +#include +#include + +// a pauli string is a product of pauli operators (I, X, Y, Z) on n qubits +template struct pauli_string { + // the length of the pauli string + size_t num_qubits; + + // whether the pauli string is a negative, true if negative and false if + // positive + bool sign; + + // the paulis in the pauli string, paulis are encoded by xz (I=00, X=10, Y=11, + // Z=01) + packed_bit_word xs, zs; + + explicit pauli_string(size_t n) : num_qubits(n), sign(false), xs(n), zs(n) {} + + explicit pauli_string(std::string& str) + : num_qubits(0), sign(false), xs(0), zs(0) { + *this = std::move(from_cstr(str.c_str())); + } + + // copy constructor + pauli_string(const pauli_string& other) + : num_qubits(other.num_qubits), sign(other.sign), xs(other.xs), + zs(other.zs) {} + + pauli_string(const pauli_string_slice& other) + : num_qubits(other.num_qubits), sign(other.sign), xs(other.xs), + zs(other.zs) {} + + // move constructor + pauli_string(pauli_string&& other) noexcept + 
: num_qubits(other.num_qubits), sign(other.sign), xs(std::move(other.xs)), + zs(std::move(other.zs)) {} + + // copy assignment + pauli_string& operator=(const pauli_string& other) { + this->num_qubits = other.num_qubits, this->sign = other.sign; + this->xs = other.xs, this->zs = other.zs; + + return *this; + } + + pauli_string& + operator=(const pauli_string_slice& other) { + this->num_qubits = other.num_qubits, this->sign = other.sign; + this->xs = other.xs, this->zs = other.zs; + + return *this; + } + + // move assignment + pauli_string& operator=(pauli_string&& other) { + this->~pauli_string(); + new (this) pauli_string(std::move(other)); + + return *this; + } + + // equality operator + bool operator==(const pauli_string& other) const { + return num_qubits == other.num_qubits && bool(sign) == bool(other.sign) && + xs == other.xs && zs == other.zs; + } + + bool operator==(const pauli_string_slice& other) const { + return num_qubits == other.num_qubits && bool(sign) == bool(other.sign) && + xs == other.xs && zs == other.zs; + } + + bool operator!=(const pauli_string& other) const { + return !(*this == other); + } + + bool operator!=(const pauli_string_slice& other) const { + return !(*this == other); + } + + // convert to pauli string slice + operator const pauli_string_slice() const { + return pauli_string_slice(num_qubits, bit((void*)&sign, 0), xs, + zs); + } + + operator pauli_string_slice() { + return pauli_string_slice(num_qubits, bit(&sign, 0), xs, zs); + } + + // convert pauli string to string + std::string str() const { return std::string(*this); } + + operator std::string() const { + std::stringstream ss; + ss << *this; + return ss.str(); + } + + // generate a random pauli string + static pauli_string random(size_t num_qubits, + std::mt19937_64& rng) { + auto result = pauli_string(num_qubits); + result.xs.randomize(num_qubits, rng); + result.zs.randomize(num_qubits, rng); + result.sign ^= rng() & 1; + return result; + } + + // parse char one by one + 
static pauli_string + parse_cstr(size_t num_qubits, bool sign, + const std::function& func) { + pauli_string ret(num_qubits); + ret.sign = sign; + + for (size_t i = 0; i < num_qubits; i++) { + bool x, z; + switch (func(i)) { + case 'X': + x = true, z = false; + break; + case 'Y': + x = true, z = true; + break; + case 'Z': + x = false, z = true; + break; + case 'I': + x = false, z = false; + break; + default: + throw std::invalid_argument("can not parse character: " + + std::to_string(func(i))); + } + + ret.xs.u64[i / 64] ^= (uint64_t)x << (i & 63); + ret.zs.u64[i / 64] ^= (uint64_t)z << (i & 63); + } + + return ret; + } + + // convert c style string to pauli string + static pauli_string from_cstr(const char* cstr) { + // default is positive + auto sign = cstr[0] == '-'; + if ('-' == cstr[0] || '+' == cstr[0]) + cstr++; + + return parse_cstr(strlen(cstr), sign, [&](size_t i) { return cstr[i]; }); + } +}; + +template +std::ostream& operator<<(std::ostream& os, const pauli_string& str) { + os << (str.sign ? 
'-' : '+'); + for (size_t i = 0; i < str.num_qubits; i++) { + os << "IXZY"[str.xs[i] | (str.zs[i] << 1)]; + } + return os; +} + +#endif diff --git a/src/qfvm_clifford/pauli_slice.h b/src/qfvm_clifford/pauli_slice.h new file mode 100644 index 0000000..eef0cc0 --- /dev/null +++ b/src/qfvm_clifford/pauli_slice.h @@ -0,0 +1,145 @@ +#ifndef PAULI_SLICE_H_ +#define PAULI_SLICE_H_ + +#include "bit.h" +#include "packed_bit_word_slice.h" +#include +#include +#include +#include +#include + +// reference to a slice of a pauli string +template struct pauli_string_slice { + size_t num_qubits; + bit sign; + packed_bit_word_slice xs, zs; + + // num_qubits should be the same with the number of padded bit_words, that + // means, (num_qubits + word_size - 1 / word_size) == xs.num_bit_words == + // zs.num_bit_words + pauli_string_slice(size_t num_qubits, bit sign, + packed_bit_word_slice xs, + packed_bit_word_slice zs) + : num_qubits(num_qubits), sign(sign), xs(xs), zs(zs) {} + + // assign operator + pauli_string_slice& + operator=(const pauli_string_slice& other) { + num_qubits = other.num_qubits; + sign = other.sign; + xs = other.xs; + zs = other.zs; + + return *this; + } + + // mulitply a commuting pauli string + pauli_string_slice& + operator*=(const pauli_string_slice& other) { + auto res = inplace_right_mul(other); + // must be commute + assert((res & 1) == 0); + // if the result phase is positive, then compute excluse-or of the signs + sign ^= res & 2; + return *this; + } + + // mulitply a pauli string, ignore anti-commuting terms + pauli_string_slice& + mul_ignore_anti_commute(const pauli_string_slice& other) { + auto res = inplace_right_mul(other); + // if the result phase is positive, then compute excluse-or of the signs + sign ^= res & 2; + return *this; + } + + // euqality operator + bool operator==(const packed_bit_word_slice& other) { + return num_qubits == other.num_qubits && sign == other.sign && + xs == other.xs && zs == other.zs; + } + + bool operator!=(const 
packed_bit_word_slice& other) { + return !(*this == other); + } + + // convert pauli string to string + operator std::string() const { + std::stringstream ss; + ss << *this; + return ss.str(); + } + + // swap operator, swap signs, x matrix and z matrix + void swap(pauli_string_slice other) { + sign.swap(other.sign); + xs.swap(other.xs); + zs.swap(other.zs); + } + + // Intuitively, this functionreturns the exponent to which the imaginary + // number i is raised when the Pauli matrices represented by x1z1 and x2z2 + // are multiplied together. For example, if x1 = z2 = 0 and z1 = x2 = 1 then + // Definition 2 shows that x1z1 and x2z2 represent Z and X, respectively. + // Multiplying Z and X together gives ZX = iY . Since the exponent on i is 1, + // the result of this function is 1. + // Returns: + // 0 if the product is 1 + // 1 if the product is i + // 2 if the product is -1 + // 3 if the product is -i + uint8_t + inplace_right_mul(const pauli_string_slice& other) noexcept { + bit_word count1{}; + bit_word count2{}; + + xs.for_each_word( + zs, other.xs, other.zs, + [&count1, &count2](auto& x1, auto& z1, auto& x2, auto& z2) { + // accumulate anti-commutation (+i or -i) counts + auto x1z2 = x1 & z2; + auto anti_commutes = (x2 & z1) ^ x1z2; + + // update left side pauli + x1 ^= x2; + z1 ^= z2; + + // accumulate anti-commutation (+i or -i) counts + count2 ^= (count1 ^ x1 ^ z1 ^ x1z2) & anti_commutes; + count1 ^= anti_commutes; + }); + + // combine final anti-commutation phase tally (mod 4) + auto s = count1.count(); + s ^= count2.count() << 1; + s ^= other.sign << 1; + return s & 3; + } + + // determines if the pauli string commutes with the given pauli string + bool commutes(const pauli_string_slice& other) const noexcept { + if (num_qubits > other.num_qubits) + return other.commutes(*this); + + bit_word count{}; + xs.for_each_word(zs, other.xs, other.zs, + [&count](auto& x1, auto& z1, auto& x2, auto& z2) { + count ^= (x1 & z2) ^ (x2 & z1); + }); + return 
(count.count() & 1) == 0; + } +}; + +template +std::ostream& operator<<(std::ostream& os, + const pauli_string_slice str) { + os << "+-"[str.sign]; + for (size_t i = 0; i < str.num_qubits; i++) { + os << "IXZY"[str.xs[i] | (str.zs[i] << 1)]; + } + + return os; +} + +#endif diff --git a/src/qfvm_clifford/span_ref.h b/src/qfvm_clifford/span_ref.h new file mode 100644 index 0000000..8214087 --- /dev/null +++ b/src/qfvm_clifford/span_ref.h @@ -0,0 +1,331 @@ +#ifndef SPAN_REF_H_ +#define SPAN_REF_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// A significant distinction between the semantics of this class and the +// std::span class introduced in C++20 is that this class defines equality and +// ordering operators based on *the content being pointed to* rather than the +// values of the pointers themselves. Two range references are not considered +// equal simply because they have identical pointers; they are deemed equal +// because they point to ranges with matching contents. In essence, this class +// behaves more like a *reference* rather than a pointer. + +template struct span_ref { + T* ptr_start; + T* ptr_end; + + span_ref() : ptr_start(nullptr), ptr_end(nullptr) {} + span_ref(T* begin, T* end) : ptr_start(begin), ptr_end(end) {} + + // Implicit conversions. 
+ span_ref(T* singleton) : ptr_start(singleton), ptr_end(singleton + 1) {} + + span_ref(const span_ref::type>& other) + : ptr_start(other.ptr_start), ptr_end(other.ptr_end) {} + + span_ref(std::vector& items) + : ptr_start(items.data()), ptr_end(items.data() + items.size()) {} + + span_ref(const std::vector::type>& items) + : ptr_start(items.data()), ptr_end(items.data() + items.size()) {} + + template + span_ref(std::array& items) + : ptr_start(items.data()), ptr_end(items.data() + items.size()) {} + + template + span_ref(const std::array::type, K>& items) + : ptr_start(items.data()), ptr_end(items.data() + items.size()) {} + + span_ref sub(size_t start_offset, size_t end_offset) const { + return span_ref(ptr_start + start_offset, ptr_start + end_offset); + } + + size_t size() const { return ptr_end - ptr_start; } + + const T* begin() const { return ptr_start; } + + const T* end() const { return ptr_end; } + + const T& back() const { return *(ptr_end - 1); } + + const T& front() const { return *ptr_start; } + + bool empty() const { return ptr_end == ptr_start; } + + T* begin() { return ptr_start; } + + T* end() { return ptr_end; } + + T& back() { return *(ptr_end - 1); } + + T& front() { return *ptr_start; } + + const T& operator[](size_t index) const { return ptr_start[index]; } + + T& operator[](size_t index) { return ptr_start[index]; } + + bool operator==(const span_ref& other) const { + size_t n = size(); + if (n != other.size()) { + return false; + } + for (size_t k = 0; k < n; k++) { + if (ptr_start[k] != other[k]) { + return false; + } + } + return true; + } + + bool + operator==(const span_ref::type>& other) const { + return span_ref(ptr_start, ptr_end) == + span_ref(other.ptr_start, other.ptr_end); + } + + bool operator!=(const span_ref& other) const { + return !(*this == other); + } + + bool + operator!=(const span_ref::type>& other) const { + return !(*this == other); + } + + std::string str() const { + std::stringstream ss; + ss << *this; + return 
// Wraps an iterable so that its values are printed with a separator string
// between consecutive elements (no leading or trailing separator).
template <typename t_iter> struct seperator {
  const t_iter& iter;
  const char* sep;

  // Render the separated sequence to a string via operator<<.
  std::string str() const {
    std::stringstream out;
    out << *this;
    return out.str();
  }
};

// Convenience factory: seperate(v) prints v's items comma-separated.
template <typename t_iter>
seperator<t_iter> seperate(const t_iter& v, const char* sep = ", ") {
  return seperator<t_iter>{v, sep};
}

template <typename t_iter>
std::ostream& operator<<(std::ostream& out, const seperator<t_iter>& v) {
  // Print an empty prefix before the first element and the separator
  // before every later one; equivalent to the usual first-element flag.
  const char* prefix = "";
  for (const auto& item : v.iter) {
    out << prefix << item;
    prefix = v.sep;
  }
  return out;
}
Committing permanently preserves the data +// (until clearing or deconstructing the monotonic buffer) and ensures that it +// will not move so that pointers to it can be stored. + +// The current region is a continuous block of memory where the tail is being +// written. If the tail grows beyond this region and triggers an allocation, +// then this current region becomes an old region, while newly allocated memory +// becomes the new current region. Each subsequent current region will have at +// least double size compared to its predecessor. + +// Old regions are finalized memory segments that will be retained until +// clearing or deconstructing of the buffer occurs. + +// some ref: https://zhuanlan.zhihu.com/p/96089089 +template struct monotonic_buffer { + + // Contiguous memory that is being appended to, but has not yet been + // committed. + span_ref tail; + + // The current contiguous memory region with a mix of committed, staged, and + // unused memory. + span_ref cur; + + // Old contiguous memory regions that have been committed and now need to be + // kept. + std::vector> old_areas; + + // Constructs an empty monotonic buffer. + monotonic_buffer() : tail(), cur(), old_areas() {} + + // Constructs an empty monotonic buffer with initial capacity for its current + // region. 
+ monotonic_buffer(size_t reserve) { ensure_available(reserve); } + + void _soft_clear() { + cur.ptr_start = nullptr; + cur.ptr_end = nullptr; + tail.ptr_start = nullptr; + tail.ptr_end = nullptr; + old_areas.clear(); + } + + void _hard_clear() { + for (auto old : old_areas) { + free(old.ptr_start); + } + if (nullptr != cur.ptr_start) { + free(cur.ptr_start); + } + } + + ~monotonic_buffer() { _hard_clear(); } + + monotonic_buffer(monotonic_buffer&& other) noexcept + : tail(other.tail), cur(other.cur), + old_areas(std::move(other.old_areas)) { + other._soft_clear(); + } + + monotonic_buffer(const monotonic_buffer& other) = delete; + + monotonic_buffer& operator=(monotonic_buffer&& other) noexcept { + _hard_clear(); + cur = other.cur; + tail = other.tail; + old_areas = std::move(other.old_areas); + other._soft_clear(); + return *this; + } + + // Invalidates all previous data and resets the class into a clean state. + // Happens to keep the current contiguous memory region and free old regions. + void clear() { + for (auto old : old_areas) { + free(old.ptr_start); + } + old_areas.clear(); + tail.ptr_end = tail.ptr_start = cur.ptr_start; + } + + // Returns the size of memory allocated and held by this monotonic buffer (in + // units of sizeof(T)). + size_t total_allocated() const { + size_t result = cur.size(); + for (auto old : old_areas) { + result += old.size(); + } + return result; + } + + // Appends and commits data. + // Requires the tail to be empty, to avoid bugs where previously staged data + // is committed. + span_ref take_copy(span_ref data) { + assert(tail.size() == 0); + append_tail(data); + return commit_tail(); + } + + // Adds a staged data item. + void append_tail(T item) { + ensure_available(1); + *tail.ptr_end = item; + tail.ptr_end++; + } + + // Adds staged data. 
+ void append_tail(span_ref data) { + ensure_available(data.size()); + std::copy(data.begin(), data.end(), tail.ptr_end); + tail.ptr_end += data.size(); + } + + // Throws away staged data, so its memory can be re-used. + void discard_tail() { tail.ptr_end = tail.ptr_start; } + + // Changes staged data into committed data that will be kept until the buffer + // is cleared or deconstructed. + span_ref commit_tail() { + span_ref result(tail); + tail.ptr_start = tail.ptr_end; + return result; + } + + // Ensures it is possible to stage at least `min_required` more items without + // more reallocations. + void ensure_available(size_t min_required) { + size_t available = cur.ptr_end - tail.ptr_end; + if (available >= min_required) { + return; + } + + size_t alloc_count = std::max(min_required, cur.size() << 1); + if (nullptr != cur.ptr_start) { + old_areas.push_back(cur); + } + cur.ptr_start = (T*)malloc(alloc_count * sizeof(T)); + cur.ptr_end = cur.ptr_start + alloc_count; + + // Staged data is not complete yet; keep it contiguous by copying it to the + // new larger memory region. 
+ size_t tail_size = tail.size(); + if (tail_size) { + std::move(tail.ptr_start, tail.ptr_end, cur.ptr_start); + } + + tail = {cur.ptr_start, cur.ptr_start + tail_size}; + } +}; + +#endif diff --git a/src/qfvm_clifford/table.h b/src/qfvm_clifford/table.h new file mode 100644 index 0000000..e06fba4 --- /dev/null +++ b/src/qfvm_clifford/table.h @@ -0,0 +1,347 @@ +#ifndef TABLE_H_ +#define TABLE_H_ + +#include "bit.h" +#include "bit_word.h" +#include "packed_bit_word.h" +#include "packed_bit_word_slice.h" +#include "utils.h" +#include +#include +#include +#include +#include +#include + +// row-major table, padded and aligned to make table more efficient +// major represents the row (not contiguous in memory), minor represents the +// column (contiguous in memory), the smallest table is word_size * word_size +template struct table { + size_t num_bit_words_major; + size_t num_bit_words_minor; + + packed_bit_word data; + + table(size_t min_bits_major, size_t min_bits_minor) + : num_bit_words_major(bits_to_word_padded(min_bits_major)), + num_bit_words_minor(bits_to_word_padded(min_bits_minor)), + data(bits_to_bits_padded(min_bits_major) * + bits_to_bits_padded(min_bits_minor)) {} + + // index operator, major index should be the number of rows + inline packed_bit_word_slice operator[](size_t major_index) { + return data.slice(major_index * num_bit_words_minor, num_bit_words_minor); + } + + // index operator, major index should be the number of rows + inline const packed_bit_word_slice + operator[](size_t major_index) const { + return data.slice(major_index * num_bit_words_minor, num_bit_words_minor); + } + + // equality operator + bool operator==(const table& other) const { + return num_bit_words_major == other.num_bit_words_major && + num_bit_words_minor == other.num_bit_words_minor && + data == other.data; + } + + bool operator!=(const table& other) const { + return !(*this == other); + } + + // convert stablizer tableau to string + std::string str() const { return 
std::string(*this); } + + // for better printing + std::string str(size_t n) const { + std::stringstream ss; + for (size_t i = 0; i < n; i++) { + if (i) + ss << "\n"; + for (size_t j = 0; j < n; j++) + ss << "_1"[(*this)[i][j]]; + } + + return ss.str(); + } + + operator std::string() const { + std::stringstream ss; + ss << *this; + return ss.str(); + } + + // major_index is the index of bit_word in the row, minor_index is the index + // of bit_word in the column, major_index_sub is the index of bit in the + // bit_word + inline size_t get_index_bit_word(const size_t major_index, + const size_t minor_index, + const size_t major_index_sub) const { + auto index = + (major_index << bit_word::BIT_POW) + major_index_sub; + return index * num_bit_words_minor + minor_index; + } + + // transpose the table + table transpose() const { + table result(num_bit_words_minor * word_size, + num_bit_words_major * word_size); + + for (size_t major_word = 0; major_word < num_bit_words_major; + major_word++) { + for (size_t minor_word = 0; minor_word < num_bit_words_minor; + minor_word++) { + for (size_t major_word_sub = 0; major_word_sub < word_size; + major_word_sub++) { + size_t src_index = + get_index_bit_word(major_word, minor_word, major_word_sub); + size_t dst_index = + result.get_index_bit_word(minor_word, major_word, major_word_sub); + result.data.bw[dst_index] = data.bw[src_index]; + } + } + } + + // transpose the bit word block, the shape of the block is (word_size, + // word_size) + for (size_t major_word = 0; major_word < result.num_bit_words_major; + major_word++) { + for (size_t minor_word = 0; minor_word < result.num_bit_words_minor; + minor_word++) { + size_t block_start = + result.get_index_bit_word(major_word, minor_word, 0); + bit_word::inplace_transpose_square( + result.data.bw + block_start, result.num_bit_words_minor); + } + } + + return result; + } + + // inplace transpose, only for square matrix + table& inplace_transpose() { + + // transpose the bit word block, 
the shape of the block is (word_size, + // word_size) + for (size_t major_word = 0; major_word < num_bit_words_major; + major_word++) { + for (size_t minor_word = 0; minor_word < num_bit_words_minor; + minor_word++) { + size_t block_start = get_index_bit_word(major_word, minor_word, 0); + bit_word::inplace_transpose_square(data.bw + block_start, + num_bit_words_minor); + } + } + + // transpose the table + for (size_t major_word = 0; major_word < num_bit_words_major; major_word++) + for (size_t minor_word = major_word + 1; minor_word < num_bit_words_minor; + minor_word++) + for (size_t major_word_sub = 0; major_word_sub < word_size; + major_word_sub++) + std::swap(data.bw[get_index_bit_word(major_word, minor_word, + major_word_sub)], + data.bw[get_index_bit_word(minor_word, major_word, + major_word_sub)]); + + return *this; + } + + // square matrix multiplication (assuming row indexing) + table square_matrix_mul(const table& right, + size_t n) const { + auto tmp = right.transpose(); + + table result(n, n); + for (std::size_t i = 0; i < n; i++) { + for (std::size_t j = 0; j < n; j++) { + bit_word accumulater{}; + (*this)[i].for_each_word( + tmp[j], [&](auto& a, auto& b) { accumulater ^= a & b; }); + result[i][j] = accumulater.count() & 1; + } + } + + return result; + } + + // sqaure matrix inverse for lower triangular matrix + table inverse_for_lower_triangular_matrix(size_t n) const { + table result = table::identity(n); + packed_bit_word tmp(num_bit_words_minor * word_size); + + for (size_t i = 0; i < n; i++) { + tmp = (*this)[i]; + // pivot + for (size_t j = 0; j < i; j++) { + if (tmp[j]) { + tmp ^= (*this)[j]; + result[i] ^= result[j]; + } + } + } + + return result; + } + + // concatenate four tables + static table + concatenate_four(size_t n, const table& upper_left, + const table& upper_right, + const table& lower_left, + const table& lower_right) { + table result(n << 1, n << 1); + for (size_t i = 0; i < n; i++) { + for (size_t j = 0; j < n; j++) { + 
// Sample from the quantum Mallows distribution: produce a hadamard-layer
// bit string h and a qubit permutation S (arXiv:2003.09412, Algorithm 1).
// TODO: fix -Wstringop-overflow in test when n is 1
static inline std::pair<std::vector<bool>, std::vector<size_t>>
sample_quantum_mallows(size_t n, std::mt19937_64& rng) {
  std::uniform_real_distribution<double> unit(0, 1);
  std::vector<bool> hads;
  std::vector<size_t> perm;

  // Qubit labels not yet placed into the permutation.
  std::vector<size_t> remaining;
  remaining.reserve(n);
  for (size_t q = 0; q < n; q++)
    remaining.push_back(q);

  while (!remaining.empty()) {
    const size_t m = remaining.size();
    const double r = unit(rng);
    const double eps = pow(4, -int(m));
    // Inverse-CDF draw of the Mallows index k in [0, 2m).
    size_t k = size_t(-ceil(log2(r + (1 - r) * eps)));
    hads.push_back(k < m);
    // Indices in the upper half mirror back into [0, m).
    if (k >= m)
      k = 2 * m - k - 1;
    perm.push_back(remaining[k]);
    remaining.erase(remaining.begin() + k);
  }

  return {hads, perm};
}
+ // reference: Generation of random Clifford operators in + // https://arxiv.org/pdf/2003.09412.pdf + static table random_valid_stabilizer_table(size_t n, + std::mt19937_64& rng) { + auto h_S = sample_quantum_mallows(n, rng); + + const auto& h = h_S.first; + const auto& S = h_S.second; + + table symmetric(n, n); + for (size_t i = 0; i < n; i++) { + symmetric[i].randomize(i + 1, rng); + for (size_t j = 0; j < i; j++) + symmetric[j][i] = symmetric[i][j]; + } + + table symmetric_m(n, n); + for (size_t i = 0; i < n; i++) { + symmetric_m[i].randomize(i + 1, rng); + symmetric_m[i][i] &= h[i]; + for (size_t j = 0; j < i; j++) { + bool b = h[i] && h[j]; + b |= h[i] > h[j] && S[i] < S[j]; + b |= h[i] < h[j] && S[i] > S[j]; + symmetric_m[i][j] &= b; + symmetric_m[j][i] = symmetric_m[i][j]; + } + } + + auto lower = table::identity(n); + for (size_t i = 0; i < n; i++) + lower[i].randomize(i, rng); + + auto lower_m = table::identity(n); + for (size_t i = 0; i < n; i++) { + lower_m[i].randomize(i, rng); + for (size_t j = 0; j < i; j++) { + bool b = h[i] < h[j]; + b |= h[i] && h[j] && S[i] > S[j]; + b |= !h[i] && !h[j] && S[i] < S[j]; + lower_m[i][j] &= b; + } + } + + // a normalized probability distribution, P_n(h, S) is the fraction of + // n-qubit Clifford operators U such that the canonical form of U defined in + // Theorem 1 contains a layer of h gates labeled by h and a qubit + // permutation S. + auto prod = symmetric.square_matrix_mul(lower, n); + auto prod_m = symmetric_m.square_matrix_mul(lower_m, n); + + auto inv = lower.inverse_for_lower_triangular_matrix(n); + auto inv_m = lower_m.inverse_for_lower_triangular_matrix(n); + + inv.inplace_transpose(); + inv_m.inplace_transpose(); + + // the first n columns represent Pauli operators Fx_iF^{-1} (ignoring the + // phase) and the last n columns represent Fz_iF^{−1} . 
Stabilizer tableau + // of the Hadamard stage and qubit permutation layers in the canonical form + auto fused = table::concatenate_four( + n, lower, table(n, n), prod, inv); + auto fused_m = table::concatenate_four( + n, lower_m, table(n, n), prod_m, inv_m); + + table u(2 * n, 2 * n); + for (size_t i = 0; i < n; i++) { + u[i] = fused[S[i]]; + u[i + n] = fused[S[i] + n]; + } + + // hadamards + for (size_t i = 0; i < n; i++) + if (h[i]) + u[i].swap(u[i + n]); + + return fused_m.square_matrix_mul(u, 2 * n); + } +}; + +template +std::ostream& operator<<(std::ostream& os, const table& table) { + for (size_t i = 0; i < table.num_bit_words_major; i++) { + if (i) + os << "\n"; + os << table[i]; + } + + return os; +} + +#endif diff --git a/src/qfvm_clifford/tableau.h b/src/qfvm_clifford/tableau.h new file mode 100644 index 0000000..7cb880e --- /dev/null +++ b/src/qfvm_clifford/tableau.h @@ -0,0 +1,434 @@ +#ifndef TABLEAU_H_ +#define TABLEAU_H_ + +#include "gate_macro.h" +#include "packed_bit_word.h" +#include "pauli.h" +#include "pauli_slice.h" +#include "table.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// tableau trans is the transpose of the tableau +template struct tableau_trans; + +// tableau is the main class of the clifford simulator +template struct tableau; + +// quantum gate type +// SINGLE_QUBIT_GATE: single qubit gate +// TWO_QUBIT_GATE: two qubit gate +// COLLAPSING_GATE: measurement gate +// ERROR_QUBIT_GATE: error gate, which has some probability to apply the gate +enum gate_type { + SINGLE_QUBIT_GATE, + TWO_QUBIT_GATE, + COLLAPSING_GATE, + ERROR_QUBIT_GATE +}; + +// quantum gate function return type +// std::nullopt: The return value of non-measurement quantum gate +// (COLLASPING_GATE) bool: The return value of measurement quantum gate +using result = std::optional; + +// quantum gate function type +// The type is used to represent the all the quantum gate function type, which 
+// is for store gate map. The first type is for single qubit gate and error +// qubit gate, the second type is for two qubit gate, the third type is for +// collasping gate +template +using func_type = std::variant< + std::function& t, const size_t qubit)>, + std::function& t, const size_t qubit1, + const size_t qubit2)>, + std::function& t, std::mt19937_64& rng, + const size_t qubit)>>; + +// the implementation of the quantum gate function +#define FUNCTION_REGISTRATION +#include "gate_list.h" +#undef FUNCTION_REGISTRATION + +// the gate map, which is used to store all the quantum gate function, the index +// is the gate name +template +std::unordered_map>> + gate_map = { +#define GATE_MAP_REGISTRATION +#include "gate_list.h" +#undef GATE_MAP_REGISTRATION +}; + +// Inner struct of the tableau, which is used to store the the distabilizer and +// stabilizer of the tableau. +// +// Reference: https://arxiv.org/pdf/quant-ph/0406196.pdf +template struct _tableau { + size_t num_qubits; + + // The stabilizer tableau is represented by two bit tables, one for X and one + // for Z. 
+ table xs_t, zs_t; + + // The signs of the tableau + packed_bit_word signs; + + // constructor, n is the number of qubits + _tableau(size_t n) : num_qubits(n), xs_t(n, n), zs_t(n, n), signs(n) {} + + // index operator + pauli_string_slice operator[](size_t qubit) { + // if padding, the number of real words is not the same with the number of + // padded words + size_t num_words = (num_qubits + word_size - 1) / word_size; + return pauli_string_slice(num_qubits, signs[qubit], + xs_t[qubit].slice(0, num_words), + zs_t[qubit].slice(0, num_words)); + } + + const pauli_string_slice operator[](size_t qubit) const { + size_t num_words = (num_qubits + word_size - 1) / word_size; + return pauli_string_slice(num_qubits, signs[qubit], + xs_t[qubit].slice(0, num_words), + zs_t[qubit].slice(0, num_words)); + } +}; + +// A Clifford operation is a unitary quantum operation that conjugates Pauli +// products into Pauli products. C is Clifford if, for all pauli products P, it +// is the case that C^*PC is also a Pauli product. In fact, a Clifford operation +// can be uniquely identified (up to global phase) by how it conjugates Pauli +// products. +// A stabilizer tableau is a representation of a Clifford operation +// that simply directly stores how the Clifford operation conjugates each +// generator of the Pauli group. +template struct tableau { + + size_t num_qubits; + + // n distabilizer generators which are Pauli operators that together with the + // stabilizer generators generate the full Pauli group + _tableau distabilizer; + _tableau stabilizer; + + // constructor + explicit tableau(size_t num_qubits) + : num_qubits(num_qubits), distabilizer(num_qubits), + stabilizer(num_qubits) { + // Initialize identity elements along the diagonal. 
The state is |0...0> + for (size_t q = 0; q < num_qubits; q++) { + distabilizer.xs_t[q][q] = true; + stabilizer.zs_t[q][q] = true; + } + } + + // convert tableau to string + operator std::string() const { + std::stringstream ss; + ss << *this; + return ss.str(); + } + +// quantum gate function registration +#define STRUCT_FUNCTION_REGISTRATION +#include "gate_list.h" +#undef STRUCT_FUNCTION_REGISTRATION + + std::string str() const { return std::string(*this); } + + // return identity tableau, that means the state is |0...0> + static tableau identity(size_t num_qubits) { + return tableau(num_qubits); + } + + void reset() { *this = tableau(num_qubits); } + void reset(std::mt19937_64& rng, size_t qubit) { r_gate(rng, qubit); } + // void reset_x(std::mt19937_64& rng, size_t qubit) { rx_gate(rng, qubit); } + // void reset_y(std::mt19937_64& rng, size_t qubit) { ry_gate(rng, qubit); } + + // expand current tableau to new_num_qubits + // args: + // new_num_qubits: the new number of qubits + // resize_pad_factor: the resize pad factor, which means leave more space + // for future storage of more quantum bits. + void expand(size_t new_num_qubits, double resize_pad_factor) { + + assert(new_num_qubits >= num_qubits); + assert(resize_pad_factor >= 1); + + if (new_num_qubits <= distabilizer.xs_t.num_bit_words_major * word_size) { + size_t old_num_qubits = num_qubits; + num_qubits = new_num_qubits; + distabilizer.num_qubits = new_num_qubits; + stabilizer.num_qubits = new_num_qubits; + + // Initialize identity elements along the diagonal. 
+ for (size_t k = old_num_qubits; k < new_num_qubits; k++) { + distabilizer.xs_t[k][k] = true; + stabilizer.zs_t[k][k] = true; + } + + return; + } + + size_t old_num_bit_words = distabilizer.xs_t.num_bit_words_major; + size_t old_num_qubits = num_qubits; + tableau old_state = std::move(*this); + *this = tableau((size_t)(new_num_qubits * resize_pad_factor)); + this->num_qubits = new_num_qubits; + this->distabilizer.num_qubits = new_num_qubits; + this->stabilizer.num_qubits = new_num_qubits; + + // Copy stored state back into new larger space. + auto partial_copy = [=](packed_bit_word_slice dst, + packed_bit_word_slice src) { + dst.slice(0, old_num_bit_words) = src; + }; + partial_copy(distabilizer.signs, old_state.distabilizer.signs); + partial_copy(stabilizer.signs, old_state.stabilizer.signs); + for (size_t k = 0; k < old_num_qubits; k++) { + partial_copy(distabilizer[k].xs, old_state.distabilizer[k].xs); + partial_copy(distabilizer[k].zs, old_state.distabilizer[k].zs); + partial_copy(stabilizer[k].xs, old_state.stabilizer[k].xs); + partial_copy(stabilizer[k].zs, old_state.stabilizer[k].zs); + } + } + + // transpose each table of the tableau + void inplace_transpose() { + stabilizer.xs_t.inplace_transpose(); + stabilizer.zs_t.inplace_transpose(); + distabilizer.xs_t.inplace_transpose(); + distabilizer.zs_t.inplace_transpose(); + } + + // Clifford state measurements only have three probabilities: (p0, p1) = (0.5, + // 0.5), (1, 0), or (0, 1) The random case happens if there is a row + // anti-commuting with Z[qubit] + bool is_deterministic_z(size_t target_qubit) const { + return !stabilizer[target_qubit].xs.is_not_all_zero(); + } + + bool is_deterministic_x(size_t target_qubit) const { + return !distabilizer[target_qubit].xs.is_not_all_zero(); + } + + bool is_deterministic_y(size_t target_qubit) const { + return distabilizer[target_qubit].xs == stabilizer[target_qubit].xs; + } + + pauli_string eval_y_obs(size_t qubit) const { + pauli_string result = 
distabilizer[qubit]; + uint8_t log_i = pauli_string_slice(result).inplace_right_mul( + stabilizer[qubit]); + log_i++; + assert((log_i & 1) == 0); + if (log_i & 2) { + result.sign ^= true; + } + return result; + } + + // collapse the qubit along z axis + // args: + // t_trans: the transpose of the tableau + // target_qubit: the target qubit + // rng: the random number generator + size_t collapse_qubit_along_z(tableau_trans& t_trans, + size_t target_qubit, std::mt19937_64& rng) { + + size_t pivot = 0; + + // search for any generator that anti-commutes with the measurement + // observable + while (pivot < num_qubits && + !t_trans.t.stabilizer.xs_t[pivot][target_qubit]) + pivot++; + + // Such an p does not exist. In this case the outcome is determinate, so + // measuring the state will not change it; the only task is to determine + // whether 0 or 1 is observed. + if (pivot == num_qubits) + return SIZE_MAX; + + // perform partial gaussian elimination over the stabilizer generators that + // anti-commute with the measurement. do this by introducing + // no-effect-because-control-is-zero CNOT at the beginning of time. 
+ for (size_t k = pivot + 1; k < num_qubits; k++) + if (t_trans.t.stabilizer.xs_t[k][target_qubit]) + t_trans.cnot_gate(pivot, k); + + // swap the non-isolated anti-commuting stablizer generator for one that + // commutes with the measurement + if (t_trans.t.stabilizer.zs_t[pivot][target_qubit]) { + t_trans.h_yz_gate(pivot); + } else { + t_trans.h_gate(pivot); + } + + // assign measure result + bool result_if_measured = rng() & 1; + if (stabilizer.signs[target_qubit] != result_if_measured) { + t_trans.x_gate(pivot); + }; + + return pivot; + } + + // random valid stablizer tableau + // reference: https://arxiv.org/abs/2003.09412 + static tableau + random_valid_stabilizer_tableau(size_t num_qubits, std::mt19937_64& rng) { + auto raw = table::random_valid_stabilizer_table(num_qubits, rng); + tableau result(num_qubits); + for (size_t row = 0; row < num_qubits; row++) { + for (size_t col = 0; col < num_qubits; col++) { + result.distabilizer[row].xs[col] = raw[row][col]; + result.distabilizer[row].zs[col] = raw[row][col + num_qubits]; + result.stabilizer[row].xs[col] = raw[row + num_qubits][col]; + result.stabilizer[row].zs[col] = + raw[row + num_qubits][col + num_qubits]; + } + } + + result.distabilizer.signs.randomize(num_qubits, rng); + result.stabilizer.signs.randomize(num_qubits, rng); + return result; + } + + // check whether the tableau satisfy the invariants, the tableau need to + // preserve commutativity + // everything must commute, except for X_k anticommuting with Z_k for each k. 
+ bool satisfy_invariants() const { + for (size_t q1 = 0; q1 < num_qubits; q1++) { + auto x1 = distabilizer[q1]; + auto z1 = stabilizer[q1]; + + if (x1.commutes(z1)) + return false; + + for (size_t q2 = q1 + 1; q2 < num_qubits; q2++) { + auto x2 = distabilizer[q2]; + auto z2 = stabilizer[q2]; + + if (!x1.commutes(x2) || !x1.commutes(z2) || !z1.commutes(x2) || + !z1.commutes(z2)) + return false; + } + } + + return true; + } +}; + +template +std::ostream& operator<<(std::ostream& out, const tableau& t) { + out << "+-"; + for (size_t k = 0; k < t.num_qubits; k++) { + out << "xz-"; + } + out << "+\n|"; + for (size_t k = 0; k < t.num_qubits; k++) { + out << ' ' << "+-"[t.distabilizer[k].sign] << "+-"[t.stabilizer[k].sign]; + } + for (size_t q = 0; q < t.num_qubits; q++) { + out << " |\n|"; + for (size_t k = 0; k < t.num_qubits; k++) { + out << ' ' + << "IXZY"[t.distabilizer[k].xs[q] | t.distabilizer[k].zs[q] << 1] + << "IXZY"[t.stabilizer[k].xs[q] | t.stabilizer[k].zs[q] << 1]; + } + } + out << " |"; + return out; +} + +// reference to the tableau, transpose the tableau at the construction, after +// some computation, transpose back at the deconstruction +template struct tableau_trans { + // referece to the tableau + tableau& t; + + // constructor + explicit tableau_trans(tableau& t_in) : t(t_in) { + t.inplace_transpose(); + }; + tableau_trans() = delete; + + // copt and move constructor + tableau_trans(const tableau_trans& t) = delete; + tableau_trans(tableau_trans&& t) = delete; + + // deconstructor + ~tableau_trans() { t.inplace_transpose(); } + + // Iterates over the Paulis in a row of the tableau. + // + // args + // q: The row to iterate over. + // body: A function taking X, Z, and SIGN words. The X and Z words are + // chunks of xz-encoded Paulis from the row. The SIGN word is the + // corresponding chunk of sign bits from the sign row. 
+ template + inline void for_each_trans_obs(const size_t q, FUNC body) { + for (size_t k = 0; k < 2; k++) { + _tableau& h = k == 0 ? t.distabilizer : t.stabilizer; + pauli_string_slice p = h[q]; + p.xs.for_each_word(p.zs, h.signs, body); + } + } + + template + inline void for_each_trans_obs(const size_t q1, const size_t q2, FUNC body) { + for (size_t k = 0; k < 2; k++) { + _tableau& h = k == 0 ? t.distabilizer : t.stabilizer; + pauli_string_slice p1 = h[q1]; + pauli_string_slice p2 = h[q2]; + p1.xs.for_each_word(p1.zs, p2.xs, p2.zs, h.signs, body); + } + } + + tableau_trans& x_gate(const size_t qubit) { + for_each_trans_obs(qubit, [](auto& x, auto& z, auto& s) { s ^= z; }); + return *this; + } + + tableau_trans& h_gate(const size_t qubit) { + for_each_trans_obs(qubit, [](auto& x, auto& z, auto& s) { + std::swap(x, z); + s ^= x & z; + }); + return *this; + } + + tableau_trans& h_yz_gate(const size_t qubit) { + for_each_trans_obs(qubit, [](auto& x, auto& z, auto& s) { + s ^= z.andnot(x); + x ^= z; + }); + return *this; + } + + tableau_trans& cnot_gate(const size_t control_qubit, + const size_t target_qubit) { + for_each_trans_obs(control_qubit, target_qubit, + [](auto& cx, auto& cz, auto& tx, auto& tz, auto& s) { + s ^= (cz ^ tx).andnot(cx & tz); + cz ^= tz; + tx ^= cx; + }); + return *this; + } +}; +#endif diff --git a/src/qfvm_clifford/utils.h b/src/qfvm_clifford/utils.h new file mode 100644 index 0000000..3ec0a27 --- /dev/null +++ b/src/qfvm_clifford/utils.h @@ -0,0 +1,40 @@ +#ifndef TABLEAU_ELEMENT_UTILS_H_ +#define TABLEAU_ELEMENT_UTILS_H_ + +#include +#include + +template constexpr size_t bits_to_bits_padded(size_t bits) { + return (bits + (word_size - 1)) & ~(word_size - 1); +} + +template constexpr size_t bits_to_word_padded(size_t bits) { + return bits_to_bits_padded(bits) / word_size; +} + +// reference: +// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel +inline uint8_t count_uint64_bits(uint64_t value) { + value = value - ((value 
>> 1) & 0x5555555555555555ull); + value = + (value & 0x3333333333333333ull) + ((value >> 2) & 0x3333333333333333ull); + return (((value + (value >> 4)) & 0xF0F0F0F0F0F0F0Full) * + 0x101010101010101ull) >> + 56; +} + +// Concatenate preprocessor tokens A and B without expanding macro definitions +// (however, if invoked from a macro, macro arguments are expanded). +#define PPCAT_NX(A, B) A##B + +// Concatenate preprocessor tokens A and B after macro-expanding them. +#define PPCAT(A, B) PPCAT_NX(A, B) + +// Turn A into a string literal without expanding macro definitions (however, if +// invoked from a macro, macro arguments are expanded). +#define STRINGIZE_NX(A) #A + +// Turn A into a string literal after macro-expanding it. +#define STRINGIZE(A) STRINGIZE_NX(A) + +#endif diff --git a/tests/quafu/simulator/basis_test.py b/tests/quafu/simulator/basis_test.py index 4afa8f4..6521845 100644 --- a/tests/quafu/simulator/basis_test.py +++ b/tests/quafu/simulator/basis_test.py @@ -263,3 +263,82 @@ def test_after_measure(self): [diff_00, diff_11], [2, 0] ) self.assertTrue(success) + + +class TestCliffordSimulatorBasis(BaseTest): + """Test C++ Clifford simulator""" + + circuit = None + assertEqual = unittest.TestCase.assertEqual + assertAlmostEqual = unittest.TestCase.assertAlmostEqual + assertDictEqual = unittest.TestCase.assertDictEqual + assertListEqual = unittest.TestCase.assertListEqual + assertTrue = unittest.TestCase.assertTrue + + @pytest.mark.skipif( + sys.platform == "darwin", reason="Avoid error on MacOS arm arch." 
+ ) + def test_simulate(self): + print("test_simulate") + self.circuit = BellCircuits.bell_no_measure() + result = simulate( + qc=self.circuit, simulator="qfvm_clifford", output="count_dict" + ) + count = result.count + self.assertDictAlmostEqual(count, {}) + + def test_singleQgate_measure_atlast(self): + self.circuit = BasicCircuits.singleQgate_measure_atlast() + result = simulate( + qc=self.circuit, shots=1, simulator="qfvm_clifford", output="count_dict" + ) + counts = result.count + self.assertDictAlmostEqual(counts, {"11": 1}) + + def test_singleQgate_no_measure(self): + self.circuit = BasicCircuits.singleQgate_no_measure() + result = simulate( + qc=self.circuit, shots=1, simulator="qfvm_clifford", output="count_dict" + ) + counts = result.count + self.assertDictAlmostEqual(counts, {}) + + def test_singleQgate_measure_normal(self): + self.circuit = BasicCircuits.singleQgate_measure_normal() + result = simulate( + qc=self.circuit, shots=10, simulator="qfvm_clifford", output="count_dict" + ) + counts = result.count + self.assertDictAlmostEqual(counts, {"11": 10}) + + def test_multiQgate_measure_atlast(self): + self.circuit = BasicCircuits.multiQgate_measure_atlast() + result = simulate( + qc=self.circuit, shots=10, simulator="qfvm_clifford", output="count_dict" + ) + counts = result.count + self.assertDictAlmostEqual(counts, {"11": 10}) + + def test_multiQgate_no_measure(self): + self.circuit = BasicCircuits.multiQgate_no_measure() + result = simulate( + qc=self.circuit, shots=1, simulator="qfvm_clifford", output="count_dict" + ) + counts = result.count + self.assertDictAlmostEqual(counts, {}) + + def test_multiQgate_measure_normal(self): + self.circuit = BasicCircuits.multiQgate_measure_normal() + result = simulate( + qc=self.circuit, shots=10, simulator="qfvm_clifford", output="count_dict" + ) + counts = result.count + self.assertDictAlmostEqual(counts, {"11": 10}) + + def test_anycbit_measure(self): + self.circuit = BasicCircuits.any_cbit_measure() + result = 
simulate( + qc=self.circuit, shots=10, simulator="qfvm_clifford", output="count_dict" + ) + counts = result.count + self.assertDictAlmostEqual(counts, {"0101": 10}) From 099cb01110eea99537544ead4e3a0220a3a060bf Mon Sep 17 00:00:00 2001 From: Yanglin Zhang <56758695+lucky9-cyou@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:45:14 +0800 Subject: [PATCH 2/3] build: delete useless cmake code --- CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9595c4c..ab2d4e6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,8 +60,6 @@ list (APPEND PRJ_INCLUDE_DIRS ${EIGEN3_INCLUDE_DIR}) find_package(pybind11 CONFIG) list (APPEND PRJ_INCLUDE_DIRS ${PYBIND11_INCLUDE_DIR}) -set(MACOS_VERSION_MIN 10.14) - #SIMD if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "AMD64" OR CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "amd64") if(MSVC) From a951126f4f67cac2ebe2c304c96868824fffb4d7 Mon Sep 17 00:00:00 2001 From: ylin <17835344407@163.com> Date: Fri, 22 Dec 2023 14:47:45 +0800 Subject: [PATCH 3/3] feat: delete useless python code --- quafu/simulators/simulator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/quafu/simulators/simulator.py b/quafu/simulators/simulator.py index 4be6e19..609b282 100644 --- a/quafu/simulators/simulator.py +++ b/quafu/simulators/simulator.py @@ -32,7 +32,6 @@ def simulate( shots: int = 100, use_gpu: bool = False, use_custatevec: bool = False, - use_clifford: bool = False, ) -> SimuResult: """Simulate quantum circuit Args: