From 51a0731d0bac62b4453e54932f99c41cb2d60396 Mon Sep 17 00:00:00 2001 From: Tyson Jones Date: Tue, 26 Nov 2024 00:29:35 +0100 Subject: [PATCH 1/7] misc bug fixes which progress toward making the v3 deprecated unit tests run without error. We also relax exclusivity of multithreading and GPU-acceleration, which was pointless and annoying. --- quest/src/api/decoherence.cpp | 3 +-- quest/src/api/operations.cpp | 16 ++++++------- quest/src/api/qureg.cpp | 3 --- quest/src/core/autodeployer.cpp | 40 +++++++++++++++---------------- quest/src/core/errors.cpp | 2 +- quest/src/core/localiser.cpp | 7 ++++++ quest/src/core/utilities.cpp | 3 +++ quest/src/core/validation.cpp | 17 ++----------- quest/src/core/validation.hpp | 2 -- quest/src/cpu/cpu_subroutines.cpp | 3 +++ quest/src/gpu/gpu_thrust.cuh | 3 +++ 11 files changed, 48 insertions(+), 51 deletions(-) diff --git a/quest/src/api/decoherence.cpp b/quest/src/api/decoherence.cpp index 3287bbf4..30437c95 100644 --- a/quest/src/api/decoherence.cpp +++ b/quest/src/api/decoherence.cpp @@ -123,8 +123,7 @@ void mixQureg(Qureg outQureg, Qureg inQureg, qreal inProb) { validate_quregFields(outQureg, __func__); validate_quregFields(inQureg, __func__); validate_probability(inProb, __func__); - validate_quregIsDensityMatrix(outQureg, __func__); - validate_quregsCanBeMixed(outQureg, inQureg, __func__); + validate_quregsCanBeMixed(outQureg, inQureg, __func__); // checks outQureg is densmatr qreal outProb = 1 - inProb; localiser_densmatr_mixQureg(outProb, outQureg, inProb, inQureg); diff --git a/quest/src/api/operations.cpp b/quest/src/api/operations.cpp index 8c9c05dc..f0ce3eac 100644 --- a/quest/src/api/operations.cpp +++ b/quest/src/api/operations.cpp @@ -376,8 +376,8 @@ void multiplyFullStateDiagMatr(Qureg qureg, FullStateDiagMatr matrix) { bool onlyMultiply = true; qcomp exponent = qcomp(1, 0); (qureg.isDensityMatrix)? - localiser_statevec_allTargDiagMatr(qureg, matrix, exponent) : - localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply); + localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply): + localiser_statevec_allTargDiagMatr(qureg, matrix, exponent); } void multiplyFullStateDiagMatrPower(Qureg qureg, FullStateDiagMatr matrix, qcomp exponent) { @@ -387,8 +387,8 @@ void multiplyFullStateDiagMatrPower(Qureg qureg, FullStateDiagMatr matrix, qcomp bool onlyMultiply = true; (qureg.isDensityMatrix)? - localiser_statevec_allTargDiagMatr(qureg, matrix, exponent) : - localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply); + localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply): + localiser_statevec_allTargDiagMatr(qureg, matrix, exponent); } void applyFullStateDiagMatr(Qureg qureg, FullStateDiagMatr matrix) { @@ -400,8 +400,8 @@ void applyFullStateDiagMatr(Qureg qureg, FullStateDiagMatr matrix) { bool onlyMultiply = false; qcomp exponent = qcomp(1, 0); (qureg.isDensityMatrix)? - localiser_statevec_allTargDiagMatr(qureg, matrix, exponent) : - localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply); + localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply): + localiser_statevec_allTargDiagMatr(qureg, matrix, exponent); } void applyFullStateDiagMatrPower(Qureg qureg, FullStateDiagMatr matrix, qcomp exponent) { @@ -412,8 +412,8 @@ void applyFullStateDiagMatrPower(Qureg qureg, FullStateDiagMatr matrix, qcomp ex bool onlyMultiply = false; (qureg.isDensityMatrix)? - localiser_statevec_allTargDiagMatr(qureg, matrix, exponent) : - localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply); + localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply): + localiser_statevec_allTargDiagMatr(qureg, matrix, exponent); } diff --git a/quest/src/api/qureg.cpp b/quest/src/api/qureg.cpp index 0444b60d..6db094bb 100644 --- a/quest/src/api/qureg.cpp +++ b/quest/src/api/qureg.cpp @@ -146,9 +146,6 @@ Qureg validateAndCreateCustomQureg(int numQubits, int isDensMatr, int useDistrib // automatically overwrite distrib, GPU, and multithread fields which were left as modeflag::USE_AUTO autodep_chooseQuregDeployment(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env); - // throw error if the user had forced multithreading but GPU accel was auto-chosen - validate_newQuregNotBothMultithreadedAndGpuAccel(useGpuAccel, useMultithread, caller); - Qureg qureg = qureg_populateNonHeapFields(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread); // always allocate CPU memory diff --git a/quest/src/core/autodeployer.cpp b/quest/src/core/autodeployer.cpp index 7ab13dec..d53f99ec 100644 --- a/quest/src/core/autodeployer.cpp +++ b/quest/src/core/autodeployer.cpp @@ -67,7 +67,8 @@ void chooseWhetherToDistributeQureg(int numQubits, int isDensMatr, int &useDistr // it's ok if we cannot query RAM; if we'd have exceeded it, it's likely we'll exceed auto-threshold and will still distribute } catch (mem::COULD_NOT_QUERY_RAM &e) {} - // force distribution if GPU deployment is possible but we exceed local VRAM + // force distribution if GPU deployment is available but we exceed local VRAM; + // this is preferable over falling back to CPU-only which would be astonishingly slow if (useGpuAccel == 1 || useGpuAccel == modeflag::USE_AUTO) { size_t localGpuMem = gpu_getCurrentAvailableMemoryInBytes(); if (!mem_canQuregFitInMemory(numQubits, isDensMatr, 1, localGpuMem)) { @@ -76,47 +77,44 @@ void chooseWhetherToDistributeQureg(int numQubits, int isDensMatr, int &useDistr } } - // by now, we know that Qureg can definitely fit into a single GPU, or principally fit into RAM, - // but we may still wish to distribute it so that multiple Quregs don't choke up memory. + // to reach here, we know that Qureg can fit into the remaining memory of a single GPU, or principally + // fit into RAM, but we may still wish to distribute for improved parallelisation and to avoid memory saturation int effectiveNumQubitsPerNode = mem_getEffectiveNumStateVecQubitsPerNode(numQubits, isDensMatr, numEnvNodes); useDistrib = (effectiveNumQubitsPerNode >= MIN_NUM_LOCAL_QUBITS_FOR_AUTO_QUREG_DISTRIBUTION); } -void chooseWhetherToGpuAccelQureg(int numQubits, int isDensMatr, int useDistrib, int &useGpuAccel, int numQuregNodes) { +void chooseWhetherToGpuAccelQureg(int numQubits, int isDensMatr, int &useGpuAccel, int numQuregNodes) { // if the flag is already set, don't change it if (useGpuAccel != modeflag::USE_AUTO) return; - // determine the 'effective number of qubits' each GPU would have to simulate, if distributed + // determine the 'effective number of qubits' each GPU would have to simulate, if distributed... int effectiveNumQubits = mem_getEffectiveNumStateVecQubitsPerNode(numQubits, isDensMatr, numQuregNodes); - // choose to GPU accelerate only if that's not too few + // and choose to GPU accelerate only if that's not too few useGpuAccel = (effectiveNumQubits >= MIN_NUM_LOCAL_QUBITS_FOR_AUTO_QUREG_GPU_ACCELERATION); // notice there was no automatic disabling of GPU acceleration in the scenario that the local // partition exceeded GPU memory. This is because such a scenario would be catastrophically // slow and astonish users by leaving GPUs idle in intensive simulation. Instead, we auto-deploy - // to GPU and subsequent validation will notice we exceeded GPU memory. + // to GPU anyway and subsequent validation will notice we exceeded GPU memory and report an error. } -void chooseWhetherToMultithreadQureg(int numQubits, int isDensMatr, int useDistrib, int useGpuAccel, int &useMultithread, int numQuregNodes) { +void chooseWhetherToMultithreadQureg(int numQubits, int isDensMatr, int &useMultithread, int numQuregNodes) { // if the flag is already set (user-given, or inferred from env), don't change it if (useMultithread != modeflag::USE_AUTO) return; - // if GPU-aceleration was chosen, disable auto multithreading... - if (useGpuAccel) { - useMultithread = 0; - return; - } - - // otherwise, we're not GPU-accelerating, and should choose to multithread based on Qureg size + // otherwise, choose to multithread based on Qureg size int effectiveNumQubits = mem_getEffectiveNumStateVecQubitsPerNode(numQubits, isDensMatr, numQuregNodes); useMultithread = (effectiveNumQubits >= MIN_NUM_LOCAL_QUBITS_FOR_AUTO_QUREG_MULTITHREADING); + + // note the qureg may be simultaneously GPU-accelerated and so never use its + // multithreaded CPU routines, except in functions which accept multiple Quregs } @@ -125,8 +123,6 @@ void autodep_chooseQuregDeployment(int numQubits, int isDensMatr, int &useDistri // preconditions: // - the given configuration is compatible with env (assured by prior validation) // - this means no deployment is forced (=1) which is incompatible with env - // - it also means GPU-acceleration and multithreading are not simultaneously forced - // (although they may still be left automatic and need explicit revision) // disable any automatic deployments not permitted by env (it's gauranteed we never overwrite =1 to =0) if (!env.isDistributed) @@ -141,11 +137,15 @@ void autodep_chooseQuregDeployment(int numQubits, int isDensMatr, int &useDistri if (env.numNodes == 1) useDistrib = 0; - // overwrite any auto options (== modeflag::USE_AUTO) + // overwrite useDistrib chooseWhetherToDistributeQureg(numQubits, isDensMatr, useDistrib, useGpuAccel, env.numNodes); int numQuregNodes = (useDistrib)? env.numNodes : 1; - chooseWhetherToGpuAccelQureg(numQubits, isDensMatr, useDistrib, useGpuAccel, numQuregNodes); - chooseWhetherToMultithreadQureg(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, numQuregNodes); + + // overwrite useGpuAccel + chooseWhetherToGpuAccelQureg(numQubits, isDensMatr, useGpuAccel, numQuregNodes); + + // overwrite useMultithread + chooseWhetherToMultithreadQureg(numQubits, isDensMatr, useMultithread, numQuregNodes); } diff --git a/quest/src/core/errors.cpp b/quest/src/core/errors.cpp index 1e9d9ad0..c9985709 100644 --- a/quest/src/core/errors.cpp +++ b/quest/src/core/errors.cpp @@ -185,7 +185,7 @@ void assert_pairRankIsDistinct(Qureg qureg, int pairRank) { void assert_bufferSendRecvDoesNotOverlap(qindex sendInd, qindex recvInd, qindex numAmps) { - if (sendInd + numAmps > recvInd) + if (sendInd < recvInd + numAmps) raiseInternalError("A distributed function attempted to send and receive portions of the buffer which overlapped."); } diff --git a/quest/src/core/localiser.cpp b/quest/src/core/localiser.cpp index fe624b3c..3abf3ddd 100644 --- a/quest/src/core/localiser.cpp +++ b/quest/src/core/localiser.cpp @@ -1427,6 +1427,13 @@ void localiser_densmatr_twoQubitDepolarising(Qureg qureg, int qubit1, int qubit2 bool comm1 = doesChannelRequireComm(qureg, qubit1); bool comm2 = doesChannelRequireComm(qureg, qubit2); + + + // TODO: + // this is bugged, even when non-distributed!!! + + + if (comm2 && comm1) twoQubitDepolarisingOnPrefixAndPrefix(qureg, qubit1, qubit2, prob); if (comm2 && !comm1) diff --git a/quest/src/core/utilities.cpp b/quest/src/core/utilities.cpp index 176b53e5..00ed7f2f 100644 --- a/quest/src/core/utilities.cpp +++ b/quest/src/core/utilities.cpp @@ -221,6 +221,9 @@ qindex util_getNumLocalDiagonalAmpsWithBits(Qureg qureg, vector qubits, vec if (util_getRankBitOfBraQubit(qubits[i], qureg) != outcomes[i]) return 0; + // TODO: + // I THINK THIS MAY BE BUGGED / INCORRECT LOGIC!! + // otherwise, every 2^#qubits local diagonal is consistent with outcomes qindex numColsPerNode = powerOf2(qureg.logNumColsPerNode); qindex numDiags = numColsPerNode / powerOf2(qubits.size()); diff --git a/quest/src/core/validation.cpp b/quest/src/core/validation.cpp index eceab0e9..67850ea2 100644 --- a/quest/src/core/validation.cpp +++ b/quest/src/core/validation.cpp @@ -167,10 +167,6 @@ namespace report { "Cannot enable multithreaded processing of a Qureg created in a non-multithreaded QuEST environment."; - string NEW_GPU_QUREG_CANNOT_USE_MULTITHREADING = - "Cannot simultaneously GPU-accelerate and multithread a Qureg. Please disable multithreading, or set it to ${AUTO_DEPLOYMENT_FLAG} for QuEST to automatically disable it when deploying to GPU."; - - string NEW_QUREG_CANNOT_FIT_INTO_NON_DISTRIB_CPU_MEM = "The non-distributed Qureg (isDensity=${IS_DENS}) of ${NUM_QUBITS} qubits would be too large (${QCOMP_BYTES} * ${EXP_BASE}^${NUM_QUBITS} bytes) to fit into a single node's RAM (${RAM_SIZE} bytes). See reportQuESTEnv(), and consider using distribution."; @@ -1430,13 +1426,6 @@ void assertQuregFitsInGpuMem(int numQubits, int isDensMatr, int isDistrib, int i } } -void validate_newQuregNotBothMultithreadedAndGpuAccel(int useGpu, int useMultithread, const char* caller) { - - // note either or both of useGpu and useMultithread are permitted to be modeflag::USE_AUTO (=-1) - tokenSubs vars = {{"${AUTO_DEPLOYMENT_FLAG}", modeflag::USE_AUTO}}; - assertThat(useGpu != 1 || useMultithread != 1, report::NEW_GPU_QUREG_CANNOT_USE_MULTITHREADING, vars, caller); -} - void validate_newQuregParams(int numQubits, int isDensMatr, int isDistrib, int isGpuAccel, int isMultithread, QuESTEnv env, const char* caller) { // some of the below validation involves getting distributed node consensus, which @@ -1452,8 +1441,6 @@ void validate_newQuregParams(int numQubits, int isDensMatr, int isDistrib, int i assertQuregNotDistributedOverTooManyNodes(numQubits, isDensMatr, isDistrib, env, caller); assertQuregFitsInCpuMem(numQubits, isDensMatr, isDistrib, env, caller); assertQuregFitsInGpuMem(numQubits, isDensMatr, isDistrib, isGpuAccel, env, caller); - - validate_newQuregNotBothMultithreadedAndGpuAccel(isGpuAccel, isMultithread, caller); } void validate_newQuregAllocs(Qureg qureg, const char* caller) { @@ -1549,8 +1536,8 @@ void assertMatrixTotalNumElemsDontExceedMaxIndex(int numQubits, bool isDense, co int maxNumQubits = mem_getMaxNumMatrixQubitsBeforeIndexOverflow(isDense); string msg = (isDense)? - report::NEW_DIAG_MATR_NUM_ELEMS_WOULD_EXCEED_QINDEX : - report::NEW_COMP_MATR_NUM_ELEMS_WOULD_EXCEED_QINDEX ; + report::NEW_COMP_MATR_NUM_ELEMS_WOULD_EXCEED_QINDEX: + report::NEW_DIAG_MATR_NUM_ELEMS_WOULD_EXCEED_QINDEX; tokenSubs vars = { {"${NUM_QUBITS}", numQubits}, diff --git a/quest/src/core/validation.hpp b/quest/src/core/validation.hpp index 92637457..ca898617 100644 --- a/quest/src/core/validation.hpp +++ b/quest/src/core/validation.hpp @@ -95,8 +95,6 @@ void validate_newMaxNumReportedSigFigs(int numSigFigs, const char* caller); void validate_newQuregParams(int numQubits, int isDensMatr, int isDistrib, int isGpuAccel, int numCpuThreads, QuESTEnv env, const char* caller); -void validate_newQuregNotBothMultithreadedAndGpuAccel(int useGpu, int numThreads, const char* caller); - void validate_newQuregAllocs(Qureg qureg, const char* caller); diff --git a/quest/src/cpu/cpu_subroutines.cpp b/quest/src/cpu/cpu_subroutines.cpp index 89c7e2bf..65678e33 100644 --- a/quest/src/cpu/cpu_subroutines.cpp +++ b/quest/src/cpu/cpu_subroutines.cpp @@ -1627,6 +1627,9 @@ qreal cpu_densmatr_calcProbOfMultiQubitOutcome_sub(Qureg qureg, vector qubi #pragma omp parallel for reduction(+:prob) if(qureg.isMultithreaded) for (qindex n=0; n q auto ampIter = thrust::make_permutation_iterator(getStartPtr(qureg), diagIter); auto probIter = thrust::make_transform_iterator(ampIter, probFunctor); + // TODO: + // I BELIEVE THIS IS BUGGED AND/OR HAS INCORRECT LOGIC! + qindex numIts = util_getNumLocalDiagonalAmpsWithBits(qureg, qubits, outcomes); qreal prob = thrust::reduce(probIter, probIter + numIts); return prob; From fb46979b13f9a8e69b29e5dde6286759b2ee1421 Mon Sep 17 00:00:00 2001 From: Tyson Jones Date: Sun, 15 Dec 2024 23:07:31 +0100 Subject: [PATCH 2/7] patching density-matrix prob-calc --- quest/src/core/localiser.cpp | 13 +++++++++++-- quest/src/core/utilities.cpp | 19 ------------------- quest/src/core/utilities.hpp | 2 -- quest/src/cpu/cpu_subroutines.cpp | 25 +++++++++++++------------ quest/src/gpu/gpu_thrust.cuh | 5 +---- 5 files changed, 25 insertions(+), 39 deletions(-) diff --git a/quest/src/core/localiser.cpp b/quest/src/core/localiser.cpp index 3abf3ddd..208995bd 100644 --- a/quest/src/core/localiser.cpp +++ b/quest/src/core/localiser.cpp @@ -1762,8 +1762,17 @@ qreal localiser_densmatr_calcProbOfMultiQubitOutcome(Qureg qureg, vector qu if (doAnyLocalStatesHaveQubitValues(qureg, braQubits, outcomes)) { - // such nodes need to know all ket qubits (which are all suffix) - prob += accel_densmatr_calcProbOfMultiQubitOutcome_sub(qureg, qubits, outcomes); + // such nodes need only know the ket qubits/outcomes for which the bra-qubits are in suffix + vector ketQubitsWithBraInSuffix; + vector ketOutcomesWithBraInSuffix; + for (int q=0; q qubits, vector outcomes) { - assert_utilsGivenDensMatr(qureg); - - // a corresponding bra-qubit in the prefix with an inconsistent outcome means the node - // contains no diagonal basis states consistent with the given outcomes - for (size_t i=0; i qubits, vector outcomes); - qindex util_getGlobalFlatIndex(Qureg qureg, qindex globalRow, qindex globalCol); int util_getRankContainingIndex(Qureg qureg, qindex globalInd); diff --git a/quest/src/cpu/cpu_subroutines.cpp b/quest/src/cpu/cpu_subroutines.cpp index 65678e33..4a046b45 100644 --- a/quest/src/cpu/cpu_subroutines.cpp +++ b/quest/src/cpu/cpu_subroutines.cpp @@ -1585,19 +1585,20 @@ qreal cpu_statevec_calcProbOfMultiQubitOutcome_sub(Qureg qureg, vector qubi qreal prob = 0; // each iteration visits one amp per 2^qubits.size() amps + // (>=1 since all qubits are in suffix, so qubits.size() <= suffix size) qindex numIts = qureg.numAmpsPerNode / powerOf2(qubits.size()); auto sortedQubits = util_getSorted(qubits); // all in suffix auto qubitStateMask = util_getBitMask(qubits, outcomes); // use template param to compile-time unroll loop in insertBits() - SET_VAR_AT_COMPILE_TIME(int, numQubits, NumQubits, qubits.size()); + SET_VAR_AT_COMPILE_TIME(int, numBits, NumQubits, qubits.size()); #pragma omp parallel for reduction(+:prob) if(qureg.isMultithreaded) for (qindex n=0; n qubi assert_numTargsMatchesTemplateParam(qubits.size(), NumQubits); + // note that qubits are only ket qubits for which the corresponding bra-qubit is in the suffix; + // this function is not invoked upon nodes where prefix bra-qubits do not correspond to given outcomes + qreal prob = 0; - // each iteration visits one column (contributing one diagonal amp) per 2^qubits.size() possible - qindex numIts = util_getNumLocalDiagonalAmpsWithBits(qureg, qubits, outcomes); - qindex firstDiagInd = util_getLocalIndexOfFirstDiagonalAmp(qureg); + // each iteration visits one relevant diagonal amp (= one column) + qindex numIts = powerOf2(qureg.logNumColsPerNode - qubits.size()); qindex numAmpsPerCol = powerOf2(qureg.numQubits); + qindex firstDiagInd = util_getLocalIndexOfFirstDiagonalAmp(qureg); - auto sortedQubits = util_getSorted(qubits); // all in suffix + auto sortedQubits = util_getSorted(qubits); // all in suffix, with corresponding bra's all in suffix auto qubitStateMask = util_getBitMask(qubits, outcomes); // use template param to compile-time unroll loop in insertBits() @@ -1627,13 +1631,10 @@ qreal cpu_densmatr_calcProbOfMultiQubitOutcome_sub(Qureg qureg, vector qubi #pragma omp parallel for reduction(+:prob) if(qureg.isMultithreaded) for (qindex n=0; n q auto ampIter = thrust::make_permutation_iterator(getStartPtr(qureg), diagIter); auto probIter = thrust::make_transform_iterator(ampIter, probFunctor); - // TODO: - // I BELIEVE THIS IS BUGGED AND/OR HAS INCORRECT LOGIC! - - qindex numIts = util_getNumLocalDiagonalAmpsWithBits(qureg, qubits, outcomes); + qindex numIts = powerOf2(qureg.logNumColsPerNode - qubits.size()); qreal prob = thrust::reduce(probIter, probIter + numIts); return prob; } From 41868c93ae9100c4bb52cbfc44311d321d5e3309 Mon Sep 17 00:00:00 2001 From: Tyson Jones Date: Tue, 17 Dec 2024 18:04:44 +0100 Subject: [PATCH 3/7] added missing amps-fit-in-node validation --- quest/src/api/decoherence.cpp | 1 + quest/src/api/operations.cpp | 15 ++++++-- quest/src/core/validation.cpp | 65 +++++++++++++++++++++++++++-------- quest/src/core/validation.hpp | 19 +++++----- 4 files changed, 75 insertions(+), 25 deletions(-) diff --git a/quest/src/api/decoherence.cpp b/quest/src/api/decoherence.cpp index 30437c95..d887e270 100644 --- a/quest/src/api/decoherence.cpp +++ b/quest/src/api/decoherence.cpp @@ -111,6 +111,7 @@ void mixPaulis(Qureg qureg, int qubit, qreal probX, qreal probY, qreal probZ) { void mixKrausMap(Qureg qureg, int* qubits, int numQubits, KrausMap map) { validate_quregFields(qureg, __func__); validate_quregIsDensityMatrix(qureg, __func__); + validate_mixedAmpsFitInNode(qureg, numQubits, __func__); validate_targets(qureg, qubits, numQubits, __func__); validate_krausMapIsCPTP(map, __func__); // also checks fields and is-sync validate_krausMapMatchesTargets(map, numQubits, __func__); diff --git a/quest/src/api/operations.cpp b/quest/src/api/operations.cpp index f0ce3eac..21f80289 100644 --- a/quest/src/api/operations.cpp +++ b/quest/src/api/operations.cpp @@ -39,6 +39,7 @@ void validateAndApplyAnyCtrlAnyTargUnitaryMatrix(Qureg qureg, int* ctrls, int* s validate_controlsAndTargets(qureg, ctrls, numCtrls, targs, numTargs, caller); validate_controlStates(states, numCtrls, caller); validate_matrixDimMatchesTargets(matr, numTargs, caller); // also checks fields and is-synced + validate_mixedAmpsFitInNode(qureg, numTargs, caller); validate_matrixIsUnitary(matr, caller); // harmlessly rechecks fields and is-synced auto ctrlVec = util_getVector(ctrls, numCtrls); @@ -117,6 +118,7 @@ void multiplyCompMatr2(Qureg qureg, int target1, int target2, CompMatr2 matrix) validate_quregFields(qureg, __func__); validate_twoTargets(qureg, target1, target2, __func__); validate_matrixFields(matrix, __func__); // matrix can be non-unitary + validate_mixedAmpsFitInNode(qureg, 2, __func__); bool conj = false; localiser_statevec_anyCtrlTwoTargDenseMatr(qureg, {}, {}, target1, target2, matrix, conj); @@ -156,6 +158,7 @@ void multiplyCompMatr(Qureg qureg, int* targets, int numTargets, CompMatr matrix validate_quregFields(qureg, __func__); validate_targets(qureg, targets, numTargets, __func__); validate_matrixDimMatchesTargets(matrix, numTargets, __func__); // also validates fields and is-sync, but not unitarity + validate_mixedAmpsFitInNode(qureg, numTargets, __func__); bool conj = false; localiser_statevec_anyCtrlAnyTargDenseMatr(qureg, {}, {}, util_getVector(targets, numTargets), matrix, conj); @@ -617,9 +620,14 @@ void applyMultiStateControlledSqrtSwap(Qureg qureg, int* controls, int* states, validate_controlsAndTwoTargets(qureg, controls, numControls, target1, target2, __func__); validate_controlStates(states, numControls, __func__); // permits states==nullptr - // this is likely suboptimal, and there must exist a more - // efficient bespoke strategy for sqrt-SWAP, although given - // it is a little esoteric, optimisation is not worthwhile + // TODO: + // this function exacts sqrtSwap as a dense 2-qubit matrix, + // where as bespoke communication and simulation strategy is + // clearly possible which we have not supported because the gate + // is somewhat esoteric. As such, we must validate mixed-amps fit + + validate_mixedAmpsFitInNode(qureg, 2, __func__); // to throw SqrtSwap error, not generic CompMatr2 error + CompMatr2 matr = getCompMatr2({ {1, 0, 0, 0}, {0, .5+.5_i, .5-.5_i, 0}, @@ -1224,6 +1232,7 @@ void applySuperOp(Qureg qureg, SuperOp superop, int* targets, int numTargets) { validate_superOpFields(superop, __func__); validate_superOpIsSynced(superop, __func__); validate_superOpDimMatchesTargs(superop, numTargets, __func__); + validate_mixedAmpsFitInNode(qureg, numTargets, __func__); localiser_densmatr_superoperator(qureg, superop, util_getVector(targets, numTargets)); } diff --git a/quest/src/core/validation.cpp b/quest/src/core/validation.cpp index 67850ea2..ae0447c0 100644 --- a/quest/src/core/validation.cpp +++ b/quest/src/core/validation.cpp @@ -783,6 +783,20 @@ namespace report { "The GPU has less available memory (${MEM_AVAIL} bytes) than that needed (${MEM_NEEDED} bytes) to temporarily store the ${NUM_OUTCOMES} outcome probabilities of the specified ${NUM_QUBITS} qubits."; + /* + * MISC GATE PARAMETERS + */ + + string ROTATION_AXIS_VECTOR_IS_ZERO = + "The rotation axis vector was all zero, or within epsilion magnitude to the zero vector."; + + + string CANNOT_FIT_MIXED_STATEVEC_AMPS_INTO_SINGLE_NODE = + "Cannot perform this ${NUM_TARGS}-target operation upon a ${NUM_QUREG_QUBITS}-qubit statevector distributed between ${NUM_NODES} nodes, since each node's communication buffer (with capacity for ${NUM_QUREG_AMPS_PER_NODE} amps) cannot simultaneously store the ${NUM_TARG_AMPS} mixed remote amplitudes."; + + string CANNOT_FIT_MIXED_DENSMATR_AMPS_INTO_SINGLE_NODE = + "Cannot perform this ${NUM_TARGS}-target operation upon a ${NUM_QUREG_QUBITS}-qubit density-matrix distributed between ${NUM_NODES} nodes, since each node's communication buffer (with capacity for ${NUM_QUREG_AMPS_PER_NODE} amps) cannot simultaneously store the ${NUM_TARG_AMPS} mixed remote amplitudes."; + /* * CHANNEL PARAMETERS @@ -816,7 +830,7 @@ namespace report { */ string MIXED_QUREG_NOT_DENSITY_MATRIX = - "The first Qureg, which will undergo mixing, must be a density matrx."; + "The first Qureg, which will undergo mixing, must be a density matrix."; string MIXED_QUREGS_HAVE_DIFFERENT_NUM_QUBITS = "The given Quregs contain an inconsistent number of qubits (${NUM_A} and ${NUM_B}) and cannot be mixed."; @@ -3230,19 +3244,6 @@ void validate_controlStates(int* states, int numCtrls, const char* caller) { -/* - * ROTATION PARAMETERS - */ - -void validate_rotationAxisNotZeroVector(qreal x, qreal y, qreal z, const char* caller) { - - qreal norm = pow(x,2) + pow(y,2) + pow(z,2); - - assertThat(norm > global_validationEpsilon, report::ROTATION_AXIS_VECTOR_IS_ZERO, caller); -} - - - /* * MEASUREMENT PARAMETERS */ @@ -3299,6 +3300,42 @@ void validate_measurementOutcomesFitInGpuMem(Qureg qureg, int numQubits, const c +/* + * MISC GATE PARAMETERS + */ + +void validate_rotationAxisNotZeroVector(qreal x, qreal y, qreal z, const char* caller) { + + qreal norm = pow(x,2) + pow(y,2) + pow(z,2); + + assertThat(norm > global_validationEpsilon, report::ROTATION_AXIS_VECTOR_IS_ZERO, caller); +} + +void validate_mixedAmpsFitInNode(Qureg qureg, int numTargets, const char* caller) { + + // only relevant to distributed quregs + if (!qureg.isDistributed) + return; + + qindex numTargAmps = powerOf2(numTargets * (qureg.isDensityMatrix? 2:1)); + + tokenSubs vars = { + {"${NUM_TARGS}", numTargets}, + {"${NUM_TARG_AMPS}", numTargAmps}, + {"${NUM_NODES}", qureg.numNodes}, + {"${NUM_QUREG_QUBITS}", qureg.numQubits}, + {"${NUM_QUREG_AMPS_PER_NODE}", qureg.numAmpsPerNode} + }; + + string msg = (qureg.isDensityMatrix)? + report::CANNOT_FIT_MIXED_DENSMATR_AMPS_INTO_SINGLE_NODE: + report::CANNOT_FIT_MIXED_STATEVEC_AMPS_INTO_SINGLE_NODE; + + assertThat(qureg.numAmpsPerNode >= numTargAmps, msg, vars, caller); +} + + + /* * CHANNEL PARAMETERS */ diff --git a/quest/src/core/validation.hpp b/quest/src/core/validation.hpp index ca898617..76d26856 100644 --- a/quest/src/core/validation.hpp +++ b/quest/src/core/validation.hpp @@ -344,14 +344,6 @@ void validate_controlsAndTwoTargets(Qureg qureg, int* ctrls, int numCtrls, int t -/* - * ROTATION PARAMETERS - */ - -void validate_rotationAxisNotZeroVector(qreal x, qreal y, qreal z, const char* caller); - - - /* * MEASUREMENT PARAMETERS */ @@ -368,6 +360,16 @@ void validate_measurementOutcomesFitInGpuMem(Qureg qureg, int numQubits, const c +/* + * MISC GATE PARAMETERS + */ + +void validate_rotationAxisNotZeroVector(qreal x, qreal y, qreal z, const char* caller); + +void validate_mixedAmpsFitInNode(Qureg qureg, int numTargets, const char* caller); + + + /* * DECOHERENCE */ @@ -427,6 +429,7 @@ void validate_numInitRandomPureStates(qindex numPureStates, const char* caller) void validate_expecValIsReal(qcomp value, bool isDensMatr, const char* caller); + /* * FILE IO */ From 3b1150abede7e93f2c615ac245bbba1022596c0f Mon Sep 17 00:00:00 2001 From: Tyson Jones Date: Tue, 17 Dec 2024 18:05:20 +0100 Subject: [PATCH 4/7] patched bug in many-target general matrix gates --- quest/src/core/localiser.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/quest/src/core/localiser.cpp b/quest/src/core/localiser.cpp index 208995bd..339b2d81 100644 --- a/quest/src/core/localiser.cpp +++ b/quest/src/core/localiser.cpp @@ -803,9 +803,15 @@ void anyCtrlMultiSwapBetweenPrefixAndSuffix(Qureg qureg, vector ctrls, vect // a communicator which may be inelegant alongside our own distribution scheme. // perform necessary swaps to move all targets into suffix, each of which invokes communication - for (size_t i=0; i Date: Sun, 22 Dec 2024 15:49:53 +0100 Subject: [PATCH 5/7] avoid overwriting .isCPTP when eps=0 --- quest/src/core/errors.cpp | 6 +++++ quest/src/core/errors.hpp | 2 ++ quest/src/core/utilities.cpp | 25 ++++-------------- quest/src/core/validation.cpp | 49 ++++++++++++++++++++++------------- quest/src/core/validation.hpp | 2 +- 5 files changed, 45 insertions(+), 39 deletions(-) diff --git a/quest/src/core/errors.cpp b/quest/src/core/errors.cpp index c9985709..8347e202 100644 --- a/quest/src/core/errors.cpp +++ b/quest/src/core/errors.cpp @@ -684,6 +684,12 @@ void assert_utilsGivenDensMatr(Qureg qureg) { raiseInternalError("A utility function was given a statevector where a density matrix was expected."); } +void assert_utilsGivenNonZeroEpsilon(qreal eps) { + + if (eps == 0) + raiseInternalError("A utility function (isUnitary, isHermitian, isCPTP) received an epsilon of zero, which should have precluded it being called."); +} + /* diff --git a/quest/src/core/errors.hpp b/quest/src/core/errors.hpp index 215746be..109911c1 100644 --- a/quest/src/core/errors.hpp +++ b/quest/src/core/errors.hpp @@ -290,6 +290,8 @@ void assert_utilsGivenStateVec(Qureg qureg); void assert_utilsGivenDensMatr(Qureg qureg); +void assert_utilsGivenNonZeroEpsilon(qreal eps); + /* diff --git a/quest/src/core/utilities.cpp b/quest/src/core/utilities.cpp index 881a3760..19d9ede0 100644 --- a/quest/src/core/utilities.cpp +++ b/quest/src/core/utilities.cpp @@ -318,10 +318,7 @@ void util_setConj(DiagMatr matrix) { // type T can be qcomp** or qcomp*[] template bool isUnitary(T elems, qindex dim, qreal eps) { - - // skip expensive unitarity check if eps is infinite (encoded by 0) - if (eps == 0) - return true; + assert_utilsGivenNonZeroEpsilon(eps); qreal epsSq = eps * eps; @@ -347,10 +344,7 @@ bool isUnitary(T elems, qindex dim, qreal eps) { // diagonal version doesn't need templating because array decays to pointer, yay! bool isUnitary(qcomp* diags, qindex dim, qreal eps) { - - // skip expensive unitarity check if eps is infinite (encoded by 0) - if (eps == 0) - return true; + assert_utilsGivenNonZeroEpsilon(eps); // check every element has unit magnitude for (qindex i=0; i bool isHermitian(T elems, qindex dim, qreal eps) { - - // skip expensive Hermiticity check if eps is infinite (encoded by 0) - if (eps == 0) - return true; + assert_utilsGivenNonZeroEpsilon(eps); qreal epsSq = eps * eps; @@ -444,10 +435,7 @@ bool isHermitian(T elems, qindex dim, qreal eps) { // diagonal version doesn't need templating because array decays to pointer, yay! bool isHermitian(qcomp* diags, qindex dim, qreal eps) { - - // skip expensive Hermiticity check if eps is infinite (encoded by 0) - if (eps == 0) - return true; + assert_utilsGivenNonZeroEpsilon(eps); // check every element has a zero (or void assertMatrixIsUnitary(T matr, const char* caller) { - // avoid expensive unitarity check if validation is anyway disabled - if (!global_isValidationEnabled) + // avoid expensive unitarity check (and do not overwrite .isUnitary) if validation is anyway disabled + if (isNumericalValidationDisabled()) return; // unitarity is determined differently depending on matrix type @@ -2150,8 +2159,8 @@ void ensureMatrHermiticityIsKnown(T matr) { template void assertMatrixIsHermitian(T matr, const char* caller) { - // avoid expensive unitarity check if validation is anyway disabled - if (!global_isValidationEnabled) + // avoid expensive unitarity check (and do not overwrite .isHermitian) if validation is anyway disabled + if (isNumericalValidationDisabled()) return; // unitarity is determined differently depending on matrix type @@ -2724,8 +2733,8 @@ void validate_krausMapIsCPTP(KrausMap map, const char* caller) { validate_krausMapFields(map, caller); validate_krausMapIsSynced(map, caller); - // avoid expensive CPTP check if validation is anyway disabled - if (!global_isValidationEnabled) + // avoid expensive CPTP check (and do not overwrite .isCPTP) if validation is anyway disabled + if (isNumericalValidationDisabled()) return; // evaluate CPTPness if it isn't already known @@ -2973,6 +2982,10 @@ void validate_pauliStrSumFields(PauliStrSum sum, const char* caller) { void valdidate_pauliStrSumIsHermitian(PauliStrSum sum, const char* caller) { + // avoid expensive hermiticity check (and do not overwrite .isHermitian) if validation is anyway disabled + if (isNumericalValidationDisabled()) + return; + // ensure hermiticity is known (if not; compute it) if (*(sum.isHermitian) == validate_STRUCT_PROPERTY_UNKNOWN_FLAG) *(sum.isHermitian) = util_isHermitian(sum, global_validationEpsilon); diff --git a/quest/src/core/validation.hpp b/quest/src/core/validation.hpp index 76d26856..d2e17dbe 100644 --- a/quest/src/core/validation.hpp +++ b/quest/src/core/validation.hpp @@ -22,7 +22,7 @@ using std::string; /* - * MATRIX ATTRIBUTE FLAGS + * PLACEHOLDER STRUCT FIELDS */ const int validate_STRUCT_PROPERTY_UNKNOWN_FLAG = -1; From c04cc436daa6f14827dffcc3c25ec6c133f8c166 Mon Sep 17 00:00:00 2001 From: Tyson Jones Date: Sun, 12 Jan 2025 13:03:54 +0100 Subject: [PATCH 6/7] patched deprecated createPauliStrSum --- quest/include/deprecated.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/quest/include/deprecated.h b/quest/include/deprecated.h index 8a4c01a1..91aff3fa 100644 --- a/quest/include/deprecated.h +++ b/quest/include/deprecated.h @@ -964,9 +964,11 @@ static inline PauliStrSum _createPauliStrSumFromCodes(int numQubits, _NoWarnPaul PauliStr* strings = (PauliStr*) malloc(numTerms * sizeof *strings); for (int i=0; i Date: Mon, 20 Jan 2025 15:32:26 +0100 Subject: [PATCH 7/7] patched two-qubit depolarising as per https://github.com/TysonRayJones/Distributed-Full-State-Algorithms/issues/7 --- quest/src/api/decoherence.cpp | 2 +- quest/src/core/accelerator.cpp | 6 --- quest/src/core/accelerator.hpp | 1 - quest/src/core/localiser.cpp | 28 +++++------ quest/src/core/utilities.cpp | 9 ++++ quest/src/core/utilities.hpp | 4 +- quest/src/cpu/cpu_subroutines.cpp | 77 +++++++++---------------------- quest/src/cpu/cpu_subroutines.hpp | 1 - quest/src/gpu/gpu_kernels.cuh | 47 ++++++------------- quest/src/gpu/gpu_subroutines.cpp | 39 +++++----------- quest/src/gpu/gpu_subroutines.hpp | 1 - 11 files changed, 74 insertions(+), 141 deletions(-) diff --git a/quest/src/api/decoherence.cpp b/quest/src/api/decoherence.cpp index d887e270..35f63fa8 100644 --- a/quest/src/api/decoherence.cpp +++ b/quest/src/api/decoherence.cpp @@ -111,8 +111,8 @@ void mixPaulis(Qureg qureg, int qubit, qreal probX, qreal probY, qreal probZ) { void mixKrausMap(Qureg qureg, int* qubits, int numQubits, KrausMap map) { validate_quregFields(qureg, __func__); validate_quregIsDensityMatrix(qureg, __func__); - validate_mixedAmpsFitInNode(qureg, numQubits, __func__); validate_targets(qureg, qubits, numQubits, __func__); + validate_mixedAmpsFitInNode(qureg, numQubits, __func__); validate_krausMapIsCPTP(map, __func__); // also checks fields and is-sync validate_krausMapMatchesTargets(map, numQubits, __func__); diff --git a/quest/src/core/accelerator.cpp b/quest/src/core/accelerator.cpp index 6d7f0e51..a1443e2d 100644 --- a/quest/src/core/accelerator.cpp +++ b/quest/src/core/accelerator.cpp @@ -747,12 +747,6 @@ void accel_densmatr_twoQubitDepolarising_subF(Qureg qureg, int qubit1, int qubit gpu_densmatr_twoQubitDepolarising_subF(qureg, qubit1, qubit2, prob): cpu_densmatr_twoQubitDepolarising_subF(qureg, qubit1, qubit2, prob); } -void accel_densmatr_twoQubitDepolarising_subG(Qureg qureg, int qubit1, int qubit2, qreal prob) { - - (qureg.isGpuAccelerated)? - gpu_densmatr_twoQubitDepolarising_subG(qureg, qubit1, qubit2, prob): - cpu_densmatr_twoQubitDepolarising_subG(qureg, qubit1, qubit2, prob); -} diff --git a/quest/src/core/accelerator.hpp b/quest/src/core/accelerator.hpp index d389f59e..aeafe66a 100644 --- a/quest/src/core/accelerator.hpp +++ b/quest/src/core/accelerator.hpp @@ -237,7 +237,6 @@ void accel_densmatr_twoQubitDepolarising_subC(Qureg qureg, int qubit1, int qubit void accel_densmatr_twoQubitDepolarising_subD(Qureg qureg, int qubit1, int qubit2, qreal prob); void accel_densmatr_twoQubitDepolarising_subE(Qureg qureg, int qubit1, int qubit2, qreal prob); void accel_densmatr_twoQubitDepolarising_subF(Qureg qureg, int qubit1, int qubit2, qreal prob); -void accel_densmatr_twoQubitDepolarising_subG(Qureg qureg, int qubit1, int qubit2, qreal prob); void accel_densmatr_oneQubitPauliChannel_subA(Qureg qureg, int qubit, qreal pI, qreal pX, qreal pY, qreal pZ); void accel_densmatr_oneQubitPauliChannel_subB(Qureg qureg, int qubit, qreal pI, qreal pX, qreal pY, qreal pZ); diff --git a/quest/src/core/localiser.cpp b/quest/src/core/localiser.cpp index 339b2d81..7c46f5cb 100644 --- a/quest/src/core/localiser.cpp +++ b/quest/src/core/localiser.cpp @@ -1407,18 +1407,25 @@ void twoQubitDepolarisingOnPrefixAndPrefix(Qureg qureg, int ketQb1, int ketQb2, int braBit1 = util_getRankBitOfBraQubit(ketQb1, qureg); int braBit2 = util_getRankBitOfBraQubit(ketQb2, qureg); - // scale 25% of (non-communicated) amps + // pack unscaled amps before subsequent scaling + qindex numPacked = accel_statevec_packAmpsIntoBuffer(qureg, {ketQb1,ketQb2}, {braBit1,braBit2}); + + // scale all amps accel_densmatr_twoQubitDepolarising_subE(qureg, ketQb1, ketQb2, prob); - // pack and swap 25% of buffer, and use it to modify 25% of local amps + // swap the buffer with 3 other nodes to update local amps int pairRank1 = util_getRankWithBraQubitFlipped(ketQb1, qureg); - exchangeAmpsToBuffersWhereQubitsAreInStates(qureg, pairRank1, {ketQb1,ketQb2}, {braBit1,braBit2}); + int pairRank2 = util_getRankWithBraQubitFlipped(ketQb2, qureg); + int pairRank3 = util_getRankWithBraQubitsFlipped({ketQb1,ketQb2}, qureg); + + comm_exchangeSubBuffers(qureg, numPacked, pairRank1); accel_densmatr_twoQubitDepolarising_subF(qureg, ketQb1, ketQb2, prob); - // pack and swap another 25% of buffer (we could pack during subE, but we choose not to) - int pairRank2 = util_getRankWithBraQubitFlipped(ketQb2, qureg); - exchangeAmpsToBuffersWhereQubitsAreInStates(qureg, pairRank2, {ketQb1,ketQb2}, {braBit1,braBit2}); - accel_densmatr_twoQubitDepolarising_subG(qureg, ketQb1, ketQb2, prob); + comm_exchangeSubBuffers(qureg, numPacked, pairRank2); + accel_densmatr_twoQubitDepolarising_subF(qureg, ketQb1, ketQb2, prob); + + comm_exchangeSubBuffers(qureg, numPacked, pairRank3); + accel_densmatr_twoQubitDepolarising_subF(qureg, ketQb1, ketQb2, prob); } @@ -1433,13 +1440,6 @@ void localiser_densmatr_twoQubitDepolarising(Qureg qureg, int qubit1, int qubit2 bool comm1 = doesChannelRequireComm(qureg, qubit1); bool comm2 = doesChannelRequireComm(qureg, qubit2); - - - // TODO: - // this is bugged, even when non-distributed!!! - - - if (comm2 && comm1) twoQubitDepolarisingOnPrefixAndPrefix(qureg, qubit1, qubit2, prob); if (comm2 && !comm1) diff --git a/quest/src/core/utilities.cpp b/quest/src/core/utilities.cpp index 19d9ede0..f42cce80 100644 --- a/quest/src/core/utilities.cpp +++ b/quest/src/core/utilities.cpp @@ -121,6 +121,15 @@ int util_getRankWithBraQubitFlipped(int ketQubit, Qureg qureg) { return rankFlip; } +int util_getRankWithBraQubitsFlipped(vector ketQubits, Qureg qureg) { + + int rank = qureg.rank; + for (int qubit : ketQubits) + rank = flipBit(rank, util_getPrefixBraInd(qubit, qureg)); + + return rank; +} + vector util_getBraQubits(vector ketQubits, Qureg qureg) { vector braInds(0); diff --git a/quest/src/core/utilities.hpp b/quest/src/core/utilities.hpp index 0fe1ad97..f7c70b12 100644 --- a/quest/src/core/utilities.hpp +++ b/quest/src/core/utilities.hpp @@ -40,9 +40,11 @@ int util_getRankBitOfQubit(int ketQubit, Qureg qureg); int util_getRankBitOfBraQubit(int ketQubit, Qureg qureg); int util_getRankWithQubitFlipped(int ketQubit, Qureg qureg); -int util_getRankWithBraQubitFlipped(int ketQubit, Qureg qureg); int util_getRankWithQubitsFlipped(vector prefixQubits, Qureg qureg); +int util_getRankWithBraQubitFlipped(int ketQubit, Qureg qureg); +int util_getRankWithBraQubitsFlipped(vector ketQubits, Qureg qureg); + vector util_getBraQubits(vector ketQubits, Qureg qureg); vector util_getVector(int* qubits, int numQubits); diff --git a/quest/src/cpu/cpu_subroutines.cpp b/quest/src/cpu/cpu_subroutines.cpp index 4a046b45..237209ce 100644 --- a/quest/src/cpu/cpu_subroutines.cpp +++ b/quest/src/cpu/cpu_subroutines.cpp @@ -1083,11 +1083,11 @@ void cpu_densmatr_twoQubitDepolarising_subA(Qureg qureg, int ketQb1, int ketQb2, for (qindex n=0; n c1*amp + c2(sum of other three amps) auto factors = util_getTwoQubitDepolarisingFactors(prob); auto c1 = factors.c1; auto c2 = factors.c2; + // below, we compute term = (sum of all four amps) for brevity, so adjust c1 + c1 -= c2; + // for brevity qcomp* amps = qureg.cpuAmps; @@ -1151,7 +1155,7 @@ void cpu_densmatr_twoQubitDepolarising_subC(Qureg qureg, int ketQb1, int ketQb2, // decide whether or not to modify nth local bool flag1 = getBit(n, ketQb1) == getBit(n, braQb1); bool flag2 = getBit(n, ketQb2) == braBit2; - bool mod = !(flag1 & flag2); + bool mod = !(flag1 & flag2); // scale amp by 1 or (1 + c3) qureg.cpuAmps[n] *= 1 + c3 * mod; @@ -1197,30 +1201,26 @@ void cpu_densmatr_twoQubitDepolarising_subD(Qureg qureg, int ketQb1, int ketQb2, void cpu_densmatr_twoQubitDepolarising_subE(Qureg qureg, int ketQb1, int ketQb2, qreal prob) { - // scale 25% of amps but iterate all + // all amplitudes are scaled; 25% by c1 and 75% by 1 + c3 qindex numIts = qureg.numAmpsPerNode; + auto factors = util_getTwoQubitDepolarisingFactors(prob); + qreal fac0 = 1 + factors.c3; + qreal fac1 = factors.c1 - fac0; + int braBit1 = util_getRankBitOfBraQubit(ketQb1, qureg); int braBit2 = util_getRankBitOfBraQubit(ketQb2, qureg); - qreal c3 = util_getTwoQubitDepolarisingFactors(prob).c3; - - // TODO: - // are we really inefficiently enumerating all amps and applying a non-unity - // factor to only 25%?! Is this because we do not know braBit2 and ergo - // cannot be sure a direct enumeration is accessing indicies in a monotonically - // increasing order? Can that really outweigh a 3x slowdown?! Test and fix! - #pragma omp parallel for if(qureg.isMultithreaded) for (qindex n=0; n>> ( toCuQcomps(qureg.gpuAmps), numThreads, - ketQb1, ketQb2, braQb1, braQb2, factors.c1, factors.c2 + ketQb1, ketQb2, braQb1, braQb2, altc1, factors.c2 ); #else @@ -1049,11 +1052,14 @@ void gpu_densmatr_twoQubitDepolarising_subE(Qureg qureg, int ketQb1, int ketQb2, int braBit1 = util_getRankBitOfBraQubit(ketQb1, qureg); int braBit2 = util_getRankBitOfBraQubit(ketQb2, qureg); - auto c3 = util_getTwoQubitDepolarisingFactors(prob).c3; + + auto factors = util_getTwoQubitDepolarisingFactors(prob); + qreal fac0 = 1 + factors.c3; + qreal fac1 = factors.c1 - fac0; kernel_densmatr_twoQubitDepolarising_subE <<>> ( toCuQcomps(qureg.gpuAmps), numThreads, - ketQb1, ketQb2, braBit1, braBit2, c3 + ketQb1, ketQb2, braBit1, braBit2, fac0, fac1 ); #else @@ -1072,34 +1078,11 @@ void gpu_densmatr_twoQubitDepolarising_subF(Qureg qureg, int ketQb1, int ketQb2, int braBit1 = util_getRankBitOfBraQubit(ketQb1, qureg); int braBit2 = util_getRankBitOfBraQubit(ketQb2, qureg); - auto factors = util_getTwoQubitDepolarisingFactors(prob); + auto c2 = util_getTwoQubitDepolarisingFactors(prob).c2; kernel_densmatr_twoQubitDepolarising_subF <<>> ( toCuQcomps(qureg.gpuAmps), &toCuQcomps(qureg.gpuCommBuffer)[offset], numThreads, - ketQb1, ketQb2, braBit1, braBit2, factors.c1, factors.c2 - ); - -#else - error_gpuSimButGpuNotCompiled(); -#endif -} - - -void gpu_densmatr_twoQubitDepolarising_subG(Qureg qureg, int ketQb1, int ketQb2, qreal prob) { - -#if COMPILE_CUDA || COMPILE_CUQUANTUM - - qindex numThreads = qureg.numAmpsPerNode / 4; - qindex numBlocks = getNumBlocks(numThreads); - qindex offset = getBufferRecvInd(); - - int braBit1 = util_getRankBitOfBraQubit(ketQb1, qureg); - int braBit2 = util_getRankBitOfBraQubit(ketQb2, qureg); - auto factors = util_getTwoQubitDepolarisingFactors(prob); - - kernel_densmatr_twoQubitDepolarising_subG <<>> ( - toCuQcomps(qureg.gpuAmps), &toCuQcomps(qureg.gpuCommBuffer)[offset], numThreads, - ketQb1, ketQb2, braBit1, braBit2, factors.c1, factors.c2 + ketQb1, ketQb2, braBit1, braBit2, c2 ); #else diff --git a/quest/src/gpu/gpu_subroutines.hpp b/quest/src/gpu/gpu_subroutines.hpp index 9f773ba8..1b042749 100644 --- a/quest/src/gpu/gpu_subroutines.hpp +++ b/quest/src/gpu/gpu_subroutines.hpp @@ -108,7 +108,6 @@ void gpu_densmatr_twoQubitDepolarising_subC(Qureg qureg, int qubit1, int qubit2, void gpu_densmatr_twoQubitDepolarising_subD(Qureg qureg, int qubit1, int qubit2, qreal prob); void gpu_densmatr_twoQubitDepolarising_subE(Qureg qureg, int qubit1, int qubit2, qreal prob); void gpu_densmatr_twoQubitDepolarising_subF(Qureg qureg, int qubit1, int qubit2, qreal prob); -void gpu_densmatr_twoQubitDepolarising_subG(Qureg qureg, int qubit1, int qubit2, qreal prob); void gpu_densmatr_oneQubitPauliChannel_subA(Qureg qureg, int ketQubit, qreal pI, qreal pX, qreal pY, qreal pZ); void gpu_densmatr_oneQubitPauliChannel_subB(Qureg qureg, int ketQubit, qreal pI, qreal pX, qreal pY, qreal pZ);