Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

misc bug patches #515

Merged
merged 7 commits into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions quest/include/deprecated.h
Original file line number Diff line number Diff line change
Expand Up @@ -964,9 +964,11 @@ static inline PauliStrSum _createPauliStrSumFromCodes(int numQubits, _NoWarnPaul

PauliStr* strings = (PauliStr*) malloc(numTerms * sizeof *strings);
for (int i=0; i<numTerms; i++) {
int codes[100];

int codes[100]; // assumes numQubits<=100
for (int j=0; j<numQubits && j<100; j++)
codes[i] = (int) allPauliCodes[i*numQubits+j];
codes[j] = (int) allPauliCodes[i*numQubits+j];

strings[i] = getPauliStr(codes, targs, numQubits);
}

Expand Down
4 changes: 2 additions & 2 deletions quest/src/api/decoherence.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ void mixKrausMap(Qureg qureg, int* qubits, int numQubits, KrausMap map) {
validate_quregFields(qureg, __func__);
validate_quregIsDensityMatrix(qureg, __func__);
validate_targets(qureg, qubits, numQubits, __func__);
validate_mixedAmpsFitInNode(qureg, numQubits, __func__);
validate_krausMapIsCPTP(map, __func__); // also checks fields and is-sync
validate_krausMapMatchesTargets(map, numQubits, __func__);

Expand All @@ -123,8 +124,7 @@ void mixQureg(Qureg outQureg, Qureg inQureg, qreal inProb) {
validate_quregFields(outQureg, __func__);
validate_quregFields(inQureg, __func__);
validate_probability(inProb, __func__);
validate_quregIsDensityMatrix(outQureg, __func__);
validate_quregsCanBeMixed(outQureg, inQureg, __func__);
validate_quregsCanBeMixed(outQureg, inQureg, __func__); // checks outQureg is densmatr

qreal outProb = 1 - inProb;
localiser_densmatr_mixQureg(outProb, outQureg, inProb, inQureg);
Expand Down
31 changes: 20 additions & 11 deletions quest/src/api/operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ void validateAndApplyAnyCtrlAnyTargUnitaryMatrix(Qureg qureg, int* ctrls, int* s
validate_controlsAndTargets(qureg, ctrls, numCtrls, targs, numTargs, caller);
validate_controlStates(states, numCtrls, caller);
validate_matrixDimMatchesTargets(matr, numTargs, caller); // also checks fields and is-synced
validate_mixedAmpsFitInNode(qureg, numTargs, caller);
validate_matrixIsUnitary(matr, caller); // harmlessly rechecks fields and is-synced

auto ctrlVec = util_getVector(ctrls, numCtrls);
Expand Down Expand Up @@ -117,6 +118,7 @@ void multiplyCompMatr2(Qureg qureg, int target1, int target2, CompMatr2 matrix)
validate_quregFields(qureg, __func__);
validate_twoTargets(qureg, target1, target2, __func__);
validate_matrixFields(matrix, __func__); // matrix can be non-unitary
validate_mixedAmpsFitInNode(qureg, 2, __func__);

bool conj = false;
localiser_statevec_anyCtrlTwoTargDenseMatr(qureg, {}, {}, target1, target2, matrix, conj);
Expand Down Expand Up @@ -156,6 +158,7 @@ void multiplyCompMatr(Qureg qureg, int* targets, int numTargets, CompMatr matrix
validate_quregFields(qureg, __func__);
validate_targets(qureg, targets, numTargets, __func__);
validate_matrixDimMatchesTargets(matrix, numTargets, __func__); // also validates fields and is-sync, but not unitarity
validate_mixedAmpsFitInNode(qureg, numTargets, __func__);

bool conj = false;
localiser_statevec_anyCtrlAnyTargDenseMatr(qureg, {}, {}, util_getVector(targets, numTargets), matrix, conj);
Expand Down Expand Up @@ -376,8 +379,8 @@ void multiplyFullStateDiagMatr(Qureg qureg, FullStateDiagMatr matrix) {
bool onlyMultiply = true;
qcomp exponent = qcomp(1, 0);
(qureg.isDensityMatrix)?
localiser_statevec_allTargDiagMatr(qureg, matrix, exponent) :
localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply);
localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply):
localiser_statevec_allTargDiagMatr(qureg, matrix, exponent);
}

void multiplyFullStateDiagMatrPower(Qureg qureg, FullStateDiagMatr matrix, qcomp exponent) {
Expand All @@ -387,8 +390,8 @@ void multiplyFullStateDiagMatrPower(Qureg qureg, FullStateDiagMatr matrix, qcomp

bool onlyMultiply = true;
(qureg.isDensityMatrix)?
localiser_statevec_allTargDiagMatr(qureg, matrix, exponent) :
localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply);
localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply):
localiser_statevec_allTargDiagMatr(qureg, matrix, exponent);
}

void applyFullStateDiagMatr(Qureg qureg, FullStateDiagMatr matrix) {
Expand All @@ -400,8 +403,8 @@ void applyFullStateDiagMatr(Qureg qureg, FullStateDiagMatr matrix) {
bool onlyMultiply = false;
qcomp exponent = qcomp(1, 0);
(qureg.isDensityMatrix)?
localiser_statevec_allTargDiagMatr(qureg, matrix, exponent) :
localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply);
localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply):
localiser_statevec_allTargDiagMatr(qureg, matrix, exponent);
}

void applyFullStateDiagMatrPower(Qureg qureg, FullStateDiagMatr matrix, qcomp exponent) {
Expand All @@ -412,8 +415,8 @@ void applyFullStateDiagMatrPower(Qureg qureg, FullStateDiagMatr matrix, qcomp ex

bool onlyMultiply = false;
(qureg.isDensityMatrix)?
localiser_statevec_allTargDiagMatr(qureg, matrix, exponent) :
localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply);
localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply):
localiser_statevec_allTargDiagMatr(qureg, matrix, exponent);
}


Expand Down Expand Up @@ -617,9 +620,14 @@ void applyMultiStateControlledSqrtSwap(Qureg qureg, int* controls, int* states,
validate_controlsAndTwoTargets(qureg, controls, numControls, target1, target2, __func__);
validate_controlStates(states, numControls, __func__); // permits states==nullptr

// this is likely suboptimal, and there must exist a more
// efficient bespoke strategy for sqrt-SWAP, although given
// it is a little esoteric, optimisation is not worthwhile
// TODO:
// this function effects sqrtSwap as a dense 2-qubit matrix,
// whereas a bespoke communication and simulation strategy is
// clearly possible which we have not supported because the gate
// is somewhat esoteric. As such, we must validate mixed-amps fit

validate_mixedAmpsFitInNode(qureg, 2, __func__); // to throw SqrtSwap error, not generic CompMatr2 error

CompMatr2 matr = getCompMatr2({
{1, 0, 0, 0},
{0, .5+.5_i, .5-.5_i, 0},
Expand Down Expand Up @@ -1224,6 +1232,7 @@ void applySuperOp(Qureg qureg, SuperOp superop, int* targets, int numTargets) {
validate_superOpFields(superop, __func__);
validate_superOpIsSynced(superop, __func__);
validate_superOpDimMatchesTargs(superop, numTargets, __func__);
validate_mixedAmpsFitInNode(qureg, numTargets, __func__);

localiser_densmatr_superoperator(qureg, superop, util_getVector(targets, numTargets));
}
Expand Down
3 changes: 0 additions & 3 deletions quest/src/api/qureg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,6 @@ Qureg validateAndCreateCustomQureg(int numQubits, int isDensMatr, int useDistrib
// automatically overwrite distrib, GPU, and multithread fields which were left as modeflag::USE_AUTO
autodep_chooseQuregDeployment(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env);

// throw error if the user had forced multithreading but GPU accel was auto-chosen
validate_newQuregNotBothMultithreadedAndGpuAccel(useGpuAccel, useMultithread, caller);

Qureg qureg = qureg_populateNonHeapFields(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread);

// always allocate CPU memory
Expand Down
6 changes: 0 additions & 6 deletions quest/src/core/accelerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -747,12 +747,6 @@ void accel_densmatr_twoQubitDepolarising_subF(Qureg qureg, int qubit1, int qubit
gpu_densmatr_twoQubitDepolarising_subF(qureg, qubit1, qubit2, prob):
cpu_densmatr_twoQubitDepolarising_subF(qureg, qubit1, qubit2, prob);
}
// dispatches the subG stage of two-qubit depolarising to the GPU or CPU
// backend, according to the Qureg's accelerated deployment
void accel_densmatr_twoQubitDepolarising_subG(Qureg qureg, int qubit1, int qubit2, qreal prob) {

    if (qureg.isGpuAccelerated)
        gpu_densmatr_twoQubitDepolarising_subG(qureg, qubit1, qubit2, prob);
    else
        cpu_densmatr_twoQubitDepolarising_subG(qureg, qubit1, qubit2, prob);
}



Expand Down
1 change: 0 additions & 1 deletion quest/src/core/accelerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,6 @@ void accel_densmatr_twoQubitDepolarising_subC(Qureg qureg, int qubit1, int qubit
void accel_densmatr_twoQubitDepolarising_subD(Qureg qureg, int qubit1, int qubit2, qreal prob);
void accel_densmatr_twoQubitDepolarising_subE(Qureg qureg, int qubit1, int qubit2, qreal prob);
void accel_densmatr_twoQubitDepolarising_subF(Qureg qureg, int qubit1, int qubit2, qreal prob);
void accel_densmatr_twoQubitDepolarising_subG(Qureg qureg, int qubit1, int qubit2, qreal prob);

void accel_densmatr_oneQubitPauliChannel_subA(Qureg qureg, int qubit, qreal pI, qreal pX, qreal pY, qreal pZ);
void accel_densmatr_oneQubitPauliChannel_subB(Qureg qureg, int qubit, qreal pI, qreal pX, qreal pY, qreal pZ);
Expand Down
40 changes: 20 additions & 20 deletions quest/src/core/autodeployer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ void chooseWhetherToDistributeQureg(int numQubits, int isDensMatr, int &useDistr
// it's ok if we cannot query RAM; if we'd have exceeded it, it's likely we'll exceed auto-threshold and will still distribute
} catch (mem::COULD_NOT_QUERY_RAM &e) {}

// force distribution if GPU deployment is possible but we exceed local VRAM
// force distribution if GPU deployment is available but we exceed local VRAM;
// this is preferable over falling back to CPU-only which would be astonishingly slow
if (useGpuAccel == 1 || useGpuAccel == modeflag::USE_AUTO) {
size_t localGpuMem = gpu_getCurrentAvailableMemoryInBytes();
if (!mem_canQuregFitInMemory(numQubits, isDensMatr, 1, localGpuMem)) {
Expand All @@ -76,47 +77,44 @@ void chooseWhetherToDistributeQureg(int numQubits, int isDensMatr, int &useDistr
}
}

// by now, we know that Qureg can definitely fit into a single GPU, or principally fit into RAM,
// but we may still wish to distribute it so that multiple Quregs don't choke up memory.
// to reach here, we know that Qureg can fit into the remaining memory of a single GPU, or principally
// fit into RAM, but we may still wish to distribute for improved parallelisation and to avoid memory saturation
int effectiveNumQubitsPerNode = mem_getEffectiveNumStateVecQubitsPerNode(numQubits, isDensMatr, numEnvNodes);
useDistrib = (effectiveNumQubitsPerNode >= MIN_NUM_LOCAL_QUBITS_FOR_AUTO_QUREG_DISTRIBUTION);
}


void chooseWhetherToGpuAccelQureg(int numQubits, int isDensMatr, int useDistrib, int &useGpuAccel, int numQuregNodes) {
void chooseWhetherToGpuAccelQureg(int numQubits, int isDensMatr, int &useGpuAccel, int numQuregNodes) {

// if the flag is already set, don't change it
if (useGpuAccel != modeflag::USE_AUTO)
return;

// determine the 'effective number of qubits' each GPU would have to simulate, if distributed
// determine the 'effective number of qubits' each GPU would have to simulate, if distributed...
int effectiveNumQubits = mem_getEffectiveNumStateVecQubitsPerNode(numQubits, isDensMatr, numQuregNodes);

// choose to GPU accelerate only if that's not too few
// and choose to GPU accelerate only if that's not too few
useGpuAccel = (effectiveNumQubits >= MIN_NUM_LOCAL_QUBITS_FOR_AUTO_QUREG_GPU_ACCELERATION);

// notice there was no automatic disabling of GPU acceleration in the scenario that the local
// partition exceeded GPU memory. This is because such a scenario would be catastrophically
// slow and astonish users by leaving GPUs idle in intensive simulation. Instead, we auto-deploy
// to GPU and subsequent validation will notice we exceeded GPU memory.
// to GPU anyway and subsequent validation will notice we exceeded GPU memory and report an error.
}


void chooseWhetherToMultithreadQureg(int numQubits, int isDensMatr, int useDistrib, int useGpuAccel, int &useMultithread, int numQuregNodes) {
void chooseWhetherToMultithreadQureg(int numQubits, int isDensMatr, int &useMultithread, int numQuregNodes) {

// if the flag is already set (user-given, or inferred from env), don't change it
if (useMultithread != modeflag::USE_AUTO)
return;

// if GPU-aceleration was chosen, disable auto multithreading...
if (useGpuAccel) {
useMultithread = 0;
return;
}

// otherwise, we're not GPU-accelerating, and should choose to multithread based on Qureg size
// otherwise, choose to multithread based on Qureg size
int effectiveNumQubits = mem_getEffectiveNumStateVecQubitsPerNode(numQubits, isDensMatr, numQuregNodes);
useMultithread = (effectiveNumQubits >= MIN_NUM_LOCAL_QUBITS_FOR_AUTO_QUREG_MULTITHREADING);

// note the qureg may be simultaneously GPU-accelerated and so never use its
// multithreaded CPU routines, except in functions which accept multiple Quregs
}


Expand All @@ -125,8 +123,6 @@ void autodep_chooseQuregDeployment(int numQubits, int isDensMatr, int &useDistri
// preconditions:
// - the given configuration is compatible with env (assured by prior validation)
// - this means no deployment is forced (=1) which is incompatible with env
// - it also means GPU-acceleration and multithreading are not simultaneously forced
// (although they may still be left automatic and need explicit revision)

// disable any automatic deployments not permitted by env (it's guaranteed we never overwrite =1 to =0)
if (!env.isDistributed)
Expand All @@ -141,11 +137,15 @@ void autodep_chooseQuregDeployment(int numQubits, int isDensMatr, int &useDistri
if (env.numNodes == 1)
useDistrib = 0;

// overwrite any auto options (== modeflag::USE_AUTO)
// overwrite useDistrib
chooseWhetherToDistributeQureg(numQubits, isDensMatr, useDistrib, useGpuAccel, env.numNodes);
int numQuregNodes = (useDistrib)? env.numNodes : 1;
chooseWhetherToGpuAccelQureg(numQubits, isDensMatr, useDistrib, useGpuAccel, numQuregNodes);
chooseWhetherToMultithreadQureg(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, numQuregNodes);

// overwrite useGpuAccel
chooseWhetherToGpuAccelQureg(numQubits, isDensMatr, useGpuAccel, numQuregNodes);

// overwrite useMultithread
chooseWhetherToMultithreadQureg(numQubits, isDensMatr, useMultithread, numQuregNodes);
}


Expand Down
8 changes: 7 additions & 1 deletion quest/src/core/errors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ void assert_pairRankIsDistinct(Qureg qureg, int pairRank) {

// asserts that a distributed exchange does not send and receive overlapping
// portions of the communication buffer (MPI forbids aliased send/recv regions).
// The half-open regions [sendInd, sendInd+numAmps) and [recvInd, recvInd+numAmps)
// intersect if and only if each begins before the other ends; a one-sided check
// (e.g. sendInd < recvInd + numAmps) misfires whenever the send region lies
// entirely before the receive region, so we test both directions.
void assert_bufferSendRecvDoesNotOverlap(qindex sendInd, qindex recvInd, qindex numAmps) {

    // an empty exchange trivially cannot overlap
    if (numAmps <= 0)
        return;

    // symmetric half-open interval intersection test; correct regardless
    // of whether the send or the receive region comes first in the buffer
    if (sendInd < recvInd + numAmps && recvInd < sendInd + numAmps)
        raiseInternalError("A distributed function attempted to send and receive portions of the buffer which overlapped.");
}

Expand Down Expand Up @@ -684,6 +684,12 @@ void assert_utilsGivenDensMatr(Qureg qureg) {
raiseInternalError("A utility function was given a statevector where a density matrix was expected.");
}

// internal sanity check: epsilon-based validators (isUnitary, isHermitian,
// isCPTP) must never be invoked when the validation epsilon is disabled (zero)
void assert_utilsGivenNonZeroEpsilon(qreal eps) {

    if (eps != 0)
        return;

    raiseInternalError("A utility function (isUnitary, isHermitian, isCPTP) received an epsilon of zero, which should have precluded it being called.");
}



/*
Expand Down
2 changes: 2 additions & 0 deletions quest/src/core/errors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,8 @@ void assert_utilsGivenStateVec(Qureg qureg);

void assert_utilsGivenDensMatr(Qureg qureg);

void assert_utilsGivenNonZeroEpsilon(qreal eps);



/*
Expand Down
46 changes: 34 additions & 12 deletions quest/src/core/localiser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -803,9 +803,15 @@ void anyCtrlMultiSwapBetweenPrefixAndSuffix(Qureg qureg, vector<int> ctrls, vect
// a communicator which may be inelegant alongside our own distribution scheme.

// perform necessary swaps to move all targets into suffix, each of which invokes communication
for (size_t i=0; i<targsA.size(); i++)
if (targsA[i] != targsB[i])
anyCtrlSwapBetweenPrefixAndSuffix(qureg, ctrls, ctrlStates, targsA[i], targsB[i]);
for (size_t i=0; i<targsA.size(); i++) {

if (targsA[i] == targsB[i])
continue;

int suffixTarg = std::min(targsA[i], targsB[i]);
int prefixTarg = std::max(targsA[i], targsB[i]);
anyCtrlSwapBetweenPrefixAndSuffix(qureg, ctrls, ctrlStates, suffixTarg, prefixTarg);
}
}


Expand Down Expand Up @@ -1401,18 +1407,25 @@ void twoQubitDepolarisingOnPrefixAndPrefix(Qureg qureg, int ketQb1, int ketQb2,
int braBit1 = util_getRankBitOfBraQubit(ketQb1, qureg);
int braBit2 = util_getRankBitOfBraQubit(ketQb2, qureg);

// scale 25% of (non-communicated) amps
// pack unscaled amps before subsequent scaling
qindex numPacked = accel_statevec_packAmpsIntoBuffer(qureg, {ketQb1,ketQb2}, {braBit1,braBit2});

// scale all amps
accel_densmatr_twoQubitDepolarising_subE(qureg, ketQb1, ketQb2, prob);

// pack and swap 25% of buffer, and use it to modify 25% of local amps
// swap the buffer with 3 other nodes to update local amps
int pairRank1 = util_getRankWithBraQubitFlipped(ketQb1, qureg);
exchangeAmpsToBuffersWhereQubitsAreInStates(qureg, pairRank1, {ketQb1,ketQb2}, {braBit1,braBit2});
int pairRank2 = util_getRankWithBraQubitFlipped(ketQb2, qureg);
int pairRank3 = util_getRankWithBraQubitsFlipped({ketQb1,ketQb2}, qureg);

comm_exchangeSubBuffers(qureg, numPacked, pairRank1);
accel_densmatr_twoQubitDepolarising_subF(qureg, ketQb1, ketQb2, prob);

// pack and swap another 25% of buffer (we could pack during subE, but we choose not to)
int pairRank2 = util_getRankWithBraQubitFlipped(ketQb2, qureg);
exchangeAmpsToBuffersWhereQubitsAreInStates(qureg, pairRank2, {ketQb1,ketQb2}, {braBit1,braBit2});
accel_densmatr_twoQubitDepolarising_subG(qureg, ketQb1, ketQb2, prob);
comm_exchangeSubBuffers(qureg, numPacked, pairRank2);
accel_densmatr_twoQubitDepolarising_subF(qureg, ketQb1, ketQb2, prob);

comm_exchangeSubBuffers(qureg, numPacked, pairRank3);
accel_densmatr_twoQubitDepolarising_subF(qureg, ketQb1, ketQb2, prob);
}


Expand Down Expand Up @@ -1755,8 +1768,17 @@ qreal localiser_densmatr_calcProbOfMultiQubitOutcome(Qureg qureg, vector<int> qu

if (doAnyLocalStatesHaveQubitValues(qureg, braQubits, outcomes)) {

// such nodes need to know all ket qubits (which are all suffix)
prob += accel_densmatr_calcProbOfMultiQubitOutcome_sub(qureg, qubits, outcomes);
// such nodes need only know the ket qubits/outcomes for which the bra-qubits are in suffix
vector<int> ketQubitsWithBraInSuffix;
vector<int> ketOutcomesWithBraInSuffix;
for (int q=0; q<qubits.size(); q++)
if (util_isBraQubitInSuffix(qubits[q], qureg)) {
ketQubitsWithBraInSuffix.push_back(qubits[q]);
ketOutcomesWithBraInSuffix.push_back(outcomes[q]);
}

prob += accel_densmatr_calcProbOfMultiQubitOutcome_sub(
qureg, ketQubitsWithBraInSuffix, ketOutcomesWithBraInSuffix);
}

// all nodes must sum their probabilities (unless qureg was cloned per-node), for consensus
Expand Down
Loading