diff --git a/.github/workflows/windows-build.yml b/.github/workflows/windows-build.yml
new file mode 100644
index 000000000..74817bf0c
--- /dev/null
+++ b/.github/workflows/windows-build.yml
@@ -0,0 +1,56 @@
+name: Windows build
+
+on:
+  push:
+    branches:
+      - master
+      - develop
+  pull_request:
+    branches:
+      - '**'
+
+jobs:
+
+  gnumake-build:
+    name: GNUMake test of demo on Windows MSVC
+    runs-on: windows-latest
+    strategy:
+      matrix:
+        float-precision: [2]
+
+    steps:
+      - uses: actions/checkout@v2
+      - uses: ilammy/msvc-dev-cmd@v1
+      - name: get example code
+        shell: cmd
+        run: |
+          copy examples\makefile .
+          copy examples\tutorial_example.c .
+      - name: compile example 
+        shell: cmd
+        run: make COMPILER=cl COMPILER_TYPE=MSVC WINDOWS_ARCH=64 EXE=demotest PRECISION=${{matrix.float-precision}}
+      - name: run example
+        shell: cmd
+        run: demotest.exe
+        
+  cmake-nmake-build:
+    name: CMake with NMake test of demo on Windows MSVC
+    runs-on: windows-latest
+    strategy:
+      matrix:
+        float-precision: [2]
+
+    steps:
+      - uses: actions/checkout@v2
+      - uses: ilammy/msvc-dev-cmd@v1
+      - name: make build directory
+        shell: cmd
+        run: mkdir build_dir
+      - name: cmake configure
+        shell: cmd
+        run: cmake .. -G "NMake Makefiles" -DOUTPUT_EXE="demotest.exe" -DPRECISION:STRING=${{matrix.float-precision}}
+        working-directory: build_dir
+      - name: nmake build
+        shell: cmd
+        run: nmake
+        working-directory: build_dir
\ No newline at end of file
diff --git a/QuEST/CMakeLists.txt b/QuEST/CMakeLists.txt
index f45d5981e..d8e12cb9b 100644
--- a/QuEST/CMakeLists.txt
+++ b/QuEST/CMakeLists.txt
@@ -136,7 +136,7 @@ if (${MULTITHREADED} AND NOT ${GPUACCELERATED})
     set(OpenMP_C_VERSION ${OpenMP_C_VERSION} CACHE STRING "OpenMP C version")
 
     # MSVC, for instance, only implements OpenMP 2.0 (as of 2019)
-    if (OpenMP_C_VERSION VERSION_LESS "3.1")  # todo find the real minimum required
+    if (OpenMP_C_VERSION VERSION_LESS "2.0")  # todo find the real minimum required
       set(MULTITHREADED 0)
       message(WARNING "Found OpenMP ${OpenMP_C_VERSION} but this is too \
              old. Turning OpenMP support OFF.")
@@ -173,9 +173,7 @@ endif()
 # ----- C COMPILER FLAGS --------------------------------------------------
 
 # set C flags that are common between compilers and build types
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \
-    -std=c99"
-)
+set (CMAKE_C_STANDARD 99)
 
 # Use -O2 for all but debug mode by default 
 if (NOT("${CMAKE_BUILD_TYPE}" STREQUAL "Debug"))
@@ -184,16 +182,19 @@ if (NOT("${CMAKE_BUILD_TYPE}" STREQUAL "Debug"))
     )
 endif()
 
+# Set C flags to use in debug mode. "-g" is GCC-style syntax, so it is skipped
+# for MSVC; these are C flags, hence the C (not C++) compiler ID is tested.
+if (NOT("${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC"))
+  set(CMAKE_C_FLAGS_DEBUG
+      "-g"
+  )
+endif()
+
 # Set c flags for release
 set(CMAKE_C_FLAGS_RELEASE 
     "-O2"
 )
 
-# Set c flags to use in debug mode
-set(CMAKE_C_FLAGS_DEBUG 
-    "-g"
-)
-
 # TODO standardize
 # set C compiler flags based on compiler type
 if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
@@ -213,14 +214,17 @@ elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel")
   )
 elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC")
   # using Visual Studio
+  string(REGEX REPLACE "/W3" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")  # quoted: stays one argument even if empty
+  string(REGEX REPLACE "-W3" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")  # same, for the dash-prefixed spelling
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \
+    -w"
+  )  # -w is appended after any W3 warning-level flag has been stripped above
 endif()
 
 # ----- C++ COMPILER FLAGS --------------------------------------------------
 
 # set C++ flags that are common between compilers and build types
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
-    -std=c++98 -Wall"
-)
+set (CMAKE_CXX_STANDARD 98)
 
 # Use -O2 for all but debug mode by default 
 if (NOT("${CMAKE_BUILD_TYPE}" STREQUAL "Debug"))
@@ -243,20 +247,25 @@ set(CMAKE_CXX_FLAGS_DEBUG
 if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
   # using Clang
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
-    -mavx"
+    -mavx -Wall"
   )
 elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
   # using GCC
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
-    -mavx"
+    -mavx -Wall"
   )
 elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
   # using Intel
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
-    -xAVX -axCORE-AVX2 -diag-disable -cpu-dispatch"
+    -xAVX -axCORE-AVX2 -diag-disable -cpu-dispatch -Wall"
   )
 elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
   # using Visual Studio
+  string(REGEX REPLACE "/W3" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")  # strip W3 from the C++ flags (not the C flags)
+  string(REGEX REPLACE "-W3" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")  # same, for the dash-prefixed spelling
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
+    -w"
+  )  # -w is appended after any W3 warning-level flag has been stripped above
 endif()
 
 if (VERBOSE_CMAKE)
diff --git a/QuEST/include/QuEST.h b/QuEST/include/QuEST.h
index 4d1a9a68f..21ef8f1c5 100644
--- a/QuEST/include/QuEST.h
+++ b/QuEST/include/QuEST.h
@@ -357,11 +357,14 @@ typedef struct Qureg
  *
  * @ingroup type
  * @author Ania Brown
+ * @author Tyson Jones (seeding)
  */
 typedef struct QuESTEnv
 {
     int rank;
     int numRanks;
+    unsigned long int* seeds;
+    int numSeeds;
 } QuESTEnv;
 
 
@@ -739,6 +742,7 @@ ComplexMatrixN createComplexMatrixN(int numQubits);
 void destroyComplexMatrixN(ComplexMatrixN matr);
 
 #ifndef __cplusplus
+#ifndef _WIN32
 /** Initialises a ComplexMatrixN instance to have the passed
  * \p real and \p imag values. This allows succint population of any-sized
  * ComplexMatrixN, e.g. through 2D arrays:
@@ -762,7 +766,8 @@ void destroyComplexMatrixN(ComplexMatrixN matr);
  * @author Tyson Jones
  */
 void initComplexMatrixN(ComplexMatrixN m, qreal real[][1<<m.numQubits], qreal imag[][1<<m.numQubits]);
-#endif 
+#endif
+#endif
 
 /** Dynamically allocates a Hamiltonian expressed as a real-weighted sum of products of Pauli operators.
  *
@@ -1462,7 +1467,9 @@ void initPureState(Qureg qureg, Qureg pure);
  */
 void initDebugState(Qureg qureg);
 
-/** Initialise state-vector \p qureg by specifying all amplitudes.
+/** Initialise \p qureg by specifying all amplitudes.
+ * For density matrices, it is assumed the amplitudes have been flattened 
+ * column-wise into the given arrays.
  *
  * The real and imaginary components of the amplitudes are passed in separate arrays,
  * \p reals and \p imags,
@@ -1470,8 +1477,8 @@ void initDebugState(Qureg qureg);
  * There is no automatic checking that the passed arrays are L2 normalised, so this 
  * can be used to prepare \p qureg in a non-physical state.
  *
- * In distributed mode, this would require the complete state-vector to fit in 
- * every node. To manually prepare a state-vector which cannot fit in every node,
+ * In distributed mode, this would require the complete state to fit in 
+ * every node. To manually prepare a state for which all amplitudes cannot fit into a single node,
  * use setAmps()
  *
  * @see
@@ -1481,8 +1488,6 @@ void initDebugState(Qureg qureg);
  * @param[in,out] qureg the ::Qureg to overwrite
  * @param[in] reals array of the real components of the new amplitudes
  * @param[in] imags array of the imaginary components of the new amplitudes
- * @throws invalidQuESTInputError()
- * - if \p qureg is not a state-vector (i.e. is a density matrix)
  * @throws segmentation-fault
  * - if either \p reals or \p imags have fewer than `qureg.numAmpsTotal` elements
  * @author Tyson Jones
@@ -3134,26 +3139,27 @@ qreal calcProbOfOutcome(Qureg qureg, int measureQubit, int outcome);
  * @author Tyson Jones
  */
 void calcProbOfAllOutcomes(qreal* outcomeProbs, Qureg qureg, int* qubits, int numQubits);
-// DEBUG
-void TEST_calcProbOfAllOutcomes(qreal* retProbs, Qureg qureg, int* qubits, int numQubits);
-void SHARED_calcProbOfAllOutcomes(qreal* outcomeProbs, Qureg qureg, int* qubits, int numQubits);
 
 /** Updates \p qureg to be consistent with measuring \p measureQubit in the given 
  * \p outcome (0 or 1), and returns the probability of such a measurement outcome. 
- * This is effectively performing a projection, or a measurement with a forced outcome.
+ * This is effectively performing a renormalising projection, or a measurement with a forced outcome.
  * This is an irreversible change to the state, whereby computational states
  * inconsistant with the outcome are given zero amplitude and the \p qureg is renormalised.
- * Exits with error if the given outcome has a near zero probability, and so cannot be
+ * The given outcome must not have a near zero probability, else it cannot be
  * collapsed into.
  *
  * Note that the collapse probably used for renormalisation is calculated for 
  * \p outcome \p = \p 0, and assumed 1 minus this probability if \p outcome \p = \p 1.
  * Hence this routine will not correctly project un-normalised quregs onto 
  * \p outcome \p = \p 1.
+ *
+ * To avoid renormalisation after projection, or force projection into non-physical 
+ * states with very small probability, use applyProjector().
  * 
  * @see
  * - measure()
  * - measureWithStats()
+ * - applyProjector()
  *
  * @ingroup normgate
  * @param[in,out] qureg object representing the set of all qubits
@@ -3174,12 +3180,13 @@ qreal collapseToOutcome(Qureg qureg, int measureQubit, int outcome);
  * Outcome probabilities are weighted by the state vector, which is irreversibly
  * changed after collapse to be consistent with the outcome.
  *
- * > The random outcome generator can be seeded with seedQuESTDefault(), which 
- * > is safe to use in distributed mode.
+ * > The random outcome generator is seeded by seedQuESTDefault() within 
+ * > createQuESTEnv(), unless later overridden by seedQuEST().
  * 
  * @see
  * - measureWithStats()
  * - collapseToOutcome()
+ * - seedQuEST()
  * - seedQuESTDefault()
  * 
  * @ingroup normgate
@@ -3198,13 +3205,14 @@ int measure(Qureg qureg, int measureQubit);
  * Outcome probabilities are weighted by the state vector, which is irreversibly
  * changed after collapse to be consistent with the outcome.
  *
- * > The random outcome generator can be seeded with seedQuESTDefault(), which 
- * > is safe to use in distributed mode.
+ * > The random outcome generator is seeded by seedQuESTDefault() within 
+ * > createQuESTEnv(), unless later overridden by seedQuEST().
  *
  * @see 
  * - measure()
  * - collapseToOutcome()
  * - seedQuESTDefault()
+ * - seedQuEST()
  *
  * @ingroup normgate
  * @param[in, out] qureg object representing the set of all qubits
@@ -3298,47 +3306,102 @@ Complex calcInnerProduct(Qureg bra, Qureg ket);
  */
 qreal calcDensityInnerProduct(Qureg rho1, Qureg rho2);
 
-/** Seed the Mersenne Twister used for random number generation in the QuEST environment with an example
- * default seed.
- * This default seeding function uses the mt19937 init_by_array function with two keys -- 
- * time and pid. Subsequent calls to mt19937 genrand functions will use this seeding. 
- * For a multi process code, the same seed is given to all process, therefore this seeding is only
- * appropriate to use for functions such as measure where all processes require the same random value.
- *
- * For more information about the MT, see http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html
+/** Seeds the random number generator with the (master node) current time and process ID.
+ * 
+ * This is the default seeding used by createQuESTEnv(), and determines the 
+ * outcomes in functions like measure() and measureWithStats().
  * 
- * > To manually generate a key to seed MT, use seedQuEST()
+ * In distributed mode, every node agrees on the seed (nominated by the master node)
+ * such that every node generates the same sequence of pseudorandom numbers.
  *
- * Presently, only the following functions involve random generation (through
- * internal function generateMeasurementOutcome()):
- * - measure()
- * - measureWithStats()
+ * > QuEST uses the 
+ * > <a href="http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html">Mersenne Twister</a>
+ * > for random number generation. 
  *
  * @see
- * - seedQuEST()
+ * - Use seedQuEST() to provide a custom seed, overriding the default.
+ * - Use getQuESTSeeds() to obtain the seeds currently being used for RNG.
  *
  * @ingroup debug
+ * @param[in] env a pointer to the ::QuESTEnv runtime environment
  * @author Ania Brown
  * @author Balint Koczor (Windows compatibility)
+ * @author Tyson Jones (doc)
  **/
-void seedQuESTDefault(void);
+void seedQuESTDefault(QuESTEnv *env);
 
-/** Seed the Mersenne Twister used for random number generation in the QuEST environment with
- * a user defined seed.
- * This function uses the mt19937 init_by_array function with numSeeds keys supplied by the user.
- * Subsequent calls to mt19937 genrand functions will use this seeding. 
- * For a multi process code, the same seed is given to all process, therefore this seeding is only
- * appropriate to use for functions such as measure where all processes require the same random value.
+/** Seeds the random number generator with a custom array of key(s), overriding the
+ * default keys.
  *
- * For more information about the MT, see http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html
+ * This determines the sequence of outcomes in functions like measure() and measureWithStats().
+ *
+ * In distributed mode, the key(s) passed to the master node will be broadcast to all 
+ * other nodes, such that every node generates the same sequence of pseudorandom numbers.
+ *
+ * This function will copy the contents of \p seedArray into a permanent array 
+ * `env.seeds`, so \p seedArray is afterward safe to free.
+ *
+ * > QuEST uses the 
+ * > <a href="http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html">Mersenne Twister</a>
+ * > for random number generation. 
+ *
+ * @see
+ * - Use seedQuESTDefault() to seed via the current timestamp and process id.
+ * - Use getQuESTSeeds() to obtain the seeds currently being used for RNG.
  *
  * @ingroup debug
+ * @param[in,out] env a pointer to the ::QuESTEnv runtime environment, whose recorded seeds are overwritten
  * @param[in] seedArray Array of integers to use as seed. 
  *  This allows the MT to be initialised with more than a 32-bit integer if required
  * @param[in] numSeeds Length of seedArray
  * @author Ania Brown
+ * @author Tyson Jones (doc)
  **/
-void seedQuEST(unsigned long int *seedArray, int numSeeds);
+void seedQuEST(QuESTEnv *env, unsigned long int *seedArray, int numSeeds);
+
+/** Obtain the seeds presently used in random number generation.
+ *
+ * This function sets argument \p seeds to the address of the array of keys
+ * which have seeded QuEST's
+ * <a href="http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html">Mersenne Twister</a>
+ * random number generator. \p numSeeds is set to the length of \p seeds.
+ * These are the seeds which inform the outcomes of random functions like 
+ * measure(), and are set using seedQuEST() and seedQuESTDefault().
+ *
+ * > The output \p seeds array <b>must not</b> be freed, and should not be modified.
+ *
+ * Obtaining QuEST's seeds is useful for seeding your own random number generators,
+ * so that a simulation (with random QuEST measurements, and your own random decisions)
+ * can be precisely repeated later, just by calling seedQuEST().
+ *
+ * Note this function merely sets the arguments to the attributes for \p env. 
+ * I.e.
+ * ```
+ *     unsigned long int* seeds;
+ *     int numSeeds;
+ *     getQuESTSeeds(env, &seeds, &numSeeds);
+ *     
+ *     func(seeds, numSeeds);
+ * ```
+ * is equivalent to
+ * ```
+ *     func(env.seeds, env.numSeeds);
+ * ```
+ * However, one should not rely upon their local pointer from getQuESTSeeds() to be 
+ * automatically updated after a subsequent call to seedQuEST() or seedQuESTDefault().
+ * Instead, getQuESTSeeds() should be called again.
+ *
+ * @see
+ * - seedQuEST()
+ * - seedQuESTDefault()
+ *
+ * @ingroup debug
+ * @param[in] env the ::QuESTEnv runtime environment
+ * @param[out] seeds a pointer to an uninitialised array pointer, which is set to the address of the seeds array
+ * @param[out] numSeeds a pointer to an integer, which is set to the length of the seeds array
+ * @author Tyson Jones
+ **/
+void getQuESTSeeds(QuESTEnv env, unsigned long int** seeds, int* numSeeds);
 
 /** Enable QASM recording. Gates applied to qureg will here-after be added to a
  * growing log of QASM instructions, progressively consuming more memory until 
@@ -5354,6 +5417,7 @@ extern "C" void invalidQuESTInputError(const char* errMsg, const char* errFunc)
 void invalidQuESTInputError(const char* errMsg, const char* errFunc);
  
 #ifndef __cplusplus
+#ifndef _WIN32
  // hide this function from doxygen
  /// \cond HIDDEN_SYMBOLS
 /** Creates a ComplexMatrixN struct with .real and .imag arrays kept entirely 
@@ -5398,6 +5462,7 @@ ComplexMatrixN bindArraysToStackComplexMatrixN(
     int numQubits, qreal re[][1<<numQubits], qreal im[][1<<numQubits], 
     qreal** reStorage, qreal** imStorage);
 #endif
+#endif
 /// \endcond
 
 // hide this function from doxygen
@@ -6171,6 +6236,11 @@ void applyNamedPhaseFuncOverrides(Qureg qureg, int* qubits, int* numQubitsPerReg
  *   \f[
  *      f(\vec{r}, \theta)|_{\theta=0.5} \; = \; \begin{cases} \pi & \;\;\; \vec{r}=\vec{0} \\ \displaystyle 0.5 \left[ \sum_j^{\text{numRegs}} {r_j}^2 \right]^{-1/2} & \;\;\;\text{otherwise} \end{cases}.
  *   \f] 
+ *   Notice the order of the parameters matches the order of the words in the \p phaseFunc.
+ *   > Functions \p SCALED_INVERSE_SHIFTED_NORM and \p SCALED_INVERSE_SHIFTED_DISTANCE,
+ *   > which can have denominators arbitrarily close to zero, will invoke the 
+ *   > divergence parameter whenever the denominator is smaller than (or equal to)
+ *   > machine precision `REAL_EPS`.
  *
  * - Functions allowing the shifting of sub-register values, which are \p SCALED_INVERSE_SHIFTED_NORM
  *   and \p SCALED_INVERSE_SHIFTED_DISTANCE, need these shift values to be passed in the \p params
@@ -6496,7 +6566,7 @@ void applyFullQFT(Qureg qureg);
  *      = 
  *      \frac{1}{\sqrt{2^n}}
  *       \sum\limits_{j=0}^{2^N-1} \alpha_j \left( 
- *           \sum\limits_{y=0}^{2^n-1} e^{2 \pi \, i \, x_j \, y / 2^N} \; 
+ *           \sum\limits_{y=0}^{2^n-1} e^{2 \pi \, i \, x_j \, y / 2^n} \; 
  *           |y,r_j \rangle
  *       \right)
  *   \f]
@@ -6535,6 +6605,30 @@ void applyFullQFT(Qureg qureg);
  */
 void applyQFT(Qureg qureg, int* qubits, int numQubits);
 
+/** Force the target \p qubit of \p qureg into the given classical \p outcome, via a 
+ * non-renormalising projection.
+ *
+ * This function zeroes all amplitudes in the state-vector or density-matrix which 
+ * correspond to the opposite \p outcome given. Unlike collapseToOutcome(), it does 
+ * not thereafter normalise \p qureg, and hence may leave it in a non-physical state.
+ *
+ * Note there is no requirement that the \p outcome state has a non-zero probability, and hence 
+ * this function may leave \p qureg in a blank state, like that produced by initBlankState().
+ * 
+ * @see
+ * - collapseToOutcome() for a norm-preserving equivalent, like a forced measurement
+ *
+ * @ingroup operator
+ * @param[in,out] qureg a state-vector or density matrix to modify
+ * @param[in] qubit the qubit to which to apply the projector 
+ * @param[in] outcome the single-qubit outcome (`0` or `1`) to project \p qubit into
+ * @throws invalidQuESTInputError()
+ * - if \p qubit is outside [0, `qureg.numQubitsRepresented`)
+ * - if \p outcome is not in {0,1}
+ * @author Tyson Jones
+ */
+void applyProjector(Qureg qureg, int qubit, int outcome);
+
 // end prevention of C++ name mangling
 #ifdef __cplusplus
 }
diff --git a/QuEST/src/CPU/QuEST_cpu.c b/QuEST/src/CPU/QuEST_cpu.c
index dbcb341b8..dd0291827 100644
--- a/QuEST/src/CPU/QuEST_cpu.c
+++ b/QuEST/src/CPU/QuEST_cpu.c
@@ -27,6 +27,12 @@
 # include <omp.h>
 # endif
 
+/* To support MSVC, we must avoid the use of VLAs in statevec_multiControlledMultiQubitUnitaryLocal.
+ * On Windows, we instead allocate those arrays using _malloca (and release them with _freea).
+ */
+#ifdef _WIN32
+    #include <malloc.h>
+#endif
 
 
 /*
@@ -803,23 +809,28 @@ void densmatr_collapseToKnownProbOutcome(Qureg qureg, int measureQubit, int outc
     if (locNumAmps <= outerBlockSize) {
         
         // if this is an undesired outer block, kill all elems
-        if (outerBit != outcome)
-            return zeroSomeAmps(qureg, 0, qureg.numAmpsPerChunk);
+        if (outerBit != outcome) {
+            zeroSomeAmps(qureg, 0, qureg.numAmpsPerChunk);
+            return;
+        }
         
         // othwerwise, if this is a desired outer block, and also entirely an inner block
         if (locNumAmps <= innerBlockSize) {
             
             // and that inner block is undesired, kill all elems
-            if (innerBit != outcome)
-                return zeroSomeAmps(qureg, 0, qureg.numAmpsPerChunk);
+            if (innerBit != outcome) 
+                zeroSomeAmps(qureg, 0, qureg.numAmpsPerChunk);
             // otherwise normalise all elems
             else
-                return normaliseSomeAmps(qureg, totalStateProb, 0, qureg.numAmpsPerChunk);
+                normaliseSomeAmps(qureg, totalStateProb, 0, qureg.numAmpsPerChunk);
+                
+            return;
         }
                 
         // otherwise this is a desired outer block which contains 2^a inner blocks; kill/renorm every second inner block
-        return alternateNormZeroingSomeAmpBlocks(
+        alternateNormZeroingSomeAmpBlocks(
             qureg, totalStateProb, innerBit==outcome, 0, qureg.numAmpsPerChunk, innerBlockSize);
+        return;
     }
     
     // Otherwise, this chunk's amps contain multiple outer blocks (and hence multiple inner blocks)
@@ -1919,13 +1930,24 @@ void statevec_multiControlledMultiQubitUnitaryLocal(Qureg qureg, long long int c
     
     // each thread/task will record and modify numTargAmps amplitudes, privately
     // (of course, tasks eliminated by the ctrlMask won't edit their allocation)
-    long long int ampInds[numTargAmps];
-    qreal reAmps[numTargAmps];
-    qreal imAmps[numTargAmps];
+    //
+    // If we're NOT on windows, we can fortunately use the stack directly
+    #ifndef _WIN32
+        long long int ampInds[numTargAmps];
+        qreal reAmps[numTargAmps];
+        qreal imAmps[numTargAmps];
+
+        int sortedTargs[numTargs];
+    // on Windows, with no VLA, we can use _malloca to allocate on stack (must free)
+    #else
+        long long int* ampInds;
+        qreal* reAmps;
+        qreal* imAmps;
+        int* sortedTargs = (int*) _malloca(numTargs * sizeof *sortedTargs);
+    #endif
 
     // we need a sorted targets list to find thisInd00 for each task.
     // we can't modify targets, because the user-ordering of targets matters in u
-    int sortedTargs[numTargs]; 
     for (int t=0; t < numTargs; t++) 
         sortedTargs[t] = targs[t];
     qsort(sortedTargs, numTargs, sizeof(int), qsortComp);
@@ -1937,6 +1959,13 @@ void statevec_multiControlledMultiQubitUnitaryLocal(Qureg qureg, long long int c
     private  (thisTask,thisInd00,thisGlobalInd00,ind,i,t,r,c,reElem,imElem,  ampInds,reAmps,imAmps)
 # endif
     {
+        // when manually allocating array memory (on Windows), this must be done in each thread
+        // separately and is not performed automatically by declaring a var as omp-private
+        # ifdef _WIN32
+            ampInds = (long long int*) _malloca(numTargAmps * sizeof *ampInds);
+            reAmps = (qreal*) _malloca(numTargAmps * sizeof *reAmps);
+            imAmps = (qreal*) _malloca(numTargAmps * sizeof *imAmps);
+        # endif
 # ifdef _OPENMP
 # pragma omp for schedule (static)
 # endif
@@ -1981,7 +2010,17 @@ void statevec_multiControlledMultiQubitUnitaryLocal(Qureg qureg, long long int c
                 }
             }
         }
+        // on Windows, we must explicitly free the stack structures
+        #ifdef _WIN32
+            _freea(ampInds);
+            _freea(reAmps);
+            _freea(imAmps);
+        #endif
     }
+
+    #ifdef _WIN32
+        _freea(sortedTargs);
+    #endif
 }
 
 void statevec_unitaryLocal(Qureg qureg, int targetQubit, ComplexMatrix2 u)
@@ -3515,6 +3554,7 @@ void statevec_calcProbOfAllOutcomesLocal(qreal* outcomeProbs, Qureg qureg, int*
      * or a dynamic list of omp locks (duplicates memory cost of outcomeProbs).
      * Using locks was always slower than the method below. Using reduction was only 
      * faster for very few threads, or very few outcomeProbs.
+     * Finally, we exclude the 'update' clause after 'atomic' to maintain MSVC compatibility.
      */
 
     long long int numOutcomeProbs = (1 << numQubits);
@@ -3566,7 +3606,7 @@ void statevec_calcProbOfAllOutcomesLocal(qreal* outcomeProbs, Qureg qureg, int*
             
             // atomicly update corresponding outcome array element
             # ifdef _OPENMP
-            # pragma omp atomic update
+            # pragma omp atomic
             # endif
             outcomeProbs[outcomeInd] += prob;
         }
@@ -3637,7 +3677,7 @@ void densmatr_calcProbOfAllOutcomesLocal(qreal* outcomeProbs, Qureg qureg, int*
     
             // atomicly update corresponding outcome array element
             # ifdef _OPENMP
-            # pragma omp atomic update
+            # pragma omp atomic
             # endif
             outcomeProbs[outcomeInd] += stateRe[index];
         }
@@ -4493,15 +4533,15 @@ void statevec_applyParamNamedPhaseFuncOverrides(
                         for (r=0; r<numRegs; r++)
                             norm += phaseInds[r]*phaseInds[r];
                     norm = sqrt(norm);
-
+ 
                     if (phaseFuncName == NORM)
                         phase = norm;
                     else if (phaseFuncName == INVERSE_NORM)
-                        phase = (norm == 0.)? params[0] : 1/norm;
+                        phase = (norm == 0.)? params[0] : 1/norm;  // smallest non-zero norm is 1
                     else if (phaseFuncName == SCALED_NORM)
                         phase = params[0] * norm;
                     else if (phaseFuncName == SCALED_INVERSE_NORM || phaseFuncName == SCALED_INVERSE_SHIFTED_NORM)
-                        phase = (norm == 0.)? params[1] : params[0] / norm;
+                        phase = (norm <= REAL_EPS)? params[1] : params[0] / norm; // unless shifted closer to zero
                 }
                 // compute product related phases
                 else if (phaseFuncName == PRODUCT || phaseFuncName == INVERSE_PRODUCT ||
@@ -4514,7 +4554,7 @@ void statevec_applyParamNamedPhaseFuncOverrides(
                     if (phaseFuncName == PRODUCT)
                         phase = prod;
                     else if (phaseFuncName == INVERSE_PRODUCT)
-                        phase = (prod == 0.)? params[0] : 1/prod;
+                        phase = (prod == 0.)? params[0] : 1/prod;  // smallest non-zero product norm is +- 1
                     else if (phaseFuncName == SCALED_PRODUCT)
                         phase = params[0] * prod;
                     else if (phaseFuncName == SCALED_INVERSE_PRODUCT)
@@ -4528,7 +4568,7 @@ void statevec_applyParamNamedPhaseFuncOverrides(
                     dist = 0;
                     if (phaseFuncName == SCALED_INVERSE_SHIFTED_DISTANCE) {
                         for (r=0; r<numRegs; r+=2)
-                            dist += (phaseInds[r+1] - phaseInds[r] - params[2+r/2])*(phaseInds[r+1] - phaseInds[r] - params[2+r/2]);
+                            dist += (phaseInds[r] - phaseInds[r+1] - params[2+r/2])*(phaseInds[r] - phaseInds[r+1] - params[2+r/2]);
                     }
                     else
                         for (r=0; r<numRegs; r+=2)
@@ -4538,11 +4578,11 @@ void statevec_applyParamNamedPhaseFuncOverrides(
                     if (phaseFuncName == DISTANCE)
                         phase = dist;
                     else if (phaseFuncName == INVERSE_DISTANCE)
-                        phase = (dist == 0.)? params[0] : 1/dist;
+                        phase = (dist == 0.)? params[0] : 1/dist; // smallest non-zero dist is 1
                     else if (phaseFuncName == SCALED_DISTANCE)
                         phase = params[0] * dist;
                     else if (phaseFuncName == SCALED_INVERSE_DISTANCE || phaseFuncName == SCALED_INVERSE_SHIFTED_DISTANCE)
-                        phase = (dist == 0.)? params[1] : params[0] / dist;
+                        phase = (dist <= REAL_EPS)? params[1] : params[0] / dist; // unless shifted closer to 0
                 }
             }
             
@@ -4562,4 +4602,3 @@ void statevec_applyParamNamedPhaseFuncOverrides(
         }
     }
 }
-
diff --git a/QuEST/src/CPU/QuEST_cpu_distributed.c b/QuEST/src/CPU/QuEST_cpu_distributed.c
index d2b464727..8292eb5cf 100644
--- a/QuEST/src/CPU/QuEST_cpu_distributed.c
+++ b/QuEST/src/CPU/QuEST_cpu_distributed.c
@@ -154,7 +154,9 @@ QuESTEnv createQuESTEnv(void) {
     
     validateNumRanks(env.numRanks, __func__);
     
-	seedQuESTDefault();
+    env.seeds = NULL;
+    env.numSeeds = 0;
+	seedQuESTDefault(&env);
     
     return env;
 }
@@ -170,6 +172,8 @@ int syncQuESTSuccess(int successCode){
 }
 
 void destroyQuESTEnv(QuESTEnv env){
+    free(env.seeds);
+    
     int finalized;
     MPI_Finalized(&finalized);
     if (!finalized) MPI_Finalize();
@@ -1381,17 +1385,24 @@ void statevec_collapseToKnownProbOutcome(Qureg qureg, int measureQubit, int outc
     }
 }
 
-void seedQuESTDefault(){
-    // init MT random number generator with three keys -- time and pid
-    // for the MPI version, it is ok that all procs will get the same seed as random numbers will only be 
-    // used by the master process
-
-    unsigned long int key[2];
-    getQuESTDefaultSeedKey(key);
-    // this seed will be used to generate the same random number on all procs,
-    // therefore we want to make sure all procs receive the same key
-    MPI_Bcast(key, 2, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
-    init_by_array(key, 2);
+void seedQuEST(QuESTEnv *env, unsigned long int* seedArray, int numSeeds) {
+
+    // it is imperative every node agrees on the seed, so that random decisions 
+    // agree on every node. Hence we use only the master node keys.
+    MPI_Bcast(seedArray, numSeeds, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
+    
+    // free existing seed array, if exists
+    if (env->seeds != NULL)
+        free(env->seeds);
+        
+    // record keys in permanent heap
+    env->seeds = malloc(numSeeds * sizeof *(env->seeds));
+    for (int i=0; i<numSeeds; i++)
+        (env->seeds)[i] = seedArray[i];
+    env->numSeeds = numSeeds;
+    
+    // pass keys to Mersenne Twister seeder
+    init_by_array(seedArray, numSeeds); 
 }
 
 /** returns -1 if this node contains no amplitudes where qb1 and qb2 
diff --git a/QuEST/src/CPU/QuEST_cpu_local.c b/QuEST/src/CPU/QuEST_cpu_local.c
index 70fc2da8b..222fa4c8f 100644
--- a/QuEST/src/CPU/QuEST_cpu_local.c
+++ b/QuEST/src/CPU/QuEST_cpu_local.c
@@ -174,7 +174,9 @@ QuESTEnv createQuESTEnv(void) {
     env.rank=0;
     env.numRanks=1;
     
-    seedQuESTDefault();
+    env.seeds = NULL;
+    env.numSeeds = 0;
+    seedQuESTDefault(&env);
     
     return env;
 }
@@ -188,7 +190,7 @@ int syncQuESTSuccess(int successCode){
 }
 
 void destroyQuESTEnv(QuESTEnv env){
-    // MPI finalize goes here in MPI version. Call this function anyway for consistency
+    free(env.seeds);
 }
 
 void reportQuESTEnv(QuESTEnv env){
@@ -322,14 +324,20 @@ void statevec_collapseToKnownProbOutcome(Qureg qureg, int measureQubit, int outc
     statevec_collapseToKnownProbOutcomeLocal(qureg, measureQubit, outcome, stateProb);
 }
 
-void seedQuESTDefault(void){
-    // init MT random number generator with three keys -- time and pid
-    // for the MPI version, it is ok that all procs will get the same seed as random numbers will only be 
-    // used by the master process
+void seedQuEST(QuESTEnv *env, unsigned long int *seedArray, int numSeeds) {
 
-    unsigned long int key[2];
-    getQuESTDefaultSeedKey(key);
-    init_by_array(key, 2);
+    // drop any previously recorded keys; free(NULL) is a no-op
+    free(env->seeds);
+
+    // keep a heap copy of the caller's keys inside the environment
+    unsigned long int *copiedKeys = malloc(numSeeds * sizeof *copiedKeys);
+    for (int k=0; k<numSeeds; k++)
+        copiedKeys[k] = seedArray[k];
+    env->seeds = copiedKeys;
+    env->numSeeds = numSeeds;
+
+    // seed the Mersenne Twister generator with the same keys
+    init_by_array(seedArray, numSeeds);
 }
 
 void statevec_multiControlledTwoQubitUnitary(Qureg qureg, long long int ctrlMask, int q1, int q2, ComplexMatrix4 u)
diff --git a/QuEST/src/GPU/QuEST_gpu.cu b/QuEST/src/GPU/QuEST_gpu.cu
index 698064de2..8d0cb1dba 100755
--- a/QuEST/src/GPU/QuEST_gpu.cu
+++ b/QuEST/src/GPU/QuEST_gpu.cu
@@ -471,7 +471,9 @@ QuESTEnv createQuESTEnv(void) {
     env.rank=0;
     env.numRanks=1;
     
-    seedQuESTDefault();
+    env.seeds = NULL;        // nothing recorded yet, so seedQuEST has no old array to free
+    env.numSeeds = 0;
+    seedQuESTDefault(&env);  // takes QuESTEnv*: the seeds must be recorded in this env, not a copy
     
     return env;
 }
@@ -485,7 +487,7 @@ int syncQuESTSuccess(int successCode){
 }
 
 void destroyQuESTEnv(QuESTEnv env){
-    // MPI finalize goes here in MPI version. Call this function anyway for consistency
+    free(env.seeds); // release the heap copy of the RNG seed keys recorded by seedQuEST
 }
 
 void reportQuESTEnv(QuESTEnv env){
@@ -3835,11 +3837,11 @@ __global__ void statevec_applyParamNamedPhaseFuncOverridesKernel(
             if (phaseFuncName == NORM)
                 phase = norm;
             else if (phaseFuncName == INVERSE_NORM)
-                phase = (norm == 0.)? params[0] : 1/norm;
+                phase = (norm == 0.)? params[0] : 1/norm; // smallest non-zero norm is 1
             else if (phaseFuncName == SCALED_NORM)
                 phase = params[0] * norm;
             else if (phaseFuncName == SCALED_INVERSE_NORM || phaseFuncName == SCALED_INVERSE_SHIFTED_NORM)
-                phase = (norm == 0.)? params[1] : params[0] / norm;
+                phase = (norm <= REAL_EPS)? params[1] : params[0] / norm; // unless shifted closer to zero
         }
         // compute product related phases
         else if (phaseFuncName == PRODUCT || phaseFuncName == INVERSE_PRODUCT ||
@@ -3852,7 +3854,7 @@ __global__ void statevec_applyParamNamedPhaseFuncOverridesKernel(
             if (phaseFuncName == PRODUCT)
                 phase = prod;
             else if (phaseFuncName == INVERSE_PRODUCT)
-                phase = (prod == 0.)? params[0] : 1/prod;
+                phase = (prod == 0.)? params[0] : 1/prod; // smallest non-zero prod is +- 1
             else if (phaseFuncName == SCALED_PRODUCT)
                 phase = params[0] * prod;
             else if (phaseFuncName == SCALED_INVERSE_PRODUCT)
@@ -3866,7 +3868,7 @@ __global__ void statevec_applyParamNamedPhaseFuncOverridesKernel(
             qreal dist = 0;
             if (phaseFuncName == SCALED_INVERSE_SHIFTED_DISTANCE) {
                 for (int r=0; r<numRegs; r+=2) {
-                    qreal dif = (phaseInds[(r+1)*stride+offset] - phaseInds[r*stride+offset] - params[2+r/2]);
+                    qreal dif = (phaseInds[r*stride+offset] - phaseInds[(r+1)*stride+offset] - params[2+r/2]);
                     dist += dif*dif;
                 }
             }
@@ -3880,11 +3882,11 @@ __global__ void statevec_applyParamNamedPhaseFuncOverridesKernel(
             if (phaseFuncName == DISTANCE)
                 phase = dist;
             else if (phaseFuncName == INVERSE_DISTANCE)
-                phase = (dist == 0.)? params[0] : 1/dist;
+                phase = (dist == 0.)? params[0] : 1/dist; // smallest non-zero dist is 1
             else if (phaseFuncName == SCALED_DISTANCE)
                 phase = params[0] * dist;
             else if (phaseFuncName == SCALED_INVERSE_DISTANCE || phaseFuncName == SCALED_INVERSE_SHIFTED_DISTANCE)
-                phase = (dist == 0.)? params[1] : params[0] / dist;
+                phase = (dist <= REAL_EPS)? params[1] : params[0] / dist; // unless shifted closer
         }
     }
     
@@ -3960,16 +3962,21 @@ void statevec_applyParamNamedPhaseFuncOverrides(
         cudaFree(d_params);
 }
 
-void seedQuESTDefault(){
-    // init MT random number generator with three keys -- time and pid
-    // for the MPI version, it is ok that all procs will get the same seed as random numbers will only be 
-    // used by the master process
-
-    unsigned long int key[2];
-    getQuESTDefaultSeedKey(key); 
-    init_by_array(key, 2); 
-}  
+void seedQuEST(QuESTEnv *env, unsigned long int *seedArray, int numSeeds) {
 
+    // free existing seed array, if exists
+    if (env->seeds != NULL)
+        free(env->seeds);
+
+    // record keys in permanent heap (explicit cast: this .cu file is compiled as C++)
+    env->seeds = (unsigned long int*) malloc(numSeeds * sizeof *(env->seeds));
+    for (int i=0; i<numSeeds; i++)
+        (env->seeds)[i] = seedArray[i];
+    env->numSeeds = numSeeds;
+
+    // pass keys to Mersenne Twister seeder
+    init_by_array(seedArray, numSeeds);
+}
 
 
 
diff --git a/QuEST/src/QuEST.c b/QuEST/src/QuEST.c
index 8f9f09a12..bdcb81cf4 100644
--- a/QuEST/src/QuEST.c
+++ b/QuEST/src/QuEST.c
@@ -155,7 +155,6 @@ void initPureState(Qureg qureg, Qureg pure) {
 }
 
 void initStateFromAmps(Qureg qureg, qreal* reals, qreal* imags) {
-    validateStateVecQureg(qureg, __func__);
     
     statevec_setAmps(qureg, 0, reals, imags, qureg.numAmpsTotal);
     
@@ -878,7 +877,7 @@ void applyFullQFT(Qureg qureg) {
 
     qasm_recordComment(qureg, "Beginning of QFT circuit");
         
-    int qubits[qureg.numQubitsRepresented];
+    int qubits[100];
     for (int i=0; i<qureg.numQubitsRepresented; i++)
         qubits[i] = i;
     agnostic_applyQFT(qureg, qubits, qureg.numQubitsRepresented);
@@ -886,6 +885,20 @@ void applyFullQFT(Qureg qureg) {
     qasm_recordComment(qureg, "End of QFT circuit");
 }
 
+void applyProjector(Qureg qureg, int qubit, int outcome) {
+    validateTarget(qureg, qubit, __func__);
+    validateOutcome(outcome, __func__);
+    // 1 is passed as the outcome probability; presumably this leaves the state un-renormalised (TODO confirm)
+    qreal renorm = 1;
+    // dispatch on the register's representation
+    if (qureg.isDensityMatrix)
+        densmatr_collapseToKnownProbOutcome(qureg, qubit, outcome, renorm);
+    else
+        statevec_collapseToKnownProbOutcome(qureg, qubit, outcome, renorm);
+    // note the projection via the QASM comment recorder
+    qasm_recordComment(qureg, "Here, qubit %d was un-physically projected into outcome %d", qubit, outcome);
+}
+
 
 
 /*
@@ -1369,6 +1382,7 @@ void destroyComplexMatrixN(ComplexMatrixN m) {
     free(m.imag);
 }
 
+#ifndef _WIN32
 void initComplexMatrixN(ComplexMatrixN m, qreal re[][1<<m.numQubits], qreal im[][1<<m.numQubits]) {
     validateMatrixInit(m, __func__);
     
@@ -1379,6 +1393,7 @@ void initComplexMatrixN(ComplexMatrixN m, qreal re[][1<<m.numQubits], qreal im[]
             m.imag[i][j] = im[i][j];
         }
 }
+#endif
 
 PauliHamil createPauliHamil(int numQubits, int numSumTerms) {
     validateHamilParams(numQubits, numSumTerms, __func__);
@@ -1595,6 +1610,19 @@ void reportPauliHamil(PauliHamil hamil) {
 int  getQuEST_PREC(void) {
   return sizeof(qreal)/4;
 }
+
+void seedQuESTDefault(QuESTEnv* env) {
+    // fetch the two default keys, then seed (and record them in env) via seedQuEST
+    unsigned long int defaultKeys[2];
+    const int numDefaultKeys = 2;
+    getQuESTDefaultSeedKey(defaultKeys);
+    seedQuEST(env, defaultKeys, numDefaultKeys);
+}
+
+void getQuESTSeeds(QuESTEnv env, unsigned long int** seeds, int* numSeeds) {
+    *seeds = env.seeds;       // hands back env's own seed array (the pointer is copied, not the data)
+    *numSeeds = env.numSeeds; // number of keys in that array, as recorded by seedQuEST
+}
   
 
 #ifdef __cplusplus
diff --git a/QuEST/src/QuEST_common.c b/QuEST/src/QuEST_common.c
index 0e3409c89..2bd21ab35 100644
--- a/QuEST/src/QuEST_common.c
+++ b/QuEST/src/QuEST_common.c
@@ -216,16 +216,6 @@ void getQuESTDefaultSeedKey(unsigned long int *key){
 #endif 
 }
 
-/** 
- * numSeeds <= 64
- */
-void seedQuEST(unsigned long int *seedArray, int numSeeds){
-    // init MT random number generator with user defined list of seeds
-    // for the MPI version, it is ok that all procs will get the same seed as random numbers will only be 
-    // used by the master process
-    init_by_array(seedArray, numSeeds); 
-}
-
 void reportState(Qureg qureg){
     FILE *state;
     char filename[100];
@@ -534,7 +524,7 @@ qreal statevec_calcExpecPauliProd(Qureg qureg, int* targetQubits, enum pauliOpTy
 qreal statevec_calcExpecPauliSum(Qureg qureg, enum pauliOpType* allCodes, qreal* termCoeffs, int numSumTerms, Qureg workspace) {
     
     int numQb = qureg.numQubitsRepresented;
-    int targs[numQb];
+    int targs[100]; // [numQb];
     for (int q=0; q < numQb; q++)
         targs[q] = q;
         
@@ -548,7 +538,7 @@ qreal statevec_calcExpecPauliSum(Qureg qureg, enum pauliOpType* allCodes, qreal*
 void statevec_applyPauliSum(Qureg inQureg, enum pauliOpType* allCodes, qreal* termCoeffs, int numSumTerms, Qureg outQureg) {
     
     int numQb = inQureg.numQubitsRepresented;
-    int targs[numQb];
+    int targs[100]; // [numQb];
     for (int q=0; q < numQb; q++)
         targs[q] = q;
         
@@ -643,7 +633,7 @@ void densmatr_applyTwoQubitKrausSuperoperator(Qureg qureg, int target1, int targ
 
 void densmatr_applyMultiQubitKrausSuperoperator(Qureg qureg, int *targets, int numTargets, ComplexMatrixN superOp) {
     long long int ctrlMask = 0;
-    int allTargets[2*numTargets];
+    int allTargets[200]; // [2*numTargets];
     for (int t=0; t < numTargets; t++) {
         allTargets[t] = targets[t];
         allTargets[t+numTargets] = targets[t] + qureg.numQubitsRepresented;
@@ -658,6 +648,7 @@ void densmatr_mixKrausMap(Qureg qureg, int target, ComplexMatrix2 *ops, int numO
     densmatr_applyKrausSuperoperator(qureg, target, superOp);
 }
 
+#ifndef _WIN32
 ComplexMatrixN bindArraysToStackComplexMatrixN(
     int numQubits, qreal re[][1<<numQubits], qreal im[][1<<numQubits], 
     qreal** reStorage, qreal** imStorage
@@ -674,6 +665,7 @@ ComplexMatrixN bindArraysToStackComplexMatrixN(
     }
     return m;
 }
+
 #define macro_initialiseStackComplexMatrixN(matrix, numQubits, real, imag) \
     /* reStorage_ and imStorage_ must not exist in calling scope */ \
     qreal* reStorage_[1<<(numQubits)]; \
@@ -685,13 +677,25 @@ ComplexMatrixN bindArraysToStackComplexMatrixN(
     qreal reArr_[1<<(numQubits)][1<<(numQubits)]; \
     qreal imArr_[1<<(numQubits)][1<<(numQubits)]; \
     macro_initialiseStackComplexMatrixN(matrix, (numQubits), reArr_, imArr_);
+#endif
 
 void densmatr_mixTwoQubitKrausMap(Qureg qureg, int target1, int target2, ComplexMatrix4 *ops, int numOps) {
     
-    ComplexMatrixN superOp;
-    macro_allocStackComplexMatrixN(superOp, 4);
-    populateKrausSuperOperator4(&superOp, ops, numOps);
-    densmatr_applyTwoQubitKrausSuperoperator(qureg, target1, target2, superOp);
+  // if NOT on Windows, allocate the ComplexMatrixN on the stack
+  #ifndef _WIN32
+      ComplexMatrixN superOp;
+      macro_allocStackComplexMatrixN(superOp, 4);
+      populateKrausSuperOperator4(&superOp, ops, numOps);
+      densmatr_applyTwoQubitKrausSuperoperator(qureg, target1, target2, superOp);
+
+  // but on Windows, we MUST allocate it dynamically (the stack macro is compiled out under _WIN32)
+  #else
+      ComplexMatrixN superOp = createComplexMatrixN(4);
+      populateKrausSuperOperator4(&superOp, ops, numOps);
+      densmatr_applyTwoQubitKrausSuperoperator(qureg, target1, target2, superOp);
+      destroyComplexMatrixN(superOp);
+
+  #endif
 }
 
 void densmatr_mixMultiQubitKrausMap(Qureg qureg, int* targets, int numTargets, ComplexMatrixN* ops, int numOps) {
@@ -713,25 +717,34 @@ void densmatr_mixMultiQubitKrausMap(Qureg qureg, int* targets, int numTargets, C
      * for numTargets < 4, superOp will be kept in the stack, else in the heap
      */
      
-    if (numTargets < 4) {
-        // everything must live in 'if' since this macro declares local vars
-        macro_allocStackComplexMatrixN(superOp, 2*numTargets);
-        populateKrausSuperOperatorN(&superOp, ops, numOps);
-        densmatr_applyMultiQubitKrausSuperoperator(qureg, targets, numTargets, superOp);
-    }
-    else {
-        superOp = createComplexMatrixN(2*numTargets);
-        populateKrausSuperOperatorN(&superOp, ops, numOps);
-        densmatr_applyMultiQubitKrausSuperoperator(qureg, targets, numTargets, superOp);
-        destroyComplexMatrixN(superOp);
-    }
+     // if NOT on Windows, allocate ComplexN on stack depending on size
+     #ifndef _WIN32
+         if (numTargets < 4) {
+             // everything must live in 'if' since this macro declares local vars
+             macro_allocStackComplexMatrixN(superOp, 2*numTargets);
+             populateKrausSuperOperatorN(&superOp, ops, numOps);
+             densmatr_applyMultiQubitKrausSuperoperator(qureg, targets, numTargets, superOp);
+         }
+         else {
+             superOp = createComplexMatrixN(2*numTargets);
+             populateKrausSuperOperatorN(&superOp, ops, numOps);
+             densmatr_applyMultiQubitKrausSuperoperator(qureg, targets, numTargets, superOp);
+             destroyComplexMatrixN(superOp);
+         }
+     // on Windows, we must always create in heap
+     #else
+         superOp = createComplexMatrixN(2*numTargets);
+         populateKrausSuperOperatorN(&superOp, ops, numOps);
+         densmatr_applyMultiQubitKrausSuperoperator(qureg, targets, numTargets, superOp);
+         destroyComplexMatrixN(superOp);
+     #endif
 }
 
 void densmatr_mixPauli(Qureg qureg, int qubit, qreal probX, qreal probY, qreal probZ) {
     
     // convert pauli probabilities into Kraus map
     const int numOps = 4;
-    ComplexMatrix2 ops[numOps];
+    ComplexMatrix2 ops[4]; // [numOps];
     for (int n=0; n < numOps; n++)
         ops[n] = (ComplexMatrix2) {.real={{0}}, .imag={{0}}};
     
@@ -759,8 +772,8 @@ void applyExponentiatedPauliHamil(Qureg qureg, PauliHamil hamil, qreal fac, int
      
     // prepare targets for multiRotatePauli 
     // (all qubits; actual targets are determined by Pauli codes)
-    int vecTargs[hamil.numQubits];
-    int densTargs[hamil.numQubits];
+    int vecTargs[100]; // [hamil.numQubits];
+    int densTargs[100]; // [hamil.numQubits];
     for (int q=0; q<hamil.numQubits; q++) {
         vecTargs[q] = q;
         densTargs[q] = q + hamil.numQubits;
@@ -855,7 +868,7 @@ void agnostic_applyQFT(Qureg qureg, int* qubits, int numQubits) {
         
         int numRegs = 2;
         int numQubitsPerReg[2] = {q, 1};
-        int regs[q+1];
+        int regs[100]; // [q+1];
         for (int i=0; i<q+1; i++)
             regs[i] = qubits[i]; // qubits[q] is in own register
         
diff --git a/QuEST/src/QuEST_validation.c b/QuEST/src/QuEST_validation.c
index 3bcdedae2..9986b6364 100644
--- a/QuEST/src/QuEST_validation.c
+++ b/QuEST/src/QuEST_validation.c
@@ -196,18 +196,22 @@ static const char* errorMessages[] = {
     [E_INVALID_NUM_REGS_DISTANCE_PHASE_FUNC] = "Phase functions DISTANCE, INVERSE_DISTANCE, SCALED_DISTANCE and SCALED_INVERSE_DISTANCE require a strictly even number of sub-registers."
 };
 
-void exitWithError(const char* msg, const char* func) {
+void default_invalidQuESTInputError(const char* errMsg, const char* errFunc) {
     printf("!!!\n");
-    printf("QuEST Error in function %s: %s\n", func, msg);
+    printf("QuEST Error in function %s: %s\n", errFunc, errMsg);
     printf("!!!\n");
     printf("exiting..\n");
     exit(1);
 }
 
+#ifndef _WIN32
 #pragma weak invalidQuESTInputError
 void invalidQuESTInputError(const char* errMsg, const char* errFunc) {
-    exitWithError(errMsg, errFunc);
+    default_invalidQuESTInputError(errMsg, errFunc);
 }
+#else
+#pragma comment(linker, "/alternatename:invalidQuESTInputError=default_invalidQuESTInputError")   
+#endif
 
 void QuESTAssert(int isValid, ErrorCode code, const char* func){
     if (!isValid) invalidQuESTInputError(errorMessages[code], func);
@@ -803,7 +807,8 @@ void validatePhaseFuncTerms(int numQubits, enum bitEncoding encoding, qreal* coe
         // if there are 16 or fewer qubits (0.5mB cache), use a stack array to tick off overrides
         if (numQubits < 16) {
             
-            long long int negIsOverriden[numNegInds];  // flags for {-1,-2,...}; at index {abs(-1)-1, abs(-2)-2, ...}
+            // flags for {-1,-2,...}; at index {abs(-1)-1, abs(-2)-2, ...}
+            long long int negIsOverriden[32768];  // [numNegInds];
             for (int i=0; i<numNegInds; i++)
                 negIsOverriden[i] = 0;
             
diff --git a/README.md b/README.md
index 5af29cd0e..37afa84aa 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,9 @@ The **Quantum Exact Simulation Toolkit** is a high performance simulator of quan
 
 [![Languages](https://img.shields.io/badge/C-99-ff69b4.svg)](http://www.open-std.org/jtc1/sc22/wg14/www/standards.html#9899)
 [![Languages](https://img.shields.io/badge/C++-11-ff69b4.svg)](https://isocpp.org/wiki/faq/cpp11)
+![OS](https://img.shields.io/badge/os-MacOS-9cbd3c.svg)
+![OS](https://img.shields.io/badge/os-Linux-9cbd3c.svg)
+![OS](https://img.shields.io/badge/os-Windows-9cbd3c.svg)
 [![Platforms](https://img.shields.io/badge/multithreaded-OpenMP-6699ff.svg)](https://www.openmp.org/)
 [![Platforms](https://img.shields.io/badge/GPU-CUDA-6699ff.svg)](https://developer.nvidia.com/cuda-zone)
 [![Platforms](https://img.shields.io/badge/distributed-MPI-6699ff.svg)](https://www.mpi-forum.org/) 
@@ -92,7 +95,8 @@ QuEST supports:
 - :ballot_box_with_check: &nbsp; **many tools to analyse** quantum states, such as calculations of [probability](https://quest-kit.github.io/QuEST-develop-doc/group__calc.html#gad0cc08d52cad5062553d6f78126780cc), [fidelity](https://quest-kit.github.io/QuEST-develop-doc/group__calc.html#gaa266ed6c8ae5d0d0f49e1ac50819cffc), and [expected value](https://quest-kit.github.io/QuEST-develop-doc/group__calc.html#ga82f17e96a4cb7612fb9c6ef856df3810)  
 - :ballot_box_with_check: &nbsp; **variable precision** through a `qreal` numerical type which can use single, double or quad precision  
 - :ballot_box_with_check: &nbsp; **QASM output** to verify simulated circuits  
-- :ballot_box_with_check: &nbsp; **direct access to amplitudes** for rapid custom modification of the quantum state  
+- :ballot_box_with_check: &nbsp; **direct access to amplitudes** for rapid custom modification of the quantum state 
+- :ballot_box_with_check: &nbsp; **native compilation** on MacOS, Linux and Windows, through Clang, GNU, Intel, and MSVC compilers
 
 ---------------------------------
 
@@ -143,9 +147,16 @@ then run it with
 ```
 <br>
 
-> Windows users should install [Build Tools](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019) for Visual Studio, [CMake](https://cmake.org/download/) and [MinGW-w64](https://sourceforge.net/projects/mingw-w64/), and run the above commmands in the *Developer Command Prompt for VS*, with alternate cmake argument
+> **Windows** users should install [Build Tools](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019) for Visual Studio and [CMake](https://cmake.org/download/), and run the above commands in the *Developer Command Prompt for VS*, but using the build commands
+> ```bash 
+> cmake .. -G "NMake Makefiles"
+> nmake
+> ```
+> If using MSVC and NMake in this way fails, users can forego GPU acceleration, download
+> [MinGW-w64](https://sourceforge.net/projects/mingw-w64/), and compile via 
 > ```bash 
 > cmake .. -G "MinGW Makefiles"
+> make
 > ```
 
 
diff --git a/doxyconfig/config b/doxyconfig/config
index 78d494da0..9e7d5b210 100644
--- a/doxyconfig/config
+++ b/doxyconfig/config
@@ -39,7 +39,7 @@ PROJECT_NAME           = "QuEST"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "v3.3.0"
+PROJECT_NUMBER         = "v3.4.0"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/examples/README.md b/examples/README.md
index 0c2c6546e..54c2ea091 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -157,7 +157,7 @@ and after compiling (see section below) and running, gives psuedo-random output
 > Qubit 2 collapsed to 1 with probability 0.499604
 > ```
 
-QuEST uses the [Mersenne Twister](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html) algorithm to generate random numbers used for randomly collapsing quantum states. The user can seed this RNG using `seedQuEST(arrayOfSeeds, arrayLength)`, otherwise QuEST will by default (through `seedQuESTDefault()`) create a seed from the current time and the process id.
+QuEST uses the [Mersenne Twister](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html) algorithm to generate random numbers used for randomly collapsing quantum states. The user can seed this RNG using [`seedQuEST()`](https://quest-kit.github.io/QuEST/group__debug.html#ga555451c697ea4a9d27389155f68fdabc), otherwise QuEST will by default create a seed from the current time and the process id.
 
 
 > In distributed mode (see below), all code in your source files will be executed independently on every node. 
@@ -176,6 +176,8 @@ QuEST uses the [Mersenne Twister](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/M
 
 QuEST uses [CMake](https://cmake.org/) (version `3.7` or higher) as its build system. Configure the build by supplying the below `-D[VAR=VALUE]` options after the `cmake ..` command. You can alternatively compile via [GNU Make](https://www.gnu.org/software/make/) directly with the provided [makefile](makefile).
 
+> **Windows** users should install [CMake](https://cmake.org/download/) and [Build Tools](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019), and run the below commands in the *Developer Command Prompt for VS*
+
 To compile, run:
 ```console
 mkdir build
@@ -183,7 +185,22 @@ cd build
 cmake .. -DUSER_SOURCE="[FILENAME]"
 make
 ```
-where `[FILENAME]` is the name of your source file, including the file extension, relative to the root QuEST directory (above `build`). If your project contains multiple source files, separate them with semi-colons. For example,
+where `[FILENAME]` is the name of your source file, including the file extension, relative to the root QuEST directory (above `build`). 
+
+> Windows users should replace the final two build commands with
+> ```bash 
+> cmake .. -G "NMake Makefiles"
+> nmake
+> ```
+> If using MSVC and NMake in this way fails, users can forego GPU acceleration, download
+> [MinGW-w64](https://sourceforge.net/projects/mingw-w64/), and compile via 
+> ```bash 
+> cmake .. -G "MinGW Makefiles"
+> make
+> ```
+> Compiling directly with `make` and the provided [makefile](makefile), copied to the root directory, may prove easier.
+
+If your project contains multiple source files, separate them with semi-colons. For example,
 ```console
  -DUSER_SOURCE="source1.c;source2.cpp"
 ```
diff --git a/examples/makefile b/examples/makefile
index e72f55e52..000dc5e9b 100644
--- a/examples/makefile
+++ b/examples/makefile
@@ -31,9 +31,12 @@ QUEST_DIR = QuEST
 # compiler to use, which should support both C and C++, to be wrapped by GPU/MPI compilers
 COMPILER = gcc
 
-# type of above compiler, one of {GNU, INTEL, CLANG}, used for setting compiler flags
+# type of above compiler, one of {GNU, INTEL, CLANG, MSVC}, used for setting compiler flags
 COMPILER_TYPE = GNU
 
+# only for WINDOWS: whether OS is 32-bit (x86) or 64-bit (x64). Choose {32, 64}
+WINDOWS_ARCH = 32
+
 # hardwares to target: 1 means use, 0 means don't use
 MULTITHREADED = 0
 DISTRIBUTED = 0
@@ -66,10 +69,12 @@ ifneq ($(SILENT), 1)
     ifneq ($(COMPILER_TYPE), CLANG)
     ifneq ($(COMPILER_TYPE), GNU)
     ifneq ($(COMPILER_TYPE), INTEL)
+	  ifneq ($(COMPILER_TYPE), MSVC)
         $(error COMPILER_TYPE must be one of CLANG, GNU or INTEL)
     endif
     endif
     endif
+    endif
 
     # distributed GPU not supported
     ifeq ($(DISTRIBUTED), 1)
@@ -128,6 +133,15 @@ ifneq ($(SILENT), 1)
     endif
     endif
     endif
+    
+    # Windows users must set WINDOWS_ARCH as {32, 64}
+    ifeq ($(COMPILER_TYPE), MSVC)
+    ifneq ($(WINDOWS_ARCH), 32)
+    ifneq ($(WINDOWS_ARCH), 64)
+        $(error When compiling with MSVC, WINDOWS_ARCH must be 32 or 64)
+    endif
+    endif
+    endif
 
 # end of allowed cleaning
 endif
@@ -146,7 +160,11 @@ endif
 # --- libraries
 #
 
-LIBS = -lm
+ifeq ($(COMPILER_TYPE), MSVC)
+    LIBS =
+else
+    LIBS = -lm
+endif
 
 
 #
@@ -194,23 +212,34 @@ ifeq ($(MULTITHREADED), 1)
         THREAD_FLAGS = -fopenmp
     else ifeq ($(COMPILER_TYPE), INTEL)
         THREAD_FLAGS = -qopenmp
+    else ifeq ($(COMPILER_TYPE), MSVC)
+        THREAD_FLAGS = -openmp
     endif
 else
     THREAD_FLAGS =
 endif
 
+# windows architecture flag
+ifeq ($(WINDOWS_ARCH), 32)
+    ARCH_FLAG = X86
+else
+    ARCH_FLAG = X64
+endif
+
 # c
 C_CLANG_FLAGS = -O2 -std=c99 -mavx -Wall -DQuEST_PREC=$(PRECISION)
 C_GNU_FLAGS = -O2 -std=c99 -mavx -Wall -DQuEST_PREC=$(PRECISION) $(THREAD_FLAGS)
 C_INTEL_FLAGS = -O2 -std=c99 -fprotect-parens -Wall -xAVX -axCORE-AVX2 -diag-disable -cpu-dispatch -DQuEST_PREC=$(PRECISION) $(THREAD_FLAGS)
+C_MSVC_FLAGS = -O2 -EHs -DQuEST_PREC=$(PRECISION) $(THREAD_FLAGS) -nologo -DDWIN$(WINDOWS_ARCH) -D_WINDOWS -Fo$@
 
 # c++
 CPP_CLANG_FLAGS = -O2 -std=c++11 -mavx -Wall -DQuEST_PREC=$(PRECISION)
 CPP_GNU_FLAGS = -O2 -std=c++11 -mavx -Wall -DQuEST_PREC=$(PRECISION) $(THREAD_FLAGS)
 CPP_INTEL_FLAGS = -O2 -std=c++11 -fprotect-parens -Wall -xAVX -axCORE-AVX2 -diag-disable -cpu-dispatch -DQuEST_PREC=$(PRECISION) $(THREAD_FLAGS)
+CPP_MSVC_FLAGS = -O2 -EHs -std:c++latest -DQuEST_PREC=$(PRECISION) $(THREAD_FLAGS) -nologo -DDWIN$(WINDOWS_ARCH) -D_WINDOWS -Fo$@
 
 # wrappers
-CPP_CUDA_FLAGS = -O2 -arch=compute_$(GPU_COMPUTE_CAPABILITY) -code=sm_$(GPU_COMPUTE_CAPABILITY) -DQuEST_PREC=$(PRECISION) -ccbin $(COMPILER)
+CPP_CUDA_FLAGS = -O2 -arch=compute_$(GPU_COMPUTE_CAPABILITY) -code=sm_$(GPU_COMPUTE_CAPABILITY) -DQuEST_PREC=$(PRECISION)
 
 # choose c/c++ flags based on compiler type
 ifeq ($(COMPILER_TYPE), CLANG)
@@ -222,21 +251,51 @@ else ifeq ($(COMPILER_TYPE), GNU)
 else ifeq ($(COMPILER_TYPE), INTEL)
     C_FLAGS = $(C_INTEL_FLAGS)
     CPP_FLAGS = $(CPP_INTEL_FLAGS)
+else ifeq ($(COMPILER_TYPE), MSVC)
+    C_FLAGS = $(C_MSVC_FLAGS)
+    CPP_FLAGS = $(CPP_MSVC_FLAGS)
+	
+	# must specify machine type on Windows
+    CPP_CUDA_FLAGS := $(CPP_CUDA_FLAGS) -m=$(WINDOWS_ARCH) -DDWIN$(WINDOWS_ARCH)
 endif
 
-ifeq ($(TEST), 1)
-	QUEST_LIB := libQuEST
-	LIB_EXT = .so
-	LIB_NAME = $(addsuffix $(LIB_EXT), $(QUEST_LIB))
-	C_FLAGS += -fPIC
-endif
 
 
 #
-# --- compiler environment vars
+# --- compiler mode and linker flags 
 #
 
-MPI_WRAPPED_COMP = I_MPI_CC=$(COMPILER) OMPI_CC=$(COMPILER) MPICH_CC=$(COMPILER)
+# format args based on compiler type
+ifeq ($(COMPILER_TYPE), MSVC)
+    C_MODE = 
+    LINKER = link.exe
+    LINK_FLAGS := -SUBSYSTEM:CONSOLE -nologo -MACHINE:$(ARCH_FLAG)
+		# note MSVC linker does not receive thread flags
+    	
+    # must forward linker flags from NVCC to link.exe on Windows
+    ifeq ($(GPUACCELERATED), 1)
+        LINK_FLAGS := -o $(EXE).exe $(foreach option, $(LINK_FLAGS), -Xlinker $(option))
+    else
+        LINK_FLAGS := -out:$(EXE).exe $(LINK_FLAGS)
+    endif
+    
+    MPI_VARS = 
+else
+    C_MODE = -x c
+    LINKER = $(COMPILER)
+    LINK_FLAGS := -o $(EXE) $(THREAD_FLAGS)
+	
+    MPI_VARS = I_MPI_CC=$(COMPILER) OMPI_CC=$(COMPILER) MPICH_CC=$(COMPILER)
+endif
+
+# prepare compiler + args shortcut
+ifeq ($(DISTRIBUTED), 1)
+    COMP_CMD = $(MPI_VARS) $(MPI_COMPILER)
+    LINK_CMD = $(MPI_VARS) $(MPI_COMPILER)
+else
+    COMP_CMD = $(COMPILER)
+    LINK_CMD = $(LINKER)
+endif
 
 
 
@@ -257,7 +316,7 @@ OBJ += $(addsuffix .o, $(SOURCES))
 
 
 #
-# --- rules
+# --- building
 #
 
 # notes:
@@ -266,83 +325,59 @@ OBJ += $(addsuffix .o, $(SOURCES))
 #	- MPICC will compile .c and .cpp files (wrapping $COMPILER)
 
 
-# GPU
+# first, let NVCC compile any GPU sources
 ifeq ($(GPUACCELERATED), 1)
 
-  %.o: %.c
-	$(COMPILER) -x c $(C_FLAGS) $(QUEST_INCLUDE) -c $<
-  %.o: $(QUEST_INNER_DIR)/%.c
-	$(COMPILER) -x c $(C_FLAGS) $(QUEST_INCLUDE) -c $<
-  %.o: $(QUEST_COMMON_DIR)/%.c
-	$(COMPILER) -x c $(C_FLAGS) $(QUEST_INCLUDE) -c $<
-
+  # final -o to force NVCC to use '.o' extension even on Windows
   %.o: %.cu
-	$(CUDA_COMPILER) -dc $(CPP_CUDA_FLAGS) $(QUEST_INCLUDE) $<
+	$(CUDA_COMPILER) -dc $(CPP_CUDA_FLAGS) -ccbin $(COMPILER) $(QUEST_INCLUDE) -o $@ $<
   %.o: $(QUEST_INNER_DIR)/%.cu
-	$(CUDA_COMPILER) -dc $(CPP_CUDA_FLAGS) $(QUEST_INCLUDE) $<
-	
-  %.o: %.cpp
-	$(CUDA_COMPILER) -dc $(CPP_CUDA_FLAGS) $(QUEST_INCLUDE) $<
-  %.o: $(QUEST_INNER_DIR)/%.cpp
-	$(CUDA_COMPILER) -dc $(CPP_CUDA_FLAGS) $(QUEST_INCLUDE) $<
-
-# distributed
-else ifeq ($(DISTRIBUTED), 1)
-
-  %.o: %.c
-	$(MPI_WRAPPED_COMP) $(MPI_COMPILER) -x c $(C_FLAGS) $(QUEST_INCLUDE) -c $<
-  %.o: $(QUEST_INNER_DIR)/%.c
-	$(MPI_WRAPPED_COMP) $(MPI_COMPILER) -x c $(C_FLAGS) $(QUEST_INCLUDE) -c $<
-  %.o: $(QUEST_COMMON_DIR)/%.c
-	$(MPI_WRAPPED_COMP) $(MPI_COMPILER) -x c $(C_FLAGS) $(QUEST_INCLUDE) -c $<
-	
-  %.o: %.cpp
-	$(MPI_WRAPPED_COMP) $(MPI_COMPILER) $(CPP_FLAGS) $(QUEST_INCLUDE) -c $<
-  %.o: $(QUEST_INNER_DIR)/%.cpp
-	$(MPI_WRAPPED_COMP) $(MPI_COMPILER) $(CPP_FLAGS) $(QUEST_INCLUDE) -c $<
+	$(CUDA_COMPILER) -dc $(CPP_CUDA_FLAGS) -ccbin $(COMPILER) $(QUEST_INCLUDE) -o $@ $<
 
-# CPU
-else
+endif
 
-  %.o: %.c
-	$(COMPILER) -x c $(C_FLAGS) $(QUEST_INCLUDE) -c $<
-  %.o: $(QUEST_INNER_DIR)/%.c
-	$(COMPILER) -x c $(C_FLAGS) $(QUEST_INCLUDE) -c $<
-  %.o: $(QUEST_COMMON_DIR)/%.c
-	$(COMPILER) -x c $(C_FLAGS) $(QUEST_INCLUDE) -c $<
+# remaining files compiled for CPU
+# C
+%.o: %.c
+	$(COMP_CMD) $(C_MODE) $(C_FLAGS) $(QUEST_INCLUDE) -c $<
+%.o: $(QUEST_INNER_DIR)/%.c
+	$(COMP_CMD) $(C_MODE) $(C_FLAGS) $(QUEST_INCLUDE) -c $<
+%.o: $(QUEST_COMMON_DIR)/%.c
+	$(COMP_CMD) $(C_MODE) $(C_FLAGS) $(QUEST_INCLUDE) -c $<
 	
-  %.o: %.cpp
-	$(COMPILER) $(CPP_FLAGS) $(QUEST_INCLUDE) -c $<
-  %.o: $(QUEST_INNER_DIR)/%.cpp
-	$(COMPILER) $(CPP_FLAGS)  -c $<
-
-endif
+# C++
+%.o: %.cpp
+	$(COMP_CMD) $(CPP_FLAGS) $(QUEST_INCLUDE) -c $<
+%.o: $(QUEST_INNER_DIR)/%.cpp # QuEST's inner C++ sources get the same include paths as the sibling rules
+	$(COMP_CMD) $(CPP_FLAGS) $(QUEST_INCLUDE) -c $<
+%.o: $(LINK_DIR)/%.cpp
+	$(COMP_CMD) $(CPP_FLAGS) $(QUEST_INCLUDE) -c $<
 
 
 
 #
-# --- build
+# --- linking
 #
 
 # CUDA
 ifeq ($(GPUACCELERATED), 1)
 
-  all:	$(OBJ)
-		$(CUDA_COMPILER) $(CPP_CUDA_FLAGS) $(QUEST_INCLUDE) -o $(EXE) $(OBJ) $(LIBS)
-
-# MPI
-else ifeq ($(DISTRIBUTED), 1)
+  # a dirty hack to silence cl when NVCC linking
+  # (https://stackoverflow.com/questions/61178458/force-nvcc-straight-to-linking-phase)
+  SHUTUP := 
+  ifeq ($(COMPILER_TYPE), MSVC)
+      SHUTUP := -Xcompiler 2>nul:
+  endif
 
-  default:	$(EXE)
-  $(EXE):	$(OBJ)
-			$(MPI_WRAPPED_COMP) $(MPI_COMPILER) $(C_FLAGS) $(QUEST_INCLUDE) -o $(EXE) $(OBJ) $(LIBS) 
+  all:	$(OBJ)
+	$(CUDA_COMPILER) $(SHUTUP) $(CPP_CUDA_FLAGS) $(OBJ) $(LIBS) $(LINK_FLAGS)
 
-# C
+# C and C++
 else
 
   default:	$(EXE)
   $(EXE):	$(OBJ)
-			$(COMPILER) $(C_FLAGS) $(QUEST_INCLUDE) -o $(EXE) $(OBJ) $(LIBS) 
+			$(LINK_CMD) $(OBJ) $(LIBS) $(LINK_FLAGS)
 
 endif
 
@@ -353,11 +388,24 @@ endif
 # --- clean
 #
 
-.PHONY:		clean veryclean
-clean:
-			/bin/rm -f *.o $(EXE)
+# use OS delete command, inferred from compiler type
+# (this incorrectly assumes intel compilers aren't run on Windows, 
+#  despite them being far better there than MSVC! Fix this in Cmake)
+ifeq ($(COMPILER_TYPE), MSVC)
+    REM = del
+    EXE_FN = $(EXE).exe
+else
+    REM = /bin/rm -f
+    EXE_FN = $(EXE)
+endif
+
+.PHONY:		tidy clean veryclean
+tidy:
+			$(REM) *.o *.lib *.exp
+clean:	tidy
+			$(REM) $(EXE_FN)
 veryclean:	clean
-			/bin/rm -f *.h~ *.c~ makefile~
+			$(REM) *.h~ *.c~ makefile~
 
 
 
diff --git a/tests/test_operators.cpp b/tests/test_operators.cpp
index ef2846ff2..dd468e974 100644
--- a/tests/test_operators.cpp
+++ b/tests/test_operators.cpp
@@ -1723,7 +1723,8 @@ TEST_CASE( "applyParamNamedPhaseFunc", "[operators]" ) {
                 qreal phase = 0;
                 for (int r=0; r<numRegs; r++)
                     phase += pow(regVals[i][r] - params[2+r], 2);
-                phase = (phase == 0.)? params[1] : params[0]/sqrt(phase);
+                phase = sqrt(phase);
+                phase = (phase <= REAL_EPS)? params[1] : params[0]/phase;
                 diagMatr[i][i] = expI(phase);
             }
             
@@ -1952,8 +1953,9 @@ TEST_CASE( "applyParamNamedPhaseFunc", "[operators]" ) {
                 for (size_t i=0; i<diagMatr.size(); i++) {
                     qreal phase = 0;
                     for (int r=0; r<numRegs; r+=2)
-                        phase += pow(regVals[i][r+1]-regVals[i][r]-params[2+r/2], 2);
-                    phase = (phase == 0.)? params[1] : params[0]/sqrt(phase);
+                        phase += pow(regVals[i][r]-regVals[i][r+1]-params[2+r/2], 2);
+                    phase = sqrt(phase);
+                    phase = (phase <= REAL_EPS)? params[1] : params[0]/phase;
                     diagMatr[i][i] = expI(phase);
                 }
             }
@@ -2271,7 +2273,8 @@ TEST_CASE( "applyParamNamedPhaseFuncOverrides", "[operators]" ) {
                 qreal phase = 0;
                 for (int r=0; r<numRegs; r++)
                     phase += pow(regVals[i][r] - params[2+r], 2);
-                phase = (phase == 0.)? params[1] : params[0]/sqrt(phase);
+                phase = sqrt(phase);
+                phase = (phase <= REAL_EPS)? params[1] : params[0]/phase;
                 diagMatr[i][i] = expI(phase);
             }
             setDiagMatrixOverrides(diagMatr, numQubitsPerReg, numRegs, encoding, overrideInds, overridePhases, numOverrides);
@@ -2505,8 +2508,9 @@ TEST_CASE( "applyParamNamedPhaseFuncOverrides", "[operators]" ) {
                 for (size_t i=0; i<diagMatr.size(); i++) {
                     qreal phase = 0;
                     for (int r=0; r<numRegs; r+=2)
-                        phase += pow(regVals[i][r+1]-regVals[i][r]-params[2+r/2], 2);
-                    phase = (phase == 0.)? params[1] : params[0]/sqrt(phase);
+                        phase += pow(regVals[i][r]-regVals[i][r+1]-params[2+r/2], 2);
+                    phase = sqrt(phase);
+                    phase = (phase <= REAL_EPS)? params[1] : params[0]/phase;
                     diagMatr[i][i] = expI(phase);
                 }
                 
@@ -3203,6 +3207,141 @@ TEST_CASE( "applyPhaseFuncOverrides", "[operators]" ) {
 
 
 
+/** @sa applyProjector
+ * @ingroup unittest 
+ * @author Tyson Jones 
+ */
+TEST_CASE( "applyProjector", "[operators]" ) {
+    
+    Qureg vec = createQureg(NUM_QUBITS, QUEST_ENV);
+    Qureg mat = createDensityQureg(NUM_QUBITS, QUEST_ENV);
+    
+    SECTION( "correctness" ) {
+        
+        int qubit = GENERATE( range(0,NUM_QUBITS) );
+        int outcome = GENERATE( 0, 1 );
+        
+        // repeat the random tests below 10 times for every qubit, and for both outcomes
+        GENERATE( range(0,10) );
+        
+        SECTION( "state-vector" ) {
+            
+            SECTION( "normalised" ) {
+                
+                // use a random L2 state for every qubit & outcome
+                QVector vecRef = getRandomStateVector(NUM_QUBITS);
+                toQureg(vec, vecRef);
+                
+                // zero the reference amps whose qubit-th index bit differs from outcome
+                for (size_t ind=0; ind<vecRef.size(); ind++) {
+                    int bit = (ind >> qubit) & 1; // qubit-th bit of ind
+                    if (bit != outcome)
+                        vecRef[ind] = 0;
+                }
+                
+                applyProjector(vec, qubit, outcome);
+                REQUIRE( areEqual(vec, vecRef) );
+            }
+            SECTION( "unnormalised" ) {
+                
+                // use a random non-physical state for every qubit & outcome
+                QVector vecRef = getRandomQVector(1 << NUM_QUBITS);
+                toQureg(vec, vecRef);
+                
+                // zero the reference amps whose qubit-th index bit differs from outcome
+                for (size_t ind=0; ind<vecRef.size(); ind++) {
+                    int bit = (ind >> qubit) & 1; // qubit-th bit of ind
+                    if (bit != outcome)
+                        vecRef[ind] = 0;
+                }
+                
+                applyProjector(vec, qubit, outcome);
+                REQUIRE( areEqual(vec, vecRef) );
+            }
+        }        
+        SECTION( "density-matrix" ) {
+            
+            SECTION( "pure" ) {
+
+                QVector vecRef = getRandomStateVector(NUM_QUBITS);
+                QMatrix matRef = getPureDensityMatrix(vecRef);
+                
+                toQureg(mat, matRef);
+                applyProjector(mat, qubit, outcome);
+                
+                // zero every element unless both its row and column index have qubit-th bit == outcome
+                for (size_t r=0; r<matRef.size(); r++) {
+                    for (size_t c=0; c<matRef.size(); c++) {
+                        int ketBit = (c >> qubit) & 1;
+                        int braBit = (r >> qubit) & 1;
+                        if (!(ketBit == outcome && braBit == outcome))
+                            matRef[r][c] = 0;
+                    }
+                }
+                
+                REQUIRE( areEqual(mat, matRef) );
+            }
+            SECTION( "mixed" ) {
+                
+                QMatrix matRef = getRandomDensityMatrix(NUM_QUBITS);
+                
+                toQureg(mat, matRef);
+                applyProjector(mat, qubit, outcome);
+                
+                // zero every element unless both its row and column index have qubit-th bit == outcome
+                for (size_t r=0; r<matRef.size(); r++) {
+                    for (size_t c=0; c<matRef.size(); c++) {
+                        int ketBit = (c >> qubit) & 1;
+                        int braBit = (r >> qubit) & 1;
+                        if (!(ketBit == outcome && braBit == outcome))
+                            matRef[r][c] = 0;
+                    }
+                }
+                
+                REQUIRE( areEqual(mat, matRef) );
+            }
+            SECTION( "unnormalised" ) {
+                
+                QMatrix matRef = getRandomQMatrix(1 << NUM_QUBITS);
+                
+                toQureg(mat, matRef);
+                applyProjector(mat, qubit, outcome);
+                
+                // zero every element unless both its row and column index have qubit-th bit == outcome
+                for (size_t r=0; r<matRef.size(); r++) {
+                    for (size_t c=0; c<matRef.size(); c++) {
+                        int ketBit = (c >> qubit) & 1;
+                        int braBit = (r >> qubit) & 1;
+                        if (!(ketBit == outcome && braBit == outcome))
+                            matRef[r][c] = 0;
+                    }
+                }
+                
+                REQUIRE( areEqual(mat, matRef) );
+            }
+        }
+    }
+    SECTION( "input validation" ) {
+        
+        SECTION( "qubit index" ) {
+            
+            int qubit = GENERATE( -1, NUM_QUBITS ); // one below and one above the indices used in "correctness"
+            int outcome = 0;
+            REQUIRE_THROWS_WITH( applyProjector(mat, qubit, outcome), Contains("Invalid target qubit") );
+        }
+        SECTION( "outcome value" ) {
+            
+            int qubit = 0;
+            int outcome = GENERATE( -1, 2 ); // values other than the 0 and 1 used in "correctness"
+            REQUIRE_THROWS_WITH( applyProjector(mat, qubit, outcome), Contains("Invalid measurement outcome") );
+        }
+    }
+    destroyQureg(vec, QUEST_ENV);
+    destroyQureg(mat, QUEST_ENV);
+}
+
+
+
 /** @sa applyQFT
  * @ingroup unittest
  * @author Tyson Jones 
diff --git a/tests/test_state_initialisations.cpp b/tests/test_state_initialisations.cpp
index de8bb1354..53c5b4eb8 100644
--- a/tests/test_state_initialisations.cpp
+++ b/tests/test_state_initialisations.cpp
@@ -286,36 +286,49 @@ TEST_CASE( "initPureState", "[state_initialisations]" ) {
 TEST_CASE( "initStateFromAmps", "[state_initialisations]" ) {
     
     Qureg vec = createQureg(NUM_QUBITS, QUEST_ENV);
+    Qureg mat = createDensityQureg(NUM_QUBITS, QUEST_ENV);
     
     SECTION( "correctness" ) {
         
         SECTION( "state-vector" ) {
             
-            // create arbitrary (but distinctly non-zero) amplitudes
+            // create a random (unnormalised) reference vector
+            QVector vecRef = getRandomQVector(1<<NUM_QUBITS);
+            
             qreal ampsRe[vec.numAmpsTotal];
             qreal ampsIm[vec.numAmpsTotal];
-            QVector vecRef = QVector(vec.numAmpsTotal);
-            for (int i=0; i<vec.numAmpsTotal; i++) {
-                ampsRe[i] = 2*i;
-                ampsIm[i] = 2*i + 1;
-                vecRef[i] = (ampsRe[i]) + ampsIm[i] * (qcomp) 1i;;
+            for (size_t i=0; i<vecRef.size(); i++) {
+                ampsRe[i] = real(vecRef[i]);
+                ampsIm[i] = imag(vecRef[i]);
             }
             
-            initBlankState(vec);
             initStateFromAmps(vec, ampsRe, ampsIm);
             REQUIRE( areEqual(vec, vecRef) );
         }
-    }
-    SECTION( "input validation" ) {
-        
         SECTION( "density-matrix" ) {
             
-            Qureg mat = createDensityQureg(NUM_QUBITS, QUEST_ENV);
-            REQUIRE_THROWS_WITH( initStateFromAmps(mat, NULL, NULL), Contains("valid only for state-vectors") );
-            destroyQureg(mat, QUEST_ENV);
+            // create a random (unnormalised) reference matrix
+            QMatrix matRef = getRandomQMatrix(1<<NUM_QUBITS);
+            
+            qreal ampsRe[mat.numAmpsTotal]; // NOTE(review): runtime-sized array (a compiler extension in C++) - confirm every target compiler accepts it
+            qreal ampsIm[mat.numAmpsTotal];
+            
+            // flatten column-wise: element (r,c) is stored at index c*matRef.size() + r
+            long long int i=0;
+            for (size_t c=0; c<matRef.size(); c++) {
+                for (size_t r=0; r<matRef.size(); r++) {
+                    ampsRe[i] = real(matRef[r][c]);
+                    ampsIm[i] = imag(matRef[r][c]);
+                    i++;
+                }
+            }
+    
+            initStateFromAmps(mat, ampsRe, ampsIm);
+            REQUIRE( areEqual(mat, matRef) );
         }
     }
     destroyQureg(vec, QUEST_ENV);
+    destroyQureg(mat, QUEST_ENV);
 }