diff --git a/CHANGELOG.md b/CHANGELOG.md
index a10ad2fb..c0ab85c1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,12 +1,22 @@
 # Changelog
 TAPAS toolbox
+## [5.1.0] 2021-04-23
+
+### Added
+- HUGE: Added Monte Carlo based inference (https://doi.org/10.1002/hbm.25431)
+
+### Fixed
+- Fixed typos
+
+
 ## [5.0.0] 2021-03-15
 
 ### Added
 - [genbed](genbed/README.md): Python package for Generative Embedding
 - [UniQC](UniQC/README.md): unified neuroimaging quality control (beta release)
 - [task/BLT](task/BLT/README.md): breathing learning task
-
+- [PhysIO](PhysIO/CHANGELOG.md): Release 2021a-v8.0; new method for estimating respiratory volume per unit time (RVT) via the Hilbert transform ([Harrison et al., NeuroImage 230 (2021)](https://doi.org/10.1016/j.neuroimage.2021.117787))
+    - also: robust respiratory preprocessing: optional de-spiking (median filter) and window-padded detrending
 
 ## [4.0.0] 2020-09-09
diff --git a/CITATION.cff b/CITATION.cff
index a195607b..f25afb92 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -3,8 +3,8 @@ message: If you use this software, please cite it as below.
 authors:
   - family-names: Aponte
     given-names: Eduardo
-  - family-names: Bollman
-    given-names: Sakia
+  - family-names: Bollmann
+    given-names: Saskia
   - family-names: Brodersen
     given-names: Kay
   - family-names: Frässle
@@ -28,8 +28,8 @@ authors:
   - family-names: Yao
     given-names: Yu
 title: "TAPAS - Translational Algorithms for Psychiatry-Advancing Science"
-version: 5.0.0
-date-released: 2021-03-12
+version: 5.1.0
+date-released: 2021-04-23
 repository-code: "https://github.com/translationalneuromodeling/tapas"
 url: "http://www.translationalneuromodeling.org/tapas"
-license: GPL-3.0-or-later
\ No newline at end of file
+license: GPL-3.0-or-later
diff --git a/README.md b/README.md
index 3bad609a..1917e2ae 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 ![TAPAS Logo](misc/TapasLogo.png?raw=true "TAPAS Logo")
 
-*Version 5.0.0*
+*Version 5.1.0*
 
 T A P A S - Translational Algorithms for Psychiatry-Advancing Science.
 ========================================================================
@@ -39,10 +39,9 @@ And the following tasks:
 
 - [BLT](task/BLT/README.md): Breathing Learning Task.
 
-TAPAS also includes beta versions the following toolboxes:
+TAPAS also includes beta versions of the following toolboxes. Please note that these toolboxes have not been extensively tested and are still in active development:
 - [h2gf](h2gf/README.md): hierarchical extension of HGF.
 - [UniQC](UniQC/README.md): unified neuroimaging quality control.
-Please note that these toolboxes have not been extensively tested and are still in active development.
 
 TAPAS is written in MATLAB and Python and distributed as open source code under
 the GNU General Public License (GPL, Version 3).
@@ -53,9 +52,11 @@ INSTALLATION
 ------------
 
-TAPAS is a collection of toolboxes written in MATLAB (Version R2016b) and Python. The key
-requirement is the installation of MATLAB (produced by The MathWorks, Inc.
-Natick, MA, USA. http://www.mathworks.com/). Toolboxes written in Python currently include: genbed. For requirements see the genbed [documentation](genbed/README.md).
+TAPAS is a collection of toolboxes written in MATLAB (Version R2016b) and Python.
+- The key requirement is the installation of MATLAB (produced by The MathWorks, Inc.
+Natick, MA, USA. http://www.mathworks.com/).
+- Toolboxes written in Python currently include:
+    - genbed: For requirements see the genbed [documentation](genbed/README.md).
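Note on the new Monte Carlo feature announced in the changelog above: a minimal usage sketch, assuming a variable `dcms` (a cell array of subject DCM structs in SPM format, a placeholder here) and using only the name-value pairs documented in the HUGE manual further down in this patch. Treat it as orientation, not canonical API documentation.

```matlab
% Sketch: invoking the new Metropolized Gibbs sampler on HUGE.
% `dcms` is a placeholder for a cell array of subject DCMs in SPM format.
obj = tapas_Huge('Tag', 'MH example', 'Dcm', dcms);
obj = obj.estimate('K', 2, 'Method', 'MH', ...  % Monte Carlo inference
    'NumberOfIterations', 2e5, ...              % MH default
    'BurnIn', 1e5, ...                          % default: half the iterations
    'RetainSamples', 1000);                     % thinned post-burn-in chain
fHdls = obj.pair_plot([], 1);                   % pair plots for subject 1
```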
Please download TAPAS from our [Github Release Page](https://github.com/translationalneuromodeling/tapas/releases). @@ -101,7 +102,8 @@ Cite Me Please cite this toolbox as: -S. Frässle, E. A. Aponte, K. H. Brodersen, C. T. Do, O. Harrison, S. J. Harrison, J. Heinzle, S. Iglesias, L. Kasper, E. I. Lomakina, C. D. Mathys, M. Müller-Schrader, I. Pereira, F. H. Petzschner, S. Raman, D. Schöbi, B. Toussaint, L. A. Weber, Y. Yao, and K. E. Stephan, “TAPAS: an open-source toolbox for translational neuromodeling and computational psychiatry,” \[Online\]. Available: https://www.biorxiv.org/content/10.1101/2021.03.12.435091v1 +Frässle, S., Aponte, E.A., Bollmann, S., Brodersen, K.H., Do, C.T., Harrison, O.K., Harrison, S.J., Heinzle, J., Iglesias, S., Kasper, L., Lomakina, E.I., Mathys, C., Müller-Schrader, M., Pereira, I., Petzschner, F.H., Raman, S., Schöbi, D., Toussaint, B., Weber, L.A., Yao, Y., Stephan, K.E., 2021. TAPAS: an open-source software package for Translational Neuromodeling and Computational Psychiatry. bioRxiv 2021.03.12.435091. https://doi.org/10.1101/2021.03.12.435091 + For additional information about citations and current version, enter `tapas_version(1);` on your MATLAB command line. diff --git a/huge/.gitignore b/huge/.gitignore index 13d4a43a..42e9a5e7 100644 --- a/huge/.gitignore +++ b/huge/.gitignore @@ -2,4 +2,3 @@ /*.mex* /trash /@tapas_Huge/*.asv -/spm diff --git a/huge/@tapas_Huge/default_options.m b/huge/@tapas_Huge/default_options.m index 9e3239e7..28ccefba 100644 --- a/huge/@tapas_Huge/default_options.m +++ b/huge/@tapas_Huge/default_options.m @@ -30,10 +30,13 @@ % name value pairs options.nvp = struct(... + 'burnin' , [], ... 'confounds' , [], ... 'confoundsvariant' , 'default', ... 'dcm' , [], ... + 'inversetemperature' , 1, ... 'k' , 1, ... + 'kernelratio' , [10 100], ... 'method' , 'VB', ... 'numberofiterations' , [], ... 'omitfromclustering' , [], ... @@ -42,6 +45,7 @@ 'priordegree' , 100, ... 'priorvarianceratio' , .1, ... 'randomize' , false, ... + 'retainsamples' , 1000, ... 'saveto' , [], ... 'seed' , [], ... 'startingvaluedcm' , 'prior', ... diff --git a/huge/@tapas_Huge/estimate.m b/huge/@tapas_Huge/estimate.m index 5aef6cd6..97f67fd7 100644 --- a/huge/@tapas_Huge/estimate.m +++ b/huge/@tapas_Huge/estimate.m @@ -182,7 +182,7 @@ prior.tau_0 = obj.options.nvp.priorvarianceratio; assert(all(prior.tau_0(:) > 0), 'TAPAS:HUGE:Prior', ... 'PriorCovarianceRatio must be positive scalar.'); -% xxxTODO check size consistent + % mu_h prior.mu_h = obj.options.prior.mu_h; @@ -298,7 +298,7 @@ % randomize starting values if obj.options.nvp.randomize && ~(strcmpi(obj.options.nvp.method, 'MH') && ... - ~obj.options.mh.nSteps.clusters) %%% TODO remove exception and introduce new method for clustering only + ~obj.options.mh.nSteps.clusters) stVal2 = stVal2 + randn(size(stVal2)).*obj.options.start.gmm; end diff --git a/huge/@tapas_Huge/mh_init.m b/huge/@tapas_Huge/mh_init.m new file mode 100644 index 00000000..989ab99d --- /dev/null +++ b/huge/@tapas_Huge/mh_init.m @@ -0,0 +1,177 @@ +function [ obj ] = mh_init( obj ) +% Initialize Markov chain state for Metropolized Gibbs sampling. +% Requires obj.dcm and obj.prior to be intialized. +% +% This is a protected method of the tapas_Huge class. It cannot be called +% from outside the class. 
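Annotation on the new `mh_init` below: the sampler works on a collapsed parametrization in which cluster covariances become log-precision variables, so the VB prior `(S_0, tau_0)` is mapped to a precision matrix `T_0` and prior log-precisions `s_0`. A toy numeric check of that mapping (the values of `S_0` and `tau_0` are made up for illustration):

```matlab
% Mirrors prior.T_0 = inv(S_0).*tau_0 and prior.s_0 = -log(diag(S_0)')
% in mh_init below; S_0 and tau_0 here are arbitrary toy values.
S_0   = diag([0.01, 0.04]);   % prior scale matrix (diagonal toy example)
tau_0 = 0.1;                  % prior variance ratio
T_0   = inv(S_0) .* tau_0;    % precision on cluster means -> diag([10 2.5])
s_0   = -log(diag(S_0)');     % prior mean of the log-precisions kappa
disp(exp(s_0))                % [100 25], i.e. 1./diag(S_0)
```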
+% + + +% fMRI forward model +obj.options.fncBold = @bold_gen; + +%% priors +if isvector(obj.prior.tau_0) + obj.prior.tau_0 = diag(obj.prior.tau_0); +end +% adapt obj.prior to collapsed HUGE +prior = struct(); +prior.alpha_0 = obj.prior.alpha_0; +prior.m_0 = obj.prior.m_0; +prior.T_0 = inv(obj.prior.S_0).*obj.prior.tau_0; +prior.s_0 = - log(diag(obj.prior.S_0)'); +prior.nu_0 = obj.prior.nu_0; +if isscalar(prior.nu_0) + prior.nu_0 = repmat(prior.nu_0, obj.idx.P_c, 1); +end +prior.Pi_h = inv(obj.prior.Sigma_h); +prior.mu_h = obj.prior.mu_h; +% lambda: BOLD-to-Noise ratio in log-space +prior.lambda_0 = repmat(log(obj.prior.mu_lambda), 1, obj.R); +prior.omega_0 = repmat(4/(log(obj.prior.s2_lambda).^2), obj.R, 1); + +obj.prior = prior; + +%% starting values +init.pi = ones(1, obj.K)./obj.K; +init.mu = obj.options.start.clusters; +init.kappa = repmat(obj.prior.s_0, obj.K, 1); +init.theta_c = obj.options.start.subjects(:, 1:obj.idx.P_c); +init.theta_h = obj.options.start.subjects(:, obj.idx.P_c+1:end); +init.lambda = repmat(obj.prior.lambda_0, obj.N, 1); +% randomizing starting values +if obj.options.nvp.randomize + tmp = obj.options.start.gmm; + if obj.options.mh.nSteps.weights + init.pi = init.pi + exp(randn(size(init.pi))*tmp); %%% TODO draw from prior + init.pi = init.pi./sum(init.pi); %%% requires samples from dirichlet + end + if obj.options.mh.nSteps.clusters + init.kappa = init.kappa + randn(size(init.kappa))*tmp; + end + tmp = obj.options.start.dcm; + if obj.options.mh.nSteps.dcm + init.lambda = init.lambda + randn(size(init.lambda))*tmp; + end +end + +%% switch to single precision +if obj.options.bSinglePrec + init.pi = single(init.pi); + init.mu = single(init.mu); + init.kappa = single(init.kappa); + init.theta_c = single(init.theta_c); + init.theta_h = single(init.theta_h); + init.lambda = single(init.lambda); +end + +%% initialize state of Markov chain +obj.aux.l2pi = -.5*obj.idx.P_c*log(2*pi); +% step size +obj.aux.step = obj.options.mh.stepSize; +obj.aux.step.mu = repmat(obj.aux.step.mu, obj.K, 1); +% obj.aux.step.mu = ones(obj.K, 1); +obj.aux.step.kappa = repmat(obj.aux.step.kappa, obj.K, 1); +obj.aux.step.theta = repmat(obj.aux.step.theta, obj.N, 1); +% obj.aux.step.theta = ones(obj.N, 1); +obj.aux.step.lambda = repmat(obj.aux.step.lambda, obj.N, 1); + +% transforming proposal distribution +obj.aux.transform = struct(); +obj.aux.transform.mu = repmat(eye(obj.idx.P_c), 1, 1, obj.K); +obj.aux.logdet.mu = zeros(obj.K, 1); +% obj.aux.transform.theta = repmat(eye(obj.idx.P_c + obj.idx.P_h), 1, 1, obj.N); +obj.aux.transform.theta = eye(obj.idx.P_c + obj.idx.P_h); +% obj.aux.logdet.theta = zeros(obj.N, 1); +obj.aux.logdet.theta = 0; + +obj.aux.sample = init; % obj.aux sample +obj.aux.nProp = struct(); % number of proposals +obj.aux.nAccept = struct(); % number of accepted proposals +obj.aux.lpr = struct(); % log obj.prior +obj.aux.lpr.llh = zeros(obj.N,1); % log likelihood + +% weights +obj.aux.nProp.pi = 0; +obj.aux.nAccept.pi = 0; +pic = fliplr(cumsum(obj.aux.sample.pi(end:-1:1))); +pis = obj.aux.sample.pi./pic; +pic = pic(2:end-1); +pis = pis(1:end-1); +obj.aux.sample.pi_u = tapas_huge_logit(pis) + log(obj.K-1:-1:1); + +obj.aux.lpr.pi = max(log(obj.aux.sample.pi)*(obj.prior.alpha_0 - 1) ... 
+ + sum(log(pis)) + sum(log(1-pis)) + sum(log(pic)), -realmax); +% dPi = obj.aux.sample.pi_u - obj.prior.mu_pi; % Gaussian prior in +% obj.aux.lpr.pi = -dPi'.^2*obj.prior.pi_pi/2; % unconstrained space + +% clusters +obj.aux.nProp.mu = 0; +obj.aux.nAccept.mu = zeros(obj.K, 1); +obj.aux.nAccept.mu_rsk = zeros(obj.K, 2); +obj.aux.nProp.kappa = 0; +obj.aux.nAccept.kappa = zeros(obj.K, 1); +obj.aux.rho = zeros(obj.N, obj.K); +obj.aux.lpr.mu = zeros(1, obj.K); +obj.aux.lpr.kappa = zeros(1, obj.K); +for k = 1:obj.K + dmu_k = obj.aux.sample.mu(k,:) - obj.prior.m_0; + obj.aux.lpr.mu(k) = -.5*dmu_k*obj.prior.T_0*dmu_k'; + obj.aux.lpr.kappa(k) = -.5*(obj.aux.sample.kappa(k,:) - ... + obj.prior.s_0).^2*obj.prior.nu_0; + dtheta_c = bsxfun(@minus, obj.aux.sample.theta_c, obj.aux.sample.mu(k,:)); + obj.aux.rho(:,k) = -.5*dtheta_c.^2*exp(obj.aux.sample.kappa(k,:)') ... + + .5*sum(obj.aux.sample.kappa(k,:)) + obj.aux.l2pi; +end +obj.aux.rho_max = max(obj.aux.rho, [], 2); +obj.aux.rho = bsxfun(@minus, obj.aux.rho, obj.aux.rho_max); + +% DCM parameters +obj.aux.nProp.theta = 0; +obj.aux.nAccept.theta = zeros(obj.N, 1); +obj.aux.lpr.theta_c = log(exp(obj.aux.rho)*obj.aux.sample.pi(:)) ... + + obj.aux.rho_max; +dtheta_h = bsxfun(@minus, obj.aux.sample.theta_h, obj.prior.mu_h); +obj.aux.lpr.theta_h = -.5*sum((dtheta_h*obj.prior.Pi_h).*dtheta_h, 2); + +obj.aux.lvarBold = zeros(obj.N, obj.R); % log-variance of BOLD response +obj.aux.q_r = zeros(obj.N, 1); % number of scans +obj.aux.epsilon = cell(obj.N, 1); +for n = 1:obj.N + % number of scans + obj.aux.q_r(n) = size(obj.data(n).bold, 1); + % log-variance of BOLD response (per subject and region) + obj.aux.lvarBold(n,:) = max(log(var(obj.data(n).bold)), ... + obj.const.minLogVar); + theta = [obj.aux.sample.theta_c(n,:), obj.aux.sample.theta_h(n,:)]; + epsilon = obj.bold_gen( theta, obj.data(n), obj.inputs(n), ... + obj.options.hemo, obj.R, obj.L, obj.idx ); + assert(~(any(isnan(epsilon(:))) || any(isinf(epsilon(:)))), ... + 'TAPAS:HUGE:Init:Stability',... + 'Starting values for subject %u lead to instable DCM.', n); + obj.aux.epsilon{n} = epsilon; +end + +% Noise precision (hyperparameters) +obj.aux.nProp.lambda = 0; +obj.aux.nAccept.lambda = zeros(obj.N,1); +dLambda = bsxfun(@minus, obj.aux.sample.lambda, obj.prior.lambda_0); +obj.aux.lpr.lambda = -.5*dLambda.^2*obj.prior.omega_0(:); + +%%% TODO add support for confounds + +% log-likelihood +for n = 1:obj.N + obj.aux.lpr.llh(n) = ... + -.5*sum(obj.aux.epsilon{n}.^2*exp(obj.aux.sample.lambda(n,:) ... + - obj.aux.lvarBold(n,:))') ... + +.5*obj.aux.q_r(n)*sum(obj.aux.sample.lambda(n,:)... + - obj.aux.lvarBold(n,:)); +end + +% special proposals +obj.aux.nProp.sp = [0;0]; +obj.aux.nAccept.sp = [0;0]; + +end + diff --git a/huge/@tapas_Huge/mh_invert.m b/huge/@tapas_Huge/mh_invert.m new file mode 100644 index 00000000..51513005 --- /dev/null +++ b/huge/@tapas_Huge/mh_invert.m @@ -0,0 +1,651 @@ +function [ obj ] = mh_invert( obj ) +% Metropolized Gibbs sampling on collapsed HUGE model. +% +% This is a protected method of the tapas_Huge class. It cannot be called +% from outside the class. 
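Annotation on `mh_invert` below: each sampler in this file (weights, clusters, DCM parameters, noise) follows the same Metropolis-Hastings pattern of a random-walk proposal, a log-probability ratio, and acceptance with probability min(1, ratio). A schematic sketch of that shared skeleton, assuming a generic target `logp`; this is illustrative only and not part of the toolbox API:

```matlab
function [x, lp] = mh_step(x, lp, logp, step)
% Generic random-walk Metropolis step (illustrative sketch only).
%   x:    current state,  lp: cached log-probability of x
%   logp: function handle returning the target log-probability
%   step: proposal standard deviation
xProp  = x + randn(size(x)) .* step;   % symmetric random-walk proposal
lpProp = logp(xProp);                  % evaluate target at proposed state
if rand() < exp(lpProp - lp)           % accept with prob. min(1, exp(diff))
    x  = xProp;
    lp = lpProp;
end
end
```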
+% +% + +obj = obj.mh_init( ); +% number of iterations and burn-in +nIt = obj.options.nvp.numberofiterations; +if isempty(nIt) + nIt = 2e5; +end +nBi = obj.options.nvp.burnin; +if isempty(nBi) || nBi >= nIt + nBi = fix(nIt/2); +end + +% inverse chain temperature +invTemp = obj.options.nvp.inversetemperature; + +% reserve memory +q_nk = zeros(obj.N, obj.K); +obj.trace = struct(); +obj.trace.smp = repmat(obj.aux.sample, nIt, 1); +obj.trace.lpr = repmat(obj.aux.lpr, nIt, 1); +psrf = repmat(obj.aux.sample, fix(nIt/obj.const.nPsrf) + 1, 1); + +%% ===== MAIN LOOP ===== +for iIt = 1:nIt + + % subject level + for iMhDcm = 1:obj.options.mh.nSteps.dcm + % sample DCM (parameters) + obj = mh_sample_dcm( obj, invTemp ); + % sample lambda (hyperparameters) + obj = mh_sample_noise( obj, invTemp ); + end + + % group level + if mod(iIt, obj.options.mh.nSteps.knKm) == 0 + k = mod(iIt/obj.options.mh.nSteps.knKm - 1,obj.K) + 1; + obj = mh_sample_kmhop( obj, k ); + else + % sample weights pi + if obj.K > 1 + for iMh = 1:obj.options.mh.nSteps.weights + obj = mh_sample_weights( obj ); + end + end + % sample cluster mu and Sigma + for iMh = 1:obj.options.mh.nSteps.clusters + obj = mh_sample_cluster( obj ); + end + end + + % save sample + obj.trace.smp(iIt) = obj.aux.sample; + obj.trace.lpr(iIt) = obj.aux.lpr; + + % adapt proposal step size + if (iIt <= nBi/3) && (mod(iIt, obj.const.mhAdapt(1)) == 0) + obj = mh_adapt(obj, iIt); + end + + % accumulate cluster assigment estimate + if iIt > nBi + tmp = bsxfun(@times, exp(obj.aux.rho), obj.aux.sample.pi); + q_nk = q_nk + bsxfun(@rdivide, tmp, sum(tmp, 2)); + end + % convergence monitoring + if mod(iIt, obj.const.nPsrf) == 0 + iPsrf = iIt/obj.const.nPsrf; + psrf(iPsrf) = mh_psrf(obj.trace.smp(1:iIt), 4); + end + if obj.options.nvp.verbose && mod(iIt, 100) == 1 + fprintf('Iteration %u\n', iIt); + end + +% ------------------------------------- +end% END MAIN LOOP +% ------------------------------------- +%% Post Processing +% estimates for assignment probability +q_nk = q_nk/(nIt - nBi); + +% acceptance ratio +ratio = struct(); +obj.aux.nProp.sp(1) = obj.aux.nProp.sp(1)*obj.N; +for parameter = {'pi', 'mu', 'kappa', 'theta', 'lambda', 'sp'} + ratio.(parameter{1}) = obj.aux.nAccept.(parameter{1})./... + obj.aux.nProp.(parameter{1}); +end +% posterior mean and quantiles +postMean = struct(); +postVar = struct(); +postQuant = struct(); +for parameter = {'pi', 'mu', 'kappa', 'theta_c', 'theta_h', 'lambda'} + tmp = reshape([obj.trace.smp(nBi+1:end).(parameter{1})], ... + [size(obj.aux.sample.(parameter{1})), nIt - nBi]); + postMean.(parameter{1}) = mean(tmp, 3); + postVar.(parameter{1}) = var(tmp, 0, 3); + postQuant.(parameter{1}) = quantile(tmp, obj.options.quantiles, 3); +end +% cumulative probability levels for quantiles +postQuant.levels = obj.options.quantiles; + +% calculate PSRF post burn-in +psrf(end) = mh_psrf(obj.trace.smp(nBi + 1:end), 4); + +% collect posterior summaries +obj.posterior = struct('nIt', nIt, 'nBi', nBi, 'q_nk', q_nk, ... + 'ratio', ratio, 'psrf', psrf, 'mean', postMean, 'variance', postVar, ... + 'quantile', postQuant, 'lvarBold', obj.aux.lvarBold, ... 
+ 'nrv', exp(-postMean.lambda)); + +% thin MC chain +if ~isempty(obj.options.nTrace) + % keep only nTrace samples from post-burn-in phase + nTrace = min(nIt - nBi, obj.options.nTrace); + nThin = fix((nIt - nBi)/nTrace); + % select samples uniformly + obj.trace.smp = obj.trace.smp(end-nThin*(nTrace - 1):nThin:end); + obj.trace.lpr = obj.trace.lpr(end-nThin*(nTrace - 1):nThin:end); +end + +end + +%--------------------------------- +% SAMPLING +%--------------------------------- +%% SAMPLING: weights (pi) +function [ obj ] = mh_sample_weights( obj ) + +% propose (in unconstrained space) +prop_piu = obj.aux.sample.pi_u + ... + randn(1,obj.K-1)*obj.aux.step.pi; +% transform to unit simplex +prop_pis = 1./(1 + exp(log(obj.K-1:-1:1) - prop_piu)); +prop_pic = cumprod(1-prop_pis); +prop_pi = [prop_pis(1),-diff(prop_pic),prop_pic(end)]; + +% evaluate joint (in unconstrained space) +% log-prior on pi +prop_lpr = log(prop_pi)*(obj.prior.alpha_0 - 1) + ... + sum(log(prop_pis)) + sum(log(1-prop_pis)) + ... + sum(log(prop_pic(1:end-1))); +prop_lpr = max(prop_lpr, -realmax); +% log-conditional of theta_c given pi +prop_lcd = log(exp(obj.aux.rho)*prop_pi') + obj.aux.rho_max; + +% accept/reject +obj.aux.nProp.pi = obj.aux.nProp.pi + 1; +a = exp(prop_lpr - obj.aux.lpr.pi... + + sum(prop_lcd - obj.aux.lpr.theta_c)); +% a = exp(prop_lpr - obj.aux.lpr.pi); +if ~isnan(a) && ~isinf(a) && rand() 1 + prop_rho = bsxfun(@plus, obj.aux.rho, obj.aux.rho_max); + prop_rho(:,k) = tmp; + prop_rho_max = max(prop_rho, [], 2); + prop_rho = bsxfun(@minus, prop_rho, prop_rho_max); + prop_lcd = log(exp(prop_rho)*obj.aux.sample.pi') ... + + prop_rho_max; % log-sum-exp + else + prop_rho_max = tmp; + prop_lcd = prop_rho_max; + prop_rho = zeros(obj.N, 1); + end + + % accept/reject + a = exp(sum(prop_lcd - obj.aux.lpr.theta_c) ... + + prop_lpr_mu - obj.aux.lpr.mu(k) + dlq); +% a = exp(prop_lpr_mu - obj.aux.lpr.mu(k)); + if ~isnan(a) && ~isinf(a) && rand() 1 + prop_rho = bsxfun(@plus, obj.aux.rho, obj.aux.rho_max); + prop_rho(:,k) = tmp; + prop_rho_max = max(prop_rho, [], 2); + prop_rho = bsxfun(@minus, prop_rho, prop_rho_max); + prop_lcd = log(exp(prop_rho)*obj.aux.sample.pi') ... + + prop_rho_max; + else + prop_rho_max = tmp; + prop_lcd = prop_rho_max; + prop_rho = zeros(obj.N, 1); + end + + % accept/reject + a = exp(sum(prop_lcd - obj.aux.lpr.theta_c) ... + + prop_lpr_kappa - obj.aux.lpr.kappa(k)); +% a = exp(prop_lpr_kappa - obj.aux.lpr.kappa(k)); + if ~isnan(a) && ~isinf(a) && rand() 0 + mk = mean(X(idx(:)==k2,:),1); + ak = a0 + (nk + 1)/2; + bk = b0 + 0.5.*sum(bsxfun(@minus,X(idx(:)==k2,:),mk).^2,1); + tmpm = ak./bk; + tmpv = ak./bk.^2; + % q(kappa) + qks(k2,:,k1) = 1./log(tmpv./tmpm.^2 + 1); + qkm(k2,:,k1) = log(tmpm) - 1./2./qks(k2,:,k1); + % q(mu) + qms(k2,:,k1) = diag(obj.prior.T_0)' + nk.*exp(obj.prior.s_0); + qmm(k2,:,k1) = (diag(obj.prior.T_0)'.*obj.prior.m_0 + ... 
+ nk.*exp(obj.prior.s_0).*mk)./qms(k2,:,k1); + end + qpm(k1,k2) = nk + obj.prior.alpha_0(k2); + end +end + +% --- generate proposal for k in 1,...,K, and eval p --- +k1 = k; + +prop_lpr_mu = zeros(1,obj.K); +prop_lpr_kappa = zeros(1,obj.K); +prop_rho = obj.aux.rho; + +prop_pi = zeros(1,obj.K); +prop_kappa = zeros(obj.K,obj.idx.P_c); +prop_mu = zeros(obj.K,obj.idx.P_c); +for k2 = 1:obj.K + % draw kappa* + prop_kappa(k2,:) = qkm(k2,:,k1) + randn(1,obj.idx.P_c)./sqrt(qks(k2,:,k1)); + % draw mu* + prop_mu(k2,:) = qmm(k2,:,k1) + randn(1,obj.idx.P_c)./sqrt(qms(k2,:,k1)); + % draw pi* + prop_pi(1,k2) = gamrnd(qpm(k1,k2),1); + + % eval log p(mu*) + prop_dmu_k = prop_mu(k2,:) - obj.prior.m_0; + prop_lpr_mu(k2) = -prop_dmu_k*obj.prior.T_0*prop_dmu_k'/2; + % eval log p(kappa*) + prop_dkappa = prop_kappa(k2,:) - obj.prior.s_0; + prop_lpr_kappa(k2) = -.5*prop_dkappa.^2*obj.prior.nu_0; + + % log-conditional theta_c given mu and kappa + prop_dtheta_c = bsxfun(@minus, obj.aux.sample.theta_c, prop_mu(k2,:)); + prop_rho(:,k2) = obj.aux.l2pi - .5*prop_dtheta_c.^2*exp(prop_kappa(k2,:)') ... + + .5*sum(prop_kappa(k2,:)); + +end +prop_pi = prop_pi./sum(prop_pi); + +% eval log p(pi*) +prop_pis = [prop_pi(1),prop_pi(2:end-1)./(1 - cumsum(prop_pi(1:end-2)))]; +prop_pic = cumprod(1-prop_pis); +prop_piu = tapas_huge_logit(prop_pis) - log(1./(obj.K-1:-1:1)); +% log-prior on pi +prop_lpr_pi = log(prop_pi)*(obj.prior.alpha_0 - 1) + ... + sum(log(prop_pis)) + sum(log(1-prop_pis)) + ... + sum(log(prop_pic(1:end-1))); +prop_lpr_pi = max(prop_lpr_pi, -realmax); + +% eval log p(theta|pi*,mu*,kappa*) +prop_rho_max = max(prop_rho, [], 2); +prop_rho = bsxfun(@minus, prop_rho, prop_rho_max); +prop_lcd = log(exp(prop_rho)*prop_pi') + prop_rho_max; % log-sum-exp + +% --- iterate over all permutations and eval q --- +% eval log q(pi*,kappa*,mu*) +[prop_lq_pi,prop_lq_kappa,prop_lq_mu] = eval_lq_perm(prop_pi,... + prop_kappa,prop_mu,qpm,qkm,qks,qmm,qms,obj.K); + +% eval log q(pi,kappa,mu) +[smp_lq_pi,smp_lq_kappa,smp_lq_mu] = eval_lq_perm(obj.aux.sample.pi,... + obj.aux.sample.kappa,obj.aux.sample.mu,qpm,qkm,qks,qmm,qms,obj.K); + +m_prop_lq_mu = logmeanexp(prop_lq_mu); +m_prop_lq_kappa = logmeanexp(prop_lq_kappa); +m_prop_lq_pi = logmeanexp(prop_lq_pi); + +m_smp_lq_mu = logmeanexp(smp_lq_mu); +m_smp_lq_kappa = logmeanexp(smp_lq_kappa); +m_smp_lq_pi = logmeanexp(smp_lq_pi); + +% --- eval MH acceptance ratio --- +a = [sum(prop_lcd - obj.aux.lpr.theta_c) ... + ;sum(prop_lpr_kappa - obj.aux.lpr.kappa) ... + ;sum(prop_lpr_mu - obj.aux.lpr.mu) ... + ;prop_lpr_pi - obj.aux.lpr.pi ... + ;sum(m_smp_lq_kappa - m_prop_lq_kappa) ... + ;sum(m_smp_lq_mu - m_prop_lq_mu) ... + ;m_smp_lq_pi - m_prop_lq_pi ... +]; +% figure(1);clf;stem(a); +a = min(1,exp(sum(a))); + +% accept/reject +if ~isnan(a) && ~isinf(a) && rand() 0 + obj.aux.transform.mu(:,:,k) = bsxfun(@times, rotation, scales')'; + tmp = sum(log(scales)); + obj.aux.step.mu(k) = obj.aux.step.mu(k)*exp(... + (obj.aux.logdet.mu(k) - tmp)/obj.idx.P_c); + obj.aux.logdet.mu(k) = tmp; + end +end + +% ... subject means +theta = [reshape([obj.trace.smp(idx).theta_c], obj.N, obj.idx.P_c, []), ... 
+ reshape([obj.trace.smp(idx).theta_h], obj.N, obj.idx.P_h, [])]; +pooled = zeros(obj.N, size(theta, 3), obj.idx.P_c + obj.idx.P_h); +for n = 1:obj.N + tmp = permute(theta(n,:,:), [1 3 2]); + pooled(n, :, :) = bsxfun(@minus, tmp, mean(tmp, 2)); +end +pooled = reshape(pooled, [], obj.idx.P_c + obj.idx.P_h); +% calculate SVD on empirical covariance of posterior samples +tmp = cov(pooled); + +[rotation, scales] = svd(tmp); +scales = sqrt(diag(scales)); +% limit smallest proposal step size to 5% of maximum step size +scales(scales < max(scales)*.05) = max(scales)*.05; +if max(scales) > 0 + obj.aux.transform.theta = bsxfun(@times, rotation, scales')'; + tmp = sum(log(scales)); + tmp = exp((obj.aux.logdet.theta - tmp)/(obj.idx.P_c + obj.idx.P_h)); + for n = 1:obj.N + obj.aux.step.theta(n) = obj.aux.step.theta(n)*tmp; + end + obj.aux.logdet.theta = sum(log(scales)); +end + +end + + +%% potential scale reduction factor +function [ psrf ] = mh_psrf( trace, nChains ) + +nIt = length(trace); +psrf = struct(); +for parameter = fieldnames(trace)' + smpSize = [size(trace(1).(parameter{1})), nIt]; + try + tmp = permute(reshape([trace(:).(parameter{1})], smpSize), [3 1 2]); + + psrf.(parameter{1}) = tapas_huge_psrf( tmp, nChains ); + catch + warning('TAPAS:HUGE:convergence', [ 'Potential scale reduction ' ... + 'factor could not be calculated for %s.'], parameter{1}); + psrf.(parameter{1}) = NaN(smpSize); + end +end + +end + diff --git a/huge/@tapas_Huge/optional_inputs.m b/huge/@tapas_Huge/optional_inputs.m index 4d6e458a..358c531c 100644 --- a/huge/@tapas_Huge/optional_inputs.m +++ b/huge/@tapas_Huge/optional_inputs.m @@ -37,6 +37,14 @@ nvp = tapas_huge_parse_inputs( obj.options.nvp, varargin ); %% check and process inputs +% length of burn-in period in samples +val = nvp.burnin; +if ~isempty(val) + assert(isscalar(val) && isnumeric(val) && val > 0 && mod(val, 1) == 0, ... + 'TAPAS:HUGE:Nvp:Burnin', ... + 'Length of burn-in period must be positive integer.'); +end + % switch confounds on/off assert(any(strcmpi(nvp.confoundsvariant, {'default', 'none', 'global', ... 'cluster'})), 'TAPAS:HUGE:Nvp:ConfoundsVariant', ['Valid values for ' ... @@ -52,13 +60,31 @@ nvp.omitfromclustering = []; end - +% inverse temperature of MCMC +val = nvp.inversetemperature; +assert(isnumeric(val) && all(val >=0) && all(val <= 1), ... + 'TAPAS:HUGE:Nvp:InvTemp', ... + 'Inverse chain temperature must be between 0 and 1.') +nvp.inversetemperature = val(1); + % number of clusters obj.K = nvp.k; +% special kernel +val = nvp.kernelratio; +assert(isnumeric(val) && all(val >=0), ... + 'TAPAS:HUGE:Nvp:kernel', ... + 'Kernel ratio must be numeric and larger or equal zero.') +if numel(val) > 0 + obj.options.mh.nSteps.knGmm = fix(val(1)); +end +if numel(val) > 1 + obj.options.mh.nSteps.knKm = fix(val(2)); +end + % inversion scheme -assert(any(strcmpi(nvp.method, {'vb'})), 'TAPAS:HUGE:Nvp:Method', ... - 'Valid values for Method are: VB.') +assert(any(strcmpi(nvp.method, {'vb', 'mh'})), 'TAPAS:HUGE:Nvp:Method', ... + 'Valid value for Method are: VB and MH.') nvp.method = upper(nvp.method); % number of iterations @@ -107,6 +133,16 @@ 'PriorVarianceRatio must be a positive scalar.'); end +% number of samples to keep from MCMC +val = nvp.retainsamples; +if ischar(val) && strcmpi(val, 'all') + obj.options.nTrace = []; +elseif ~isempty(val) + assert(isnumeric(val) && mod(val(1), 1) == 0 && val(1) > 0, ... 
+        'TAPAS:HUGE:Nvp:Trace', 'RetainSamples must be a positive integer.');
+    obj.options.nTrace = val(1);
+end
+
 % path and filename for saving results
 if ~isempty(nvp.saveto)
     pathStr = fileparts(nvp.saveto);
diff --git a/huge/@tapas_Huge/pair_plot.m b/huge/@tapas_Huge/pair_plot.m
new file mode 100644
index 00000000..015ff6bf
--- /dev/null
+++ b/huge/@tapas_Huge/pair_plot.m
@@ -0,0 +1,114 @@
+function [ fHdls ] = pair_plot( obj, clusters, subjects, pdx, maxSmp )
+% Generate pair plots for cluster and subject-level parameters from MCMC
+% trace.
+%
+% INPUTS:
+%   obj      - A tapas_Huge object containing estimation results.
+%
+% OPTIONAL INPUTS:
+%   clusters - A vector containing indices of clusters for which to make
+%              pair plots. If empty, plot all clusters.
+%   subjects - A vector containing indices of subjects for which to make
+%              pair plots. If empty, plot all subjects.
+%   pdx      - A vector containing indices of parameters to plot. If empty,
+%              plot all parameters.
+%   maxSmp   - Maximum number of samples to use for plots (default: 1000).
+%
+% OUTPUTS:
+%   fHdls - Cell array of figure handles.
+%
+% EXAMPLES:
+%   [fHdls] = PAIR_PLOT(obj, [], 1, 1:3)    Generate pair plots for the
+%       first three parameters for all clusters and the first subject.
+%
+% See also tapas_Huge.PLOT
+%
+
+% Author: Yu Yao (yao@biomed.ee.ethz.ch)
+% Copyright (C) 2020 Translational Neuromodeling Unit
+%                    Institute for Biomedical Engineering,
+%                    University of Zurich and ETH Zurich.
+%
+% This file is part of TAPAS, which is released under the terms of the GNU
+% General Public Licence (GPL), version 3. For further details, see
+% <https://www.gnu.org/licenses/>.
+%
+% This software is provided "as is", without warranty of any kind, express
+% or implied, including, but not limited to the warranties of
+% merchantability, fitness for a particular purpose and non-infringement.
+%
+% This software is intended for research only. Do not use for clinical
+% purpose. Please note that this toolbox is under active development.
+% Considerable changes may occur in future releases. For support please
+% refer to:
+% https://github.com/translationalneuromodeling/tapas/issues
+%
+
+assert(strcmpi(obj.options.nvp.method,'mh') && ~isempty(obj.trace), ...
+    'TAPAS:HUGE:plot',...
+    'Pair plot is available only for the MH inversion method.');
+
+if nargin<2 || isempty(clusters)
+    clusters = 1:obj.K;
+end
+if nargin<3 || isempty(subjects)
+    subjects = 1:obj.N;
+end
+if nargin<4 || isempty(pdx)
+    pdx = 1:obj.idx.P_c+obj.idx.P_h;
+end
+if nargin < 5
+    maxSmp = 1e3;
+end
+
+
+fHdls = cell(0,1);
+nSmp = length(obj.trace.smp);
+if maxSmp < nSmp  % subsample only if the trace holds more than maxSmp draws
+    rdx = randsample(nSmp,maxSmp);
+else
+    rdx = 1:nSmp;
+end
+
+tickLabels = obj.parse_labels( obj.dcm, obj.labels, obj.idx );
+
+%% cluster parameter
+mdx = pdx(pdx<=obj.idx.P_c);
+X = reshape([obj.trace.smp(rdx).mu],[size(obj.trace.smp(1).mu),numel(rdx)]);
+for k = clusters
+    fHdls{end+1,1} = plot_pairs(squeeze(X(k,mdx,:))',tickLabels(mdx));
+    title(fHdls{end}.Children(end), ['cluster ' num2str(k)]);
+end
+
+%% subject parameter
+X = cat(2, reshape([obj.trace.smp(rdx).theta_c], ...
+    [size(obj.trace.smp(1).theta_c),numel(rdx)]), ...
+    reshape([obj.trace.smp(rdx).theta_h], ...
+ [size(obj.trace.smp(1).theta_h),numel(rdx)])); +for n = subjects + fHdls{end+1,1} = plot_pairs(squeeze(X(n,pdx,:))',tickLabels(pdx)); + title(fHdls{end}.Children(end), ['subject ' num2str(n)]); +end + +end + + +function [ fHdl ] = plot_pairs(X, labels) +fHdl = figure(); +P = size(X,2); +ax = cell(P); +for p1 = 1:P + ax{p1,p1} = subplot(P,P,(p1-1)*P+p1); + histogram(ax{p1,p1}, X(:,p1), 'normalization', 'pdf'); + for p2 = p1+1:P + ax{p1,p2} = subplot(P,P,(p1-1)*P+p2); + tmp = scatter(ax{p1,p2}, X(:,p2), X(:,p1), 5, 'filled'); + ax{p2,p1} = subplot(P,P,(p2-1)*P+p1); + copyobj(tmp,ax{p2,p1}); + end +end +for p = 1:P + ylabel(ax{p,1},labels{p}); + xlabel(ax{P,p},labels{p}); +end +end diff --git a/huge/@tapas_Huge/parse_labels.m b/huge/@tapas_Huge/parse_labels.m index 90d04a36..ca2e7a0a 100644 --- a/huge/@tapas_Huge/parse_labels.m +++ b/huge/@tapas_Huge/parse_labels.m @@ -2,7 +2,7 @@ % Generate labels for axis ticks. % % This is a protected method of the tapas_Huge class. It cannot be called -% from outsite the class. +% from outside the class. % % Author: Yu Yao (yao@biomed.ee.ethz.ch) diff --git a/huge/@tapas_Huge/simulate.m b/huge/@tapas_Huge/simulate.m index 6904cfa9..82dcdc6a 100644 --- a/huge/@tapas_Huge/simulate.m +++ b/huge/@tapas_Huge/simulate.m @@ -146,7 +146,7 @@ for k = 1:obj.K d{k} = repmat(k, sizes(k), 1); if ~isfield(clusters{k}.Y, 'y') - rSmp = clusters{k}.Y.dt/clusters{k}.U.dt; %%% TODO 16 + rSmp = clusters{k}.Y.dt/clusters{k}.U.dt; clusters{k}.Y.y = zeros(length(clusters{k}.U.u(rSmp:rSmp:end, 1)), ... clusters{k}.n); end @@ -188,7 +188,7 @@ dcm.U = options.inputs{n}; end L = size(dcm.U.u, 2); - rSmp = dcm.Y.dt/dcm.U.dt; %%% fix + rSmp = dcm.Y.dt/dcm.U.dt; data = struct( 'bold', zeros(fix(size(dcm.U.u, 1)/rSmp), dcm.n), ... 'tr', dcm.Y.dt, 'te', 0.04, 'res', []); diff --git a/huge/@tapas_Huge/tapas_Huge.m b/huge/@tapas_Huge/tapas_Huge.m index f1d30d6e..1393cf38 100644 --- a/huge/@tapas_Huge/tapas_Huge.m +++ b/huge/@tapas_Huge/tapas_Huge.m @@ -5,7 +5,7 @@ % (task-based) fMRI data from heterogeneous cohorts. For more details on % the theory behind the HUGE model, see: % -% Yao Y, Raman SS, Schiek M, Leff A, Frssle S, Stephan KE (2018). +% Yao Y, Raman SS, Schiek M, Leff A, Fr�ssle S, Stephan KE (2018). % Variational Bayesian Inversion for Hierarchical Unsupervised Generative % Embedding (HUGE). NeuroImage, 179: 604-619 % https://doi.org/10.1016/j.neuroimage.2018.06.073 @@ -113,7 +113,7 @@ 'nPsrf', 1e5, ... 
% rate for convergence monitoring via PSRF 'baseSc', -.5) % baseline self-connection - version = '2020-09'; % Toolbox version + version = '2021-04'; % Toolbox version end @@ -153,7 +153,7 @@ obj.options = obj.default_options( ); if nargin == 1 - %%% TODO build from posterior + %%% TODO add support for building object from posterior elseif nargin > 1 % key value pairs obj = obj.optional_inputs(varargin{:}); @@ -192,6 +192,9 @@ % plot posterior [ fHdl ] = plot( obj, subjects ) + % plot posterior + [ fHdl ] = pair_plot( obj, clusters, subjects, pdx, maxSmp ) + % save object properties to disk [ ] = save( filename, obj, varargin ) @@ -222,6 +225,11 @@ % VB initialization [ obj ] = vb_init( obj ) + % MH sampling + [ obj ] = mh_invert( obj ) + % MH initialization + [ obj ] = mh_init( obj ) + end methods (Static, Access = protected) diff --git a/huge/@tapas_Huge/test.m b/huge/@tapas_Huge/test.m index 026ba4ef..41fcc4de 100644 --- a/huge/@tapas_Huge/test.m +++ b/huge/@tapas_Huge/test.m @@ -158,8 +158,23 @@ %% test: estimate fprintf('Testing model inversion: \n') +% monte carlo +nSmp = 2048; +obj = obj.estimate('K', 2,'method','mh','numberofiterations',nSmp); +assert(obj.K == 2, 'tapas:huge:test', 'estimate K2 mcmc'); +assert(obj.options.nvp.numberofiterations==nSmp, 'tapas:huge:test', 'estimate nIt'); +assert(strcmp(obj.options.nvp.method,'MH'), 'tapas:huge:test', 'estimate mcmc'); +assert(all(size(obj.posterior.q_nk) == [obj.N obj.K]), ... + 'tapas:huge:test', 'estimate q_nk2 mcmc'); +assert(length(obj.trace.smp)==1000, 'tapas:huge:test', 'estimate mcmc trace'); + +% variational inference +obj = tapas_Huge('Tag', 'import'); +obj = obj.import(listDcms); + obj = obj.estimate('K', 1); assert(obj.K == 1, 'tapas:huge:test', 'estimate K1'); +assert(strcmp(obj.options.nvp.method,'VB'), 'tapas:huge:test', 'estimate VB'); assert(all(size(obj.posterior.q_nk) == [obj.N obj.K]), ... 'tapas:huge:test', 'estimate q_nk1'); obj = obj.estimate('K', 2); diff --git a/huge/@tapas_Huge/vb_invert.m b/huge/@tapas_Huge/vb_invert.m index b88fad1d..a2691bc4 100644 --- a/huge/@tapas_Huge/vb_invert.m +++ b/huge/@tapas_Huge/vb_invert.m @@ -60,7 +60,7 @@ obj = update_confounds( obj ); end % update m_k, tau_k, nu_k, S_k - obj = update_clusters( obj ); %%% TODO cluster-specific prior mean and scale + obj = update_clusters( obj ); %%% TODO add support for cluster-specific prior mean and scale end % update mu_n, Sigma_n, b_nr @@ -324,7 +324,7 @@ Pi_n(1:obj.idx.P_c, 1:obj.idx.P_c); Pi_n(obj.idx.P_c+1:end, obj.idx.P_c+1:end) = obj.aux.Pi_h + ... Pi_n(obj.idx.P_c+1:end, obj.idx.P_c+1:end); - obj.posterior.Sigma_n(:,:,n) = inv(Pi_n); %%% TODO Pi + diag(delta) + obj.posterior.Sigma_n(:,:,n) = inv(Pi_n); obj.aux.ldSigma(n) = - tapas_huge_logdet(Pi_n); % posterior mean tmp = obj.aux.epsilon{n}(:) + obj.aux.G{n}*obj.posterior.mu_n(n,:)'; diff --git a/huge/@tapas_Huge/vb_nfe.m b/huge/@tapas_Huge/vb_nfe.m index 43dca82a..5c4eb909 100644 --- a/huge/@tapas_Huge/vb_nfe.m +++ b/huge/@tapas_Huge/vb_nfe.m @@ -41,7 +41,7 @@ F_on = F_on -.5*obj.idx.P_c*sum((obj.aux.q_k + obj.prior.tau_0)./... obj.posterior.tau + log(obj.posterior.tau)); -F_on = F_on + obj.K*obj.idx.P_c*(obj.idx.P_c - 1)/4*log(pi) + ... %%% -> off +F_on = F_on + obj.K*obj.idx.P_c*(obj.idx.P_c - 1)/4*log(pi) + ... sum(sum(gammaln(bsxfun(@minus, obj.posterior.nu, 0:obj.idx.P_c-1)/2))); F_on = F_on +.5*(q_plus_nu - obj.posterior.nu)'* ... 
diff --git a/huge/tapas_huge_manual.pdf b/huge/tapas_huge_manual.pdf
index 31bd407e..30794843 100644
Binary files a/huge/tapas_huge_manual.pdf and b/huge/tapas_huge_manual.pdf differ
diff --git a/huge/tapas_huge_manual.tex b/huge/tapas_huge_manual.tex
index 6f165e84..2d50d821 100644
--- a/huge/tapas_huge_manual.tex
+++ b/huge/tapas_huge_manual.tex
@@ -10,6 +10,7 @@
 \definecolor{lgray}{gray}{0.65}
 \definecolor{llgray}{gray}{0.85}
 \setlength{\parindent}{0pt}
+
 \usepackage[utf8]{inputenc}
 \usepackage{natbib}
 \usepackage{lmodern}
@@ -27,9 +28,6 @@
 
 \tableofcontents
 
-
-
-
 \section{Introduction}
 
 \begin{par}
@@ -50,15 +48,15 @@ \section{The \texttt{tapas\_Huge} Class}
 \end{par} \vspace{1em}
 \begin{par}
 Instances of the \texttt{tapas\_Huge} class are created by calling the class constructor \texttt{tapas\_Huge()}. The constructor is a function that can be called without arguments, which creates an empty \texttt{tapas\_Huge} object:
-\end{par}
+\end{par} \vspace{1em}
 \begin{verbatim}obj = tapas_Huge()\end{verbatim}
 \begin{par}
 However, the constructor also accepts optional arguments in the form of name-value pairs. For example, one can add a short description using the \texttt{tag} property
-\end{par}
+\end{par} \vspace{1em}
 \begin{verbatim}obj = tapas_Huge('Tag','my model')\end{verbatim}
 \begin{par}
 or import data using the \texttt{Dcm} property
-\end{par}
+\end{par} \vspace{1em}
 \begin{verbatim}obj = tapas_Huge('Dcm',dcms)\end{verbatim}
 \begin{par}
 For more examples, see the demo script \texttt{tapas\_huge\_demo.mlx}.
@@ -69,7 +67,7 @@ \section{Class Properties}
 \begin{par}
 Each instance of the \texttt{tapas\_Huge} class stores data, options and results in class properties, which are documented below. Properties can be accessed using dot notation:
-\end{par}
+\end{par} \vspace{1em}
 \begin{verbatim}value = obj.property\end{verbatim}
 \begin{par}
 where \texttt{obj} is a class instance and \texttt{property} is the name of the property.
@@ -199,7 +197,7 @@ \subsection*{ \texttt{prior}}
 
 \subsection*{ \texttt{posterior}}
 \begin{par}
-A struct storing the estimation results. The fields correspond to the parameters of the posterior distribution (see\cite{yao2018}).
+A struct storing the estimation results. For VB-based inference, the fields correspond to the parameters of the posterior distribution (see \cite{yao2018}).
 \end{par} \vspace{1em}
 \begin{itemize}[align=parleft, labelsep=10ex, leftmargin=15ex]
 \setlength{\itemsep}{-1ex}
@@ -218,6 +216,27 @@ \subsection*{ \texttt{posterior}}
   \item [ \texttt{nfe} ] The negative free energy ($F$).
   \item [ \texttt{mu\_r} ] When using group-level confounds, array containing posterior mean of residual DCM parameters, ($m_r$). Otherwise, empty array.
   \item [ \texttt{Sigma\_r} ] When using group-level confounds, array containing posterior covariance of residual DCM parameters, ($S_r$). Otherwise, empty array.
+\end{itemize}
+\begin{par}
+When using MH sampling, the fields contain posterior summary statistics and quantiles. Note, however, that the statistics associated with cluster-level parameters should be treated with caution, since they are calculated without relabeling.
+\end{par} \vspace{1em}
+\begin{itemize}[align=parleft, labelsep=10ex, leftmargin=15ex]
+\setlength{\itemsep}{-1ex}
+  \item [ \texttt{nIt} ] Number of samples (incl. burn-in).
+  \item [ \texttt{nBi} ] Length of burn-in period.
+  \item [ \texttt{q\_nk} ] Estimated subject assignment probability.
+  \item [ \texttt{ratio} ] Acceptance ratios.
+  \item [ \texttt{psrf} ] Potential scale reduction factor (convergence statistics).
+  \item [ \texttt{mean} ] Posterior mean.
+  \item [ \texttt{variance} ] Posterior variance.
+  \item [ \texttt{quantile} ] Posterior quantiles for the probability levels stored in \texttt{obj.options.quantiles}.
+  \item [ \texttt{lvarBold} ] Log-variance of observed BOLD signal.
+\end{itemize}
+\begin{par}
+Independent of the inversion method, \texttt{posterior} always contains the following fields:
+\end{par} \vspace{1em}
+\begin{itemize}[align=parleft, labelsep=10ex, leftmargin=15ex]
+\setlength{\itemsep}{-1ex}
   \item [ \texttt{nrv} ] Normalized residual variance. An array containing the amount of non-explained variance (1 minus variance explained) per subject and per region.
   \item [ \texttt{bPurity} ] When using synthetic data, balanced purity of estimation result.
   \item [ \texttt{method} ] Character array indicating inversion method.
@@ -229,7 +248,7 @@ \subsection*{ \texttt{posterior}}
 
 \subsection*{ \texttt{trace}}
 \begin{par}
-A struct containing convergence diagnostics, with fields:
+If the inference method is VB, this is a struct containing convergence diagnostics, with fields:
 \end{par} \vspace{1em}
 \begin{itemize}[align=parleft, labelsep=10ex, leftmargin=15ex]
 \setlength{\itemsep}{-1ex}
@@ -239,12 +258,20 @@ \subsection*{ \texttt{trace}}
   \item [ \texttt{kmeans} ] Struct containing information about the K-means initialization step.
   \item [ \texttt{epsilon} ] Cell array containing the residual (observed minus predicted BOLD time series) for each subject at convergence.
 \end{itemize}
+\begin{par}
+If the inference method is MH, this is a struct with fields:
+\end{par} \vspace{1em}
+\begin{itemize}[align=parleft, labelsep=10ex, leftmargin=15ex]
+\setlength{\itemsep}{-1ex}
+  \item [ \texttt{smp} ] Struct array containing posterior samples.
+  \item [ \texttt{lpr} ] Struct array containing log-probabilities for the samples in \texttt{smp}.
+\end{itemize}
 
 \subsection*{ \texttt{aux}}
 \begin{par}
-A struct used for storing auxiliary variables during model estimation.
+A struct used for storing auxiliary variables during model estimation. It will be cleared after inference.
 \end{par} \vspace{1em}
 
 
@@ -289,11 +311,11 @@ \section{Class Methods}
 
 \begin{par}
 The main functionalities of the HUGE toolbox are implemented as methods of the \texttt{tapas\_Huge} class. These methods are documented below. There are two equivalent ways to call class methods:
-\end{par}
+\end{par} \vspace{1em}
 \begin{verbatim}obj = obj.method( ... )\end{verbatim}
 \begin{par}
 or
-\end{par}
+\end{par} \vspace{1em}
 \begin{verbatim}obj = method( obj, ... )\end{verbatim}
 \begin{par}
 where \texttt{obj} is an instance of the \texttt{tapas\_Huge} class and \texttt{method} is the class method you want to call.
 \end{par} \vspace{1em}
 
 
 \subsection*{ \texttt{tapas\_Huge.estimate}}
 
- \begin{verbatim} Estimate parameters of the HUGE model.
+\begin{verbatim} Estimate parameters of the HUGE model.
 
 INPUTS:
   obj - A tapas_Huge object containing fMRI time series.
@@ -339,7 +361,7 @@ \subsection*{ \texttt{tapas\_Huge.estimate}}
 
 \subsection*{ \texttt{tapas\_Huge.simulate}}
 
- \begin{verbatim} Generate synthetic task-based fMRI time series data, using HUGE as a
+\begin{verbatim} Generate synthetic task-based fMRI time series data, using HUGE as a
  generative model.
 
 INPUTS:
@@ -397,7 +419,7 @@ \subsection*{ \texttt{tapas\_Huge.simulate}}
 
 \subsection*{ \texttt{tapas\_Huge.plot}}
 
- \begin{verbatim} Plot cluster and subject-level estimation result from HUGE model.
+\begin{verbatim} Plot cluster and subject-level estimation result from HUGE model.
INPUTS: obj - A tapas_Huge object containing estimation results. @@ -415,9 +437,39 @@ \subsection*{ \texttt{tapas\_Huge.plot}} \end{verbatim} \color{black} +\subsection*{ \texttt{tapas\_Huge.pair\_plot}} + +\begin{verbatim} Generate pair plots for cluster and subject-level parameters from MCMC + trace. + + INPUTS: + obj - A tapas_Huge object containing estimation results. + + OPTIONAL INPUTS: + clusters - A vector containing indices of clusters for which to make + pair plots. If empty, plot all clusters. + subjects - A vector containing indices of subjects for which to make + pair plots. If empty, plot all subjects. + pdx - A vector containing indices of parameters to plot. If empty, + plot all parameters. + maxSmp - Maximum number of samples to use for plots (default: 1000). + + OUTPUTS: + fHdls - Cell array of figure handles. + + EXAMPLES: + [fHdls] = PAIRS_PLOT(obj, [], 1, 1:3) Generate pairs plot for the + first three parameters for all clusters and the first subject. + + See also tapas_Huge.PLOT + + +\end{verbatim} \color{black} + + \subsection*{ \texttt{tapas\_Huge.save}} - \begin{verbatim} Save properties of HUGE object to mat file. +\begin{verbatim} Save properties of HUGE object to mat file. INPUTS: filename - File name. @@ -438,7 +490,7 @@ \subsection*{ \texttt{tapas\_Huge.save}} \subsection*{ \texttt{tapas\_Huge.import}} - \begin{verbatim} Import fMRI time series data into HUGE object. +\begin{verbatim} Import fMRI time series data into HUGE object. WARNING: Importing data into a HUGE object will delete any data and results which are already stored in that object. @@ -484,7 +536,7 @@ \subsection*{ \texttt{tapas\_Huge.import}} \subsection*{ \texttt{tapas\_Huge.export}} - \begin{verbatim} Export results and data from HUGE object to SPM's DCM format. +\begin{verbatim} Export results and data from HUGE object to SPM's DCM format. INPUTS: obj - A tapas_Huge object containing data. @@ -510,7 +562,7 @@ \subsection*{ \texttt{tapas\_Huge.export}} \subsection*{ \texttt{tapas\_Huge.remove}} - \begin{verbatim} Remove data (fMRI time series, confounds, DCM network structure, ... ) +\begin{verbatim} Remove data (fMRI time series, confounds, DCM network structure, ... ) and estimation results from HUGE object. INPUTS: @@ -550,6 +602,17 @@ \section{Name-Value Pair Arguments} \renewcommand{\tabcolsep}{0.2cm} \begin{tabular}{|l|p{10cm}|} \hline +\rowcolor{lgray} Name: & \verb+BurnIn+ \\ +\hline +\rowcolor{llgray} Value: & positive integer (default: half of number of iterations)\\ +\hline + \rowcolor{llgray} Description: & Length of burn-in period in samples. +\\ +\hline +\end{tabular} +\vspace{1em} +\begin{tabular}{|l|p{10cm}|} +\hline \rowcolor{lgray} Name: & \verb+Confounds+ \\ \hline \rowcolor{llgray} Value: & double array\\ @@ -588,6 +651,18 @@ \section{Name-Value Pair Arguments} \vspace{1em} \begin{tabular}{|l|p{10cm}|} \hline +\rowcolor{lgray} Name: & \verb+InverseTemperature+ \\ +\hline +\rowcolor{llgray} Value: & double between 0 and 1 (default: 1)\\ +\hline + \rowcolor{llgray} Description: & Inverse chain temperature for Monte + Carlo based methods. 
+\\ +\hline +\end{tabular} +\vspace{1em} +\begin{tabular}{|l|p{10cm}|} +\hline \rowcolor{lgray} Name: & \verb+K+ \\ \hline \rowcolor{llgray} Value: & positive integer (default: 1)\\ @@ -599,11 +674,24 @@ \section{Name-Value Pair Arguments} \vspace{1em} \begin{tabular}{|l|p{10cm}|} \hline +\rowcolor{lgray} Name: & \verb+KernelRatio+ \\ +\hline +\rowcolor{llgray} Value: & 2x1 array of positive integer or zero (default: [10 100])\\ +\hline + \rowcolor{llgray} Description: & Inverse frequency of selecting special kernel during MCMC inversion. 1st element: GMM kernel, 2nd element: k-means kernel. 10 means: use special kernel for every 10th sample. 0 means: do not use special kernel. +\\ +\hline +\end{tabular} +\vspace{1em} +\begin{tabular}{|l|p{10cm}|} +\hline \rowcolor{lgray} Name: & \verb+Method+ \\ \hline -\rowcolor{llgray} Value: & \verb+'VB'+\\ +\rowcolor{llgray} Value: & \verb+'VB' | 'MH'+\\ \hline - \rowcolor{llgray} Description: & Name of inversion method specified as character array. VB: variational Bayes + \rowcolor{llgray} Description: & Name of inversion method specified as + character array. VB: variational Bayes, MH: Metropolized Gibbs sampling + on collapsed HUGE model. \\ \hline \end{tabular} @@ -693,6 +781,20 @@ \section{Name-Value Pair Arguments} \vspace{1em} \begin{tabular}{|l|p{10cm}|} \hline +\rowcolor{lgray} Name: & \verb+RetainSamples+ \\ +\hline +\rowcolor{llgray} Value: & 'all' | positive integer (default: 1000)\\ +\hline + \rowcolor{llgray} Description: & Number of samples from the Markkov + chain to be kept. 'all' keeps the entire chain including burn-in. + Otherwise discards burn-in and retains the specified number of samples + by uniformly thinning the post-burn-in samples. +\\ +\hline +\end{tabular} +\vspace{1em} +\begin{tabular}{|l|p{10cm}|} +\hline \rowcolor{lgray} Name: & \verb+SaveTo+ \\ \hline \rowcolor{llgray} Value: & character array\\ @@ -782,7 +884,7 @@ \section{Other Functions} \subsection*{ \texttt{tapas\_huge\_boxcar}} - \begin{verbatim} Generate a boxcar function for use as experimental stimulus. All +\begin{verbatim} Generate a boxcar function for use as experimental stimulus. All timing-related arguments must be specified in seconds. INPUTS: @@ -811,7 +913,7 @@ \subsection*{ \texttt{tapas\_huge\_boxcar}} \subsection*{ \texttt{tapas\_huge\_bold}} - \begin{verbatim} Integrates the DCM forward equations to generate the predicted fMRI bold +\begin{verbatim} Integrates the DCM forward equations to generate the predicted fMRI bold time series. INPUTS: @@ -848,7 +950,7 @@ \subsection*{ \texttt{tapas\_huge\_bold}} \subsection*{ \texttt{tapas\_huge\_bpurity}} - \begin{verbatim} Calculate balanced purity (see Brodersen2014 Eq. 13 and 14) for a set of +\begin{verbatim} Calculate balanced purity (see Brodersen2014 Eq. 13 and 14) for a set of ground truth labels and a set of estimated labels INPUTS: @@ -881,7 +983,7 @@ \subsection*{ \texttt{tapas\_huge\_logdet}} \begin{par} This function is intended for internal use only. Do not call directly \end{par} \vspace{1em} - \begin{verbatim} Numerical stable calculation of log-determinant for positive-definite +\begin{verbatim} Numerical stable calculation of log-determinant for positive-definite matrix. INPUT: @@ -903,7 +1005,7 @@ \subsection*{ \texttt{tapas\_huge\_logit}} \begin{par} This function is intended for internal use only. Do not call directly \end{par} \vspace{1em} - \begin{verbatim} Numerical stable calculation of logit function. +\begin{verbatim} Numerical stable calculation of logit function. 
INPUTS: x - Array of double. @@ -920,7 +1022,7 @@ \subsection*{ \texttt{tapas\_huge\_parse\_inputs}} \begin{par} This function is intended for internal use only. Do not call directly \end{par} \vspace{1em} - \begin{verbatim} Parse name-value pair type arguments into a struct. +\begin{verbatim} Parse name-value pair type arguments into a struct. INPUTS: opts - Struct containing all valid names as field names and diff --git a/huge/tapas_huge_nfe.m b/huge/tapas_huge_nfe.m new file mode 100644 index 00000000..5e31be57 --- /dev/null +++ b/huge/tapas_huge_nfe.m @@ -0,0 +1,428 @@ +%% [freeEnergyFull, feAux] = tapas_huge_nfe( counts, priors, posterior, feAux, listSubjects ) +% +% Calculates or updates negative free energy (NFE) for HUGE. +% +% INPUT: +% counts - number of parameters and subjects +% priors - parameters of prior distribution +% posterior - parameters of approximate posterior distribution. +% feAux - struct containing intermediate values of the NFE. +% listSubjects - indices of subjects for which to update the NFE. +% +% OUTPUT: +% freeEnergyFull - value of the negative free energy. +% feAux - struct containing intermediate values of the NFE. +% +% REFERENCE: +% +% Yao Y, Raman SS, Schiek M, Leff A, Frssle S, Stephan KE (2018). +% Variational Bayesian Inversion for Hierarchical Unsupervised Generative +% Embedding (HUGE). NeuroImage, 179: 604-619 +% +% https://doi.org/10.1016/j.neuroimage.2018.06.073 +% + +% +% Author: Yu Yao (yao@biomed.ee.ethz.ch) +% Copyright (C) 2018 Translational Neuromodeling Unit +% Institute for Biomedical Engineering, +% University of Zurich and ETH Zurich. +% +% This file is part of TAPAS, which is released under the terms of the GNU +% General Public Licence (GPL), version 3. For further details, see +% . +% +% This software is intended for research only. Do not use for clinical +% purpose. Please note that this toolbox is in an early stage of +% development. Considerable changes are planned for future releases. For +% support please refer to: +% https://github.com/translationalneuromodeling/tapas/issues +% +function [freeEnergyFull, feAux] = tapas_huge_nfe(counts,priors,posterior,feAux,listSubjects) +% +fepIdx = 0; +freeEnergyFullPart = zeros(100,1); + + +nParameters = counts.nParameters; +nDcmParamsInfCon = nParameters(2,1); %%% d_c +nDcmParamsInfHem = nParameters(2,2); %%% d_h +nDcmParamsInfAll = nParameters(2,3); %%% d + +if nargin < 4 + feAux.line2 = zeros(counts.nSubjects,counts.nClusters,2); + feAux.line4 = zeros(counts.nSubjects,2); +end +if nargin < 5 + listSubjects = 1:counts.nSubjects; +end + + + +%% line 1 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -sum(sum(bsxfun(@times,posterior.softAssign,... + posterior.logDetClustersSigma.')/2)); + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -sum(posterior.softAssign(:))*nDcmParamsInfCon*log(pi)/2; + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +sum(sum(... + bsxfun(@times,... + posterior.softAssign,... + sum(psi(bsxfun(@plus,... + posterior.clustersDeg,... + 1-(1:nDcmParamsInfCon))/2),2).'... + )... + ))/2; + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -sum(sum(bsxfun(@times,... + posterior.softAssign,... + nDcmParamsInfCon./posterior.clustersTau.')... + ))/2; + + + +%% line 2 +fepIdx = fepIdx + 1; +for iSub = listSubjects + for iCluster = 1:counts.nClusters + feAux.line2(iSub,iCluster,1) = ... + -posterior.clustersDeg(iCluster)*... + posterior.softAssign(iSub,iCluster)*(... + trace(... + posterior.clustersSigma(:,:,iCluster)\... 
+ posterior.dcmSigma(1:nDcmParamsInfCon,... + 1:nDcmParamsInfCon,... + iSub... + )... + )... + )/2; + end +end +freeEnergyFullPart(fepIdx) = sum(sum(feAux.line2(:,:,1))); +%%% + +fepIdx = fepIdx + 1; +for iSub = listSubjects + for iCluster = 1:counts.nClusters + feAux.line2(iSub,iCluster,2) = ... + -posterior.clustersDeg(iCluster)*... + posterior.softAssign(iSub,iCluster)*(... + (posterior.dcmMean(iSub,1:nDcmParamsInfCon)... + -posterior.clustersMean(iCluster,:))*... + (posterior.clustersSigma(:,:,iCluster)\... + (posterior.dcmMean(iSub,1:nDcmParamsInfCon)... + -posterior.clustersMean(iCluster,:)).' ... + )... + )/2; + end +end +freeEnergyFullPart(fepIdx) = sum(sum(feAux.line2(:,:,2))); +%%% + + +%% line 3 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -sum(counts.nMeasurements)*log(2*pi)/2; + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +sum(sum(... + bsxfun(@times,... + counts.nMeasurementsPerState,... + psi(posterior.noiseShape) - log(posterior.noiseInvScale))... + ))/2; + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -posterior.meanNoisePrecision(:).'*posterior.modifiedSumSqrErr(:)/2; + + +%% line 4 +fepIdx = fepIdx + 1; +for iSub = listSubjects + feAux.line4(iSub,1) = ... + -trace(priors.hemSigma\posterior.dcmSigma(nDcmParamsInfCon+1:end,... + nDcmParamsInfCon+1:end,... + iSub... + ) ... + )/2; +end +freeEnergyFullPart(fepIdx) = sum(feAux.line4(:,1)); + + +fepIdx = fepIdx + 1; +for iSub = listSubjects + feAux.line4(iSub,2) = ... + -(posterior.dcmMean(iSub,nDcmParamsInfCon+1:end)-priors.hemMean)*... + (priors.hemSigma\... + (posterior.dcmMean(iSub,nDcmParamsInfCon+1:end)... + -priors.hemMean).'... + )/2; +end +freeEnergyFullPart(fepIdx) = sum(feAux.line4(:,2)); + + +%% line 5 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -counts.nSubjects*tapas_huge_logdet(priors.hemSigma)/2; + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -counts.nSubjects*nDcmParamsInfHem*log(2*pi)/2; + + +%% line 6 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -counts.nSubjects*sum(gammaln(priors.noiseShape)); + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +counts.nSubjects*sum(priors.noiseShape.*log(priors.noiseInvScale)); + + +%% line 7 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +sum(sum(bsxfun(@times, ... + priors.noiseShape-1, ... + (psi(posterior.noiseShape)-log(posterior.noiseInvScale)) ... + ))); + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -sum(sum(bsxfun(@times, ... + priors.noiseInvScale, ... + posterior.noiseShape./posterior.noiseInvScale ... + ))); + + +%% line 8 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -nDcmParamsInfCon*sum(priors.clustersTau./posterior.clustersTau)/2; + + + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = 0; +for iCluster = 1:counts.nClusters + freeEnergyFullPart(fepIdx) = freeEnergyFullPart(fepIdx) + ... + -priors.clustersTau*posterior.clustersDeg(iCluster)/2 ... + *(posterior.clustersMean(iCluster,:)-priors.clustersMean) ... + *(posterior.clustersSigma(:,:,iCluster)\... + (posterior.clustersMean(iCluster,:)-priors.clustersMean).'); +end +%%% + + +%% line 9 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = 0; +for iCluster = 1:counts.nClusters + freeEnergyFullPart(fepIdx) = freeEnergyFullPart(fepIdx) + ... + -posterior.clustersDeg(iCluster)/2 ... + *trace(posterior.clustersSigma(:,:,iCluster)\... + priors.clustersSigma); +end +%%% + + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -(priors.clustersDeg+nDcmParamsInfCon+2)/2*(... 
+ sum(posterior.logDetClustersSigma) ... + -sum(sum(psi(bsxfun(@plus,... + posterior.clustersDeg,... + 1-(1:nDcmParamsInfCon)... + )/2))) ... + ); + + + +%% line 10 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -counts.nClusters*nDcmParamsInfCon*(nDcmParamsInfCon-1)/4*log(pi); +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -counts.nClusters*sum(gammaln((priors.clustersDeg+1-... + (1:nDcmParamsInfCon))/2)); + + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +counts.nClusters*priors.clustersDeg*tapas_huge_logdet(... + priors.clustersSigma)/2; + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +counts.nClusters*nDcmParamsInfCon*(nDcmParamsInfCon+2)/2*log(2); + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -counts.nClusters*nDcmParamsInfCon*log(2*pi)/2; +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +counts.nClusters*nDcmParamsInfCon*log(priors.clustersTau)/2; + + + +%% line 11 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +sum(sum(bsxfun(@times,... + posterior.softAssign,... + psi(posterior.alpha.')-psi(sum(posterior.alpha))... + ))); + + +%% line 12 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +gammaln(sum(priors.alpha))-sum(gammaln(priors.alpha)); + + +%% line 13 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +(priors.alpha.'-1)*(psi(posterior.alpha)-psi(sum(posterior.alpha))); + + +%% line 14 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -sum(posterior.softAssign(:).*log(posterior.softAssign(:)+realmin)); + + +%% line 15 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +sum(posterior.logDetPostDcmSigma)/2; +%%% neg dF + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +counts.nSubjects*nDcmParamsInfAll/2; + + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +counts.nSubjects*nDcmParamsInfAll*log(2*pi)/2; + + + +%% line 16 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +sum(gammaln(posterior.noiseShape(:))); + + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -posterior.noiseShape(:).'*log(posterior.noiseInvScale(:)); + + +%% line 17 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -(posterior.noiseShape(:).'-1)*(psi(posterior.noiseShape(:))-... + log(posterior.noiseInvScale(:))); + + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +sum(posterior.noiseShape(:)); + + +%% line 18 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +counts.nClusters*nDcmParamsInfCon/2; + + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +nDcmParamsInfCon/2*sum(posterior.clustersDeg); + + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + -counts.nClusters*nDcmParamsInfCon*(nDcmParamsInfCon+2)/2*log(2); + + +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +counts.nClusters*nDcmParamsInfCon/2*log(2*pi); +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +nDcmParamsInfCon/2*sum(log(1./posterior.clustersTau)); + + +%% line 19 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +(posterior.clustersDeg.'+nDcmParamsInfCon+2)/2*(... + posterior.logDetClustersSigma... + -sum(psi(bsxfun(@plus,... + posterior.clustersDeg,... + 1-(1:nDcmParamsInfCon)... + )/2),2)... + ); + + +%% line 20 +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +counts.nClusters*nDcmParamsInfCon*(nDcmParamsInfCon-1)/4*log(pi); +fepIdx = fepIdx + 1; +freeEnergyFullPart(fepIdx) = ... + +sum(sum(gammaln(bsxfun(@plus,... + posterior.clustersDeg,... + 1-(1:nDcmParamsInfCon)... 
+ )/2)));
+
+
+fepIdx = fepIdx + 1;
+freeEnergyFullPart(fepIdx) = ...
+ -posterior.clustersDeg.'*posterior.logDetClustersSigma/2;
+
+
+
+%% line 21
+fepIdx = fepIdx + 1;
+freeEnergyFullPart(fepIdx) = ...
+ +sum(gammaln(posterior.alpha));
+
+fepIdx = fepIdx + 1;
+freeEnergyFullPart(fepIdx) = ...
+ -gammaln(sum(posterior.alpha));
+
+%% line 22
+fepIdx = fepIdx + 1;
+freeEnergyFullPart(fepIdx) = ...
+ -(posterior.alpha.'-1)*(psi(posterior.alpha)-psi(sum(posterior.alpha)));
+
+
+
+%% sum
+freeEnergyFull = sum(freeEnergyFullPart);
+
+
+%% aux
+freeEnergyFullPart(61) = freeEnergyFullPart(2)...
+ +freeEnergyFullPart(13)...
+ +freeEnergyFullPart(34);
+
+
+
diff --git a/huge/tapas_huge_property_names.m b/huge/tapas_huge_property_names.m
index 50e9332a..d81b924f 100644
--- a/huge/tapas_huge_property_names.m
+++ b/huge/tapas_huge_property_names.m
@@ -1,5 +1,8 @@
 %% List of name-value pair arguments accepted by tapas_Huge() and estimate()
 %
+% NAME: BurnIn
+% VALUE: positive integer (default: half of number of iterations)
+% DESCRIPTION: Length of burn-in period in samples.
 %
 % NAME: Confounds
 % VALUE: double array
@@ -22,14 +25,26 @@
 % DESCRIPTION: Specify DCM structure and BOLD time series for all subjects
 % as cell array with one DCM struct in SPM format per subject.
 %
+% NAME: InverseTemperature
+% VALUE: double between 0 and 1 (default: 1)
+% DESCRIPTION: Inverse chain temperature for Monte Carlo based methods.
+%
 % NAME: K
 % VALUE: positive integer (default: 1)
 % DESCRIPTION: Number of clusters.
 %
+% NAME: KernelRatio
+% VALUE: 2x1 array of non-negative integers (default: [10 100])
+% DESCRIPTION: Inverse frequency with which the special kernels are
+% selected during MCMC inversion (1st element: GMM kernel,
+% 2nd element: k-means kernel). A value of 10 means the
+% corresponding kernel is used for every 10th sample; 0
+% disables that kernel.
+%
 % NAME: Method
-% VALUE: 'VB'
+% VALUE: 'VB' | 'MH'
 % DESCRIPTION: Name of inversion method specified as character array. VB:
-% variational Bayes.
+% variational Bayes; MH: Metropolized Gibbs sampling on the
+% collapsed HUGE model.
 %
 % NAME: NumberOfIterations
 % VALUE: positive integer (default: 999 for VB, 2e5 for MH)
@@ -76,6 +91,13 @@
 % DESCRIPTION: If true, starting values for subject level DCM parameter
 % estimates are randomized.
 %
+% NAME: RetainSamples
+% VALUE: 'all' | positive integer (default: 1000)
+% DESCRIPTION: Number of samples from the Markov chain to be kept. 'all'
+% keeps the entire chain including burn-in. Otherwise, the
+% burn-in is discarded and the specified number of samples is
+% retained by uniformly thinning the post-burn-in samples.
+%
 % NAME: SaveTo
 % VALUE: character array
 % DESCRIPTION: Location for saving results consisting of path name and
diff --git a/huge/tapas_huge_psrf.m b/huge/tapas_huge_psrf.m
new file mode 100644
index 00000000..09175e06
--- /dev/null
+++ b/huge/tapas_huge_psrf.m
@@ -0,0 +1,131 @@
+function [ psrf, neff, tau, thin ] = tapas_huge_psrf( samples, nChains, rBurnIn )
+% Calculate the Potential Scale Reduction Factor (PSRF).
+%
+% INPUTS:
+% samples - array containing samples from MCMC. 1st dimension: samples,
+% 2nd dimension: parameters, 3rd dimension: chains.
+%
+% OPTIONAL INPUTS:
+% nChains - number of chains. If supplied, the 1st dimension of samples
+% is subdivided into nChains pseudo-chains and the 3rd
+% dimension is interpreted as parameters.
+% rBurnIn - ratio of samples to discard as burn-in.
+%
+% OUTPUTS:
+% psrf - array containing Potential Scale Reduction Factor
+% neff - effective sample size
+% tau - autocorrelation time
+% thin - length of the initial positive sequence
+%
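+% EXAMPLE (illustrative sketch; option names follow
+% tapas_huge_property_names.m, dcms is assumed to be a cell array of DCM
+% structs in SPM format, and samples stands for the retained posterior
+% draws arranged as samples x parameters):
+%   obj = tapas_Huge( 'Dcm', dcms, 'K', 2, 'Method', 'MH', ...
+%       'NumberOfIterations', 2e5, 'RetainSamples', 1000 );
+%   % subdivide a single chain into 4 pseudo-chains and check convergence
+%   [ psrf, neff ] = tapas_huge_psrf( samples, 4 );
+%   % for M independent chains, pass an N x P x M array and omit nChains
+%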
+
+% REFERENCE:
+% Stephen P. Brooks & Andrew Gelman (1998) General Methods for
+% Monitoring Convergence of Iterative Simulations, Journal of
+% Computational and Graphical Statistics, 7:4, 434-455, DOI:
+% 10.1080/10618600.1998.10474787
+
+% Author: Yu Yao (yao@biomed.ee.ethz.ch)
+% Copyright (C) 2020 Translational Neuromodeling Unit
+% Institute for Biomedical Engineering,
+% University of Zurich and ETH Zurich.
+%
+% This file is part of TAPAS, which is released under the terms of the GNU
+% General Public Licence (GPL), version 3. For further details, see
+% <https://www.gnu.org/licenses/>.
+%
+% This software is provided "as is", without warranty of any kind, express
+% or implied, including, but not limited to the warranties of
+% merchantability, fitness for a particular purpose and non-infringement.
+%
+% This software is intended for research only. Do not use for clinical
+% purpose. Please note that this toolbox is under active development.
+% Considerable changes may occur in future releases. For support please
+% refer to:
+% https://github.com/translationalneuromodeling/tapas/issues
+%
+
+
+[N, P, M] = size(samples);
+if nargin < 2
+ M = 1;
+end
+if nargin < 3
+ rBurnIn = 0; % ratio of samples to discard as burn-in
+end
+psrf = zeros(P, M);
+if nargout > 1
+ neff = zeros(P, M);
+ tau = zeros(P, M);
+ thin = zeros(P, M);
+end
+
+for p = 1:P
+ for m = 1:M
+ if nargin < 2 % treat 3rd dimension as chains
+ tmp = squeeze(samples(:, p, :));
+ else % subdivide 1st dimension into pseudo-chains
+ tmp = reshape(samples(end-fix(N/nChains)*nChains+1:end, p, m), ...
+ fix(N/nChains), nChains);
+ end
+ psrf(p, m) = calculate_psrf( tmp, rBurnIn );
+ if nargout > 1
+ [ neff(p, m), tau(p, m), thin(p, m) ] = calculate_neff( tmp, rBurnIn );
+ end
+ end
+end
+
+end
+
+function [ psrf ] = calculate_psrf( samples, bi )
+
+[N, M] = size(samples);
+assert(M > 1, 'TAPAS:HUGE:noChains', 'Not enough chains.')
+
+% discard first part of each chain as burn-in
+samples = samples(max(1, fix(N*bi)):end, :);
+N = size(samples, 1);
+assert(N > 9, 'TAPAS:HUGE:noSamples', 'Not enough samples.')
+
+% per-chain means and variances
+x_bar = mean(samples);
+s2 = var(samples);
+
+B = var(x_bar); % between-chain variance of the chain means
+W = mean(s2); % mean within-chain variance
+
+% pooled estimate of the marginal posterior variance
+sigma_hat2 = (N - 1)/N*W + B;
+
+% account for sampling variability of the chain means
+V_hat = sigma_hat2 + B/M;
+psrf = sqrt(V_hat/W);
+
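+% Quick numerical check (illustrative, not part of the estimator):
+% independent Gaussian draws mix perfectly, so
+%   tapas_huge_psrf( randn(1e3, 1), 4 )
+% should return a value close to 1, whereas trending or poorly mixed
+% chains push the PSRF well above 1.
+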
+% % correction based on t-distribution
+% mu_hat = mean(samples(:));
+% var_V_hat = (N - 1)^2/N^2/M*var(s2) + 2*(M + 1)^2/M^2/(M-1)*B^2 ...
+% + 2*(M + 1)*(N - 1)/M^2/N* ...
+% (xcov(s2, x_bar.^2, 0) - 2*mu_hat*xcov(s2, x_bar, 0));
+% df = 2*V_hat^2/var_V_hat;
+% psrf = sqrt(V_hat/W*(df + 3)/(df + 1));
+
+end
+
+function [ neff, tau, thin ] = calculate_neff( samples, bi )
+
+% discard first part of each chain as burn-in
+[N,M] = size(samples);
+samples = samples(max(1, fix(N*bi)):end, :);
+N = size(samples, 1);
+K = 2*floor(N/2) - 1;
+% sum of per-chain sample autocorrelations; zero lag is at index N
+rho = zeros(2*N-1,1);
+for m = 1:M
+ rho = rho + xcov(samples(:,m),'coeff');
+end
+% initial positive sequence: truncate at the first adjacent pair of
+% autocorrelations whose sum turns negative
+thin = 2*(find(rho(N:2:N+K)+rho(N+1:2:N+K)<0,1,'first') - 1) - 1;
+if isempty(thin)
+ thin = K;
+end
+thin = max(thin,1);
+tau = sum(rho(N-thin:N+thin))/M; % autocorrelation time, averaged over chains
+neff = M*N/tau; % effective sample size
+
+end
diff --git a/misc/log_tapas.txt b/misc/log_tapas.txt
index 469809bc..5e61b2a9 100644
--- a/misc/log_tapas.txt
+++ b/misc/log_tapas.txt
@@ -1,3 +1,4 @@
+5.1.0 https://www.tapas.tnu-zurich.com/examples_v5.0.0.zip 24c1248d3054f025b853b5ab7ce08a1a
 5.0.0 https://www.tapas.tnu-zurich.com/examples_v5.0.0.zip 24c1248d3054f025b853b5ab7ce08a1a
 4.0.0 https://www.tapas.tnu-zurich.com/examples_v4.0.0.zip 1c8f1b5f53f2ec5e7ab6d5b2d942c1d5
 3.3.0 https://www.tapas.tnu-zurich.com/examples_v3.3.0.zip 95b885041126dfd81e689c95d7bb4aab