Skip to content

Commit a25dbb1

Browse files
committed
Merge branch 'master' of https://github.com/mimno/Mallet
2 parents 001beaf + d9fd566 commit a25dbb1

File tree

2 files changed

+5
-1
lines changed

2 files changed

+5
-1
lines changed

src/cc/mallet/topics/tui/TopicTrainer.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ public class TopicTrainer {
147147
"The number of iterations to run before first estimating dirichlet hyperparameters.", null);
148148

149149
static CommandOption.Boolean useSymmetricAlpha = new CommandOption.Boolean(TopicTrainer.class, "use-symmetric-alpha", "true|false", false, false,
150-
"Only optimize the concentration parameter of the prior over document-topic distributions. This may reduce the number of very small, poorly estimated topics, but may disperse common words over several topics.", null);
150+
"Optimize the concentration parameter (SumAlpha) of the prior over document-topic distributions while keeping it symmetric. This may reduce the number of very small, poorly estimated topics, but may disperse common words over several topics.", null);
151151

152152
static CommandOption.Double alpha = new CommandOption.Double(TopicTrainer.class, "alpha", "DECIMAL", true, 5.0,
153153
"SumAlpha parameter: sum over topics of smoothing over doc-topic distributions. alpha_k = [this value] / [num topics]",null);

src/cc/mallet/types/Dirichlet.java

+4
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,8 @@ public static void testSymmetricConcentration(int numDimensions, int numObservat
555555

556556
/**
557557
* Learn Dirichlet parameters using frequency histograms
558+
* as described by Hanna Wallach in "Structured Topic Models for Language" (2008), section 2.4
559+
* Method 1: Using the Digamma Recurrence Relation (pp. 27-28)
558560
*
559561
* @param parameters A reference to the current values of the parameters, which will be updated in place
560562
* @param observations An array of count histograms. <code>observations[10][3]</code> could be the number of documents that contain exactly 3 tokens of word type 10.
@@ -571,6 +573,8 @@ public static double learnParameters(double[] parameters,
571573

572574
/**
573575
* Learn Dirichlet parameters using frequency histograms
576+
* as described by Hanna Wallach in "Structured Topic Models for Language" (2008), section 2.4
577+
* Method 1: Using the Digamma Recurrence Relation (pp. 27-28) and gamma hyperpriors (section 2.5, pp. 37-39)
574578
*
575579
* @param parameters A reference to the current values of the parameters, which will be updated in place
576580
* @param observations An array of count histograms. <code>observations[10][3]</code> could be the number of documents that contain exactly 3 tokens of word type 10.

0 commit comments

Comments
 (0)