Skip to content

Commit

Permalink
Added F-test
Browse files Browse the repository at this point in the history
  • Loading branch information
dzerbino committed Nov 4, 2014
1 parent e9becb2 commit 5144e13
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 8 deletions.
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -384,13 +384,22 @@ wiggletools max test/fixedStep.bw test/variableStep.bw

* Welch's t-test

Computes the two-tailed p-value of Welch's t-test comparing to sets of numbers, each assumed to have a normal distribution:
Computes the two-tailed p-value of Welch's t-test comparing two sets of numbers, each assumed to have a normal distribution:

```
wiggletools ttest test/fixedStep.bw test/variableStep.bw test/fixedStep.wig \
: test/fixedStep.wig test/variableStep.bw test/fixedStep.wig
```

* F-test

Computes the p-value of the F-test comparing sets of numbers, each assumed to have a normal distribution:

```
wiggletools ftest test/fixedStep.bw test/variableStep.bw test/fixedStep.wig \
: test/fixedStep.wig test/variableStep.bw test/fixedStep.wig
```

* Wilcoxon's sum rank test

Non-parametric equivalent of the above:
Expand Down
43 changes: 36 additions & 7 deletions src/commandParser.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,17 @@ puts("\twiggletools program");
puts("");
puts("Program grammar:");
puts("\tprogram = (iterator) | do (iterator) | (extraction)");
puts("\titerator = (filename) | (unary_operator) (iterator) | (binary_operator) (iterator) (iterator) | (reducer) (multiplex) | (setComparison) (multiplex) (multiplex) | print (statistic)");
puts("\titerator = (in_filename) | (unary_operator) (iterator) | (binary_operator) (iterator) (iterator) | (reducer) (multiplex) | (setComparison) (multiplex_list) | print (statistic)");
puts("\tunary_operator = unit | write (output) | write_bg (ouput) | smooth (int) | exp | ln | log (float) | pow (float) | offset (float) | scale (float) | gt (float) | default (float) | isZero | (statistic)");
puts("\toutput = (filename) | -");
puts("\tfilename = *.wig | *.bw | *.bed | *.bb | *.bg | *.bam | *.vcf | *.bcf");
puts("\toutput = (out_filename) | -");
puts("\tin_filename = *.wig | *.bw | *.bed | *.bb | *.bg | *.bam | *.vcf | *.bcf");
puts("\tstatistic = AUC | meanI | varI | minI | maxI | stddevI | CVI | pearson (iterator)");
puts("\tbinary_operator = diff | ratio | overlaps | apply (statistic) | fillIn");
puts("\treducer = cat | sum | product | mean | var | stddev | entropy | CV | median | min | max");
puts("\tsetComparison = ttest | ftest | wilcoxon");
puts("\tmultiplex_list = (multiplex) | (multiplex) : (multiplex_list)");
puts("\tmultiplex = (iterator_list) | map (unary_operator) (multiplex) | strict (multiplex)");
puts("\titerator_list = (iterator) : | (iterator) (iterator_list)");
puts("\tsetComparison = ttest | wilcoxon");
puts("\titerator_list = (iterator) | (iterator) : (iterator_list)");
puts("\textraction = profile (output) (int) (iterator) (iterator) | profiles (output) (int) (iterator) (iterator) | histogram (output) (width) (iterator_list) | mwrite (output) (multiplex) | mwrite_bg (output) (multiplex)");
puts("\t\t| apply_paste (out_filename) (statistic) (bed_file) (iterator)");

Expand Down Expand Up @@ -302,6 +303,28 @@ static Multiplexer * readMultiplexer() {
return readMultiplexerToken(token);
}

static Multiplexer ** readMultiplexerList(int * count) {
size_t buffer_size = 8;
char * token;
Multiplexer ** multis = (Multiplexer **) calloc(buffer_size, sizeof(Multiplexer*));
*count = 0;

for (token = nextToken(0,0); token != NULL && strcmp(token, ":"); token = nextToken(0,0)) {
if (*count == buffer_size) {
buffer_size *= 2;
multis = (Multiplexer **) realloc(multis, buffer_size * sizeof(Multiplexer*));
}
multis[(*count)++] = readMultiplexerToken(token);
}
return multis;
}

static Multiset * readMultiset() {
int count = 0;
Multiplexer ** multis = readMultiplexerList(&count);
return newMultiset(multis, count);
}

static WiggleIterator * readTee() {
FILE * file = readOutputFilename();
return TeeWiggleIterator(readIterator(), file, false, holdFire);
Expand Down Expand Up @@ -496,6 +519,10 @@ static WiggleIterator * readTTest() {
return TTestReduction(newMultiset(multis, 2));
}

static WiggleIterator * readFTest() {
return FTestReduction(readMultiset());
}

static WiggleIterator * readMWUTest() {
Multiplexer ** multis = calloc(2, sizeof(Multiplexer *));
multis[0] = readMultiplexer();
Expand Down Expand Up @@ -600,6 +627,8 @@ static WiggleIterator * readIteratorToken(char * token) {
return readOverlap();
if (strcmp(token, "ttest") == 0)
return readTTest();
if (strcmp(token, "ftest") == 0)
return readFTest();
if (strcmp(token, "wilcoxon") == 0)
return readMWUTest();
if (strcmp(token, "AUC") == 0)
Expand All @@ -623,7 +652,7 @@ static WiggleIterator * readIteratorToken(char * token) {
if (strcmp(token, "apply") == 0)
return SelectReduction(readApply(), 0);

return SmartReader(token);
return SmartReader(token, holdFire);

}

Expand Down Expand Up @@ -701,7 +730,7 @@ static Multiplexer * readApplyPaste() {
exit(1);
}

return PasteMultiplexer(ApplyMultiplexer(SmartReader(infilename), statistics, count, readLastIterator(), strict), infile, outfile, false);
return PasteMultiplexer(ApplyMultiplexer(SmartReader(infilename, holdFire), statistics, count, readLastIterator(), strict), infile, outfile, false);
}

void readPrint() {
Expand Down
99 changes: 99 additions & 0 deletions src/setComparisons.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,105 @@ WiggleIterator * TTestReduction(Multiset * multi) {
return newWiggleIterator(data, &TTestReductionPop, &SetComparisonSeek, NAN);
}


////////////////////////////////////////////////////////
// F-test
////////////////////////////////////////////////////////

typedef struct ftestData_st {
Multiset * multi;
int * counts;
double * means;
int total_count;
} FTestData;

void FTestSeek(WiggleIterator * iter, const char * chrom, int start, int finish) {
FTestData * data = (FTestData* ) iter->data;
seekMultiset(data->multi, chrom, start, finish);
pop(iter);
}


void FTestReductionPop(WiggleIterator * wi) {
if (wi->done)
return;

FTestData * data = (FTestData *) wi->data;
Multiset * multi = data->multi;

if (multi->done) {
wi->done = true;
return;
}

// Go to first position where both of the sets have at least one value
while (!multi->inplay[0] || !multi->inplay[1]) {
popMultiset(multi);
if (multi->done) {
wi->done = true;
return;
}
}
wi->chrom = multi->chrom;
wi->start = multi->start;
wi->finish = multi->finish;

// Compute means
double mean = 0;
int groups = multi->count;
int index, index2;
for (index = 0; index < groups; index++) {
Multiplexer * mplx = multi->multis[index];
data->means[index] = 0;
for (index2 = 0; index < mplx->count; index++) {
if (mplx->inplay[index2])
data->means[index] += mplx->values[index2];
else
data->means[index] += mplx->iters[index2]->default_value;
}
mean += data->means[index];
data->means[index] /= data->counts[index];
}
mean /= data->total_count;

double inter = 0;
double intra = 0;
for (index = 0; index < groups; index++) {
Multiplexer * mplx = multi->multis[index];
inter += mplx->count * (data->means[index] - mean) * (data->means[index] - mean);
for (index2 = 0; index < mplx->count; index++) {
if (mplx->inplay[index2])
intra += (mplx->values[index2] - data->means[index]) * (mplx->values[index2] - data->means[index]);
else
intra += (mplx->iters[index2]->default_value - data->means[index]) * (mplx->iters[index2]->default_value - data->means[index]);
}
}

// F-statistic
inter /= groups - 1;
intra /= data->total_count - groups;
double f = inter / intra;

// P-value
wi->value = 2 * gsl_cdf_fdist_Q(f, multi->count - 1, data->total_count - multi->count);

// Update inputs
popMultiset(multi);
}

WiggleIterator * FTestReduction(Multiset * multi) {
FTestData * data = (FTestData *) calloc(1, sizeof(FTestData));
data->multi = multi;
data->means = calloc(multi->count, sizeof(double));
data->counts = calloc(multi->count, sizeof(int));
int index;
for (index = 0; index < multi->count; index++) {
data->counts[index] = multi->multis[index]->count;
data->total_count += data->counts[index];
}
return newWiggleIterator(data, &FTestReductionPop, &FTestSeek, NAN);
}

////////////////////////////////////////////////////////
// Mann-Whitney U (Wilcoxon rank-sum test)
////////////////////////////////////////////////////////
Expand Down
1 change: 1 addition & 0 deletions src/wiggletools.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ Multiset * newMultiset(Multiplexer **, int);

// Reduction operators on sets of sets:
WiggleIterator * TTestReduction(Multiset *);
WiggleIterator * FTestReduction(Multiset *);
WiggleIterator * MWUReduction(Multiset *);

// Output
Expand Down

0 comments on commit 5144e13

Please sign in to comment.