Skip to content

Commit aa71fbe

Browse files
author
Sven-Kristjan Bormann
committed
Added example for calculating the statistics for the leukemia dataset to the documentation of sgpv.
Minor bugfixes/improvements in sgpvalue and sgpv. More abbreviations for options are now allowed in all commands.
1 parent 168f763 commit aa71fbe

11 files changed

+151
-97
lines changed

checkdim.ado

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ opt[sort sort the results in descending order.]
1212
opt[keep keep the generated variables]
1313
opt[keepname() the stub for the generated variables. The default stub is "mis".]
1414
opt[replace replace existing variables which contain the results.]
15-
opt2[drop drop if the drop condition is met. The default drop condition is to drop variables which having missing values for all levels of a dimension when only the drop option is set.]
15+
opt[drop drop if the drop condition is met. The default drop condition is to drop variables which have missing values for all levels of a dimension when only the drop option is set.]
1616
1717
example[ Generate an artificial example dataset ]
1818
return[dropvar list of dropped variables]

checkdim.sthlp

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
{marker syntax}{...}
1616
{title:Syntax}
1717
{p 8 17 2}
18-
{cmdab:checkdim2}
18+
{cmdab:checkdim}
1919
[{help varlist}]
2020
[{help in}]
2121
[{cmd:,}

fdrisk.ado

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
*!Based on the R-code for fdrisk.R
33
*!Version 0.9 : Initial Github release
44
*!Version 0.91 : Removed the dependency on the user-provided integrate-command -> Removed nomata option
5-
*!Version 0.95 : Updated documentation, last Github release before submission to SSC
5+
*!Version 0.95 : Updated documentation, added more possibilities to abbreviate options, last Github release before submission to SSC
66
*!To-Do: Rewrite to use Mata whenever possible instead of workarounds in Stata -> Shorten the code
77
*! Evaluate input of options directly with the expression parser `= XXX' to allow more flexible input -> somewhat done, but not available for all options
88
/* START HELP FILE

leukemia-example.do

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
*!leukemia-example.do
2+
*!Example code showing how to calculate SGPVs and Bonus Statistics for the leukemia dataset without modifying the existing code.
3+
*!The leukemia dataset does not contain missing values or rows or columns with no usable information.
4+
*!You have to make sure that this condition holds for your own large matrix or dataset
5+
preserve
6+
sysuse leukemia, clear
7+
sort p_value // To show the results the same way they are shown in the example for plotsgpv
8+
mata: final =J(1,5,.)
9+
forvalues i=1/`=ceil(_N/c(matsize))'{
10+
tempname part`i' res`i' final
11+
local start = (`i'-1)*c(matsize)+1
12+
local end = `i'*c(matsize)
13+
if `end'>`=_N' local end l
14+
mkmat estimate se t_stat p_value ci_lo ci_hi in `start'/`end', matrix(`part`i'')
15+
mat `part`i'' = `part`i'''
16+
mat rownames `part`i'' = b se t pvalue ll ul
17+
qui sgpv,m(`part`i'')
18+
mat `res`i'' = r(comparison)
19+
mata: res=st_matrix("`res`i''")
20+
mata: final= final \ res
21+
}
22+
mata: st_matrix("`final'",final)
23+
24+
matlist `final' ,title("Comparison of ordinary P-Values and Second Generation P-Values") rowtitle(Variables)
25+
mat leukemia_result = `final'
26+
restore
27+

plotsgpv.ado

+5-5
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ opt[noshow do not show the outcome of the SGPV calculations. Useful for larger c
2121
opt[xshow() number representing the maximum ranking on the x-axis that is displayed. Default is to display all intervals.]
2222
opt[nullcol() coloring of the null interval (indifference zone). Default is the R-colour Hawkes Blue]
2323
opt[intcol() coloring of the intervals according to SGPV ranking. Default are the R-colours ("cornflowerblue","firebrick3","darkslateblue") for SGPVs of {it:0}, in {it:(0,1)}, and {it:1} respectively.]
24-
opt[noplotx_axis deactive showing the x-axis.]
25-
opt[noploty_axis deactive showing the y-axis.]
24+
opt[noplotx:_axis deactivate showing the x-axis.]
25+
opt[noploty:_axis deactivate showing the y-axis.]
2626
opt[nooutlinezone deactivate drawing a slim white outline around the null zone. Helpful visual aid when plotting many intervals. Default is on.]
2727
opt[title() title of the plot.]
2828
opt[xtitle() label of the x-axis.]
@@ -64,9 +64,9 @@ END HELP FILE */
6464
program define plotsgpv
6565
version 12.0
6666
syntax [if] [in] , esthi(string) estlo(string) nullhi(string) nulllo(string) ///
67-
[setorder(string) xshow(string) nullcol(string) intcol(string) ///
68-
noploty_axis noplotx_axis nullpt(real 0.0) nooutlinezone title(string) ///
69-
xtitle(string) ytitle(string) nolegend nomata noshow replace twoway_opt(string asis) ]
67+
[SETOrder(string) xshow(string) nullcol(string) INTCol(string) ///
68+
noPLOTY_axis noPLOTX_axis nullpt(real 0.0) nooutlinezone Title(string) ///
69+
XTitle(string) YTitle(string) noLEGend nomata noshow replace TWOway_opt(string asis) ]
7070

7171

7272
***Some default values : Color settings -> translated R-colors into RGB for Stata -> Not sure how to install the colours in Stata for easier referencing.

plotsgpv.sthlp

+19-19
Original file line numberDiff line numberDiff line change
@@ -33,36 +33,36 @@
3333
{p_end}
3434
{synopt:{opt nulllo(string)}} lower bound of null interval.
3535
{p_end}
36-
{synopt:{opt setorder(string)}} a variable giving the desired order along the x-axis.
36+
{synopt:{opt seto:rder(string)}} a variable giving the desired order along the x-axis.
3737
{p_end}
3838
{synopt:{opt xshow(string)}} number representing the maximum ranking on the x-axis that is displayed. Default is to display all intervals.
3939
{p_end}
4040
{synopt:{opt nullcol(string)}} coloring of the null interval (indifference zone). Default is the R-colour Hawkes Blue.
4141
{p_end}
42-
{synopt:{opt intcol(string)}} coloring of the intervals according to SGPV ranking.
42+
{synopt:{opt intc:ol(string)}} coloring of the intervals according to SGPV ranking.
4343
{p_end}
44-
{synopt:{opt noploty_axis}} deactive showing the y-axis.
44+
{synopt:{opt noploty:_axis}} deactivate showing the y-axis.
4545
{p_end}
46-
{synopt:{opt noplotx_axis}} deactive showing the x-axis.
46+
{synopt:{opt noplotx:_axis}} deactivate showing the x-axis.
4747
{p_end}
4848
{synopt:{opt nullpt(#)}} a scalar representing a point null hypothesis. {p_end}
4949
{synopt:{opt nooutlinezone}} deactivate drawing a slim white outline around the null zone. Helpful visual aid when plotting many intervals. Default is on.
5050
{p_end}
51-
{synopt:{opt title(string)}} title of the plot.
51+
{synopt:{opt t:itle(string)}} title of the plot.
5252
{p_end}
53-
{synopt:{opt xtitle(string)}} label of the x-axis label.
53+
{synopt:{opt xt:itle(string)}} label of the x-axis.
5454
{p_end}
55-
{synopt:{opt ytitle(string)}} label of the y-axis.
55+
{synopt:{opt yt:itle(string)}} label of the y-axis.
5656
{p_end}
57-
{synopt:{opt nolegend}} deactivate plotting the legend.
57+
{synopt:{opt noleg:end}} deactivate plotting the legend.
5858
{p_end}
5959
{synopt:{opt nomata}} do not use Mata for calculating the SGPVs if esthi() and estlo() are variables as inputs or if {cmd:c(matsize)} is smaller than the size of these options.
6060
{p_end}
6161
{synopt:{opt noshow}} do not show the outcome of the SGPV calculations. Useful for larger calculations.
6262
{p_end}
6363
{synopt:{opt replace}} replace existing variables in case the nomata-option was used.
6464
{p_end}
65-
{synopt:{opt twoway_opt(string)}} any additional options for the plotting go here.
65+
{synopt:{opt two:way_opt(string asis)}} any additional options for the plotting go here.
6666
{p_end}
6767
{synoptline}
6868
{p2colreset}{...}
@@ -99,7 +99,7 @@ To specify that the upper limit is +infinity just specify the missing value . in
9999
{pstd}
100100
{p_end}
101101
{phang}
102-
{opt setorder(string)} a variable giving the desired order along the x-axis. If {bf:setorder} is set to {bf:"sgpv"}, the second-generation {it:p}-value ranking is used. If {bf:setorder} is empty, the original input ordering is used.
102+
{opt seto:rder(string)} a variable giving the desired order along the x-axis. If {bf:setorder} is set to {bf:"sgpv"}, the second-generation {it:p}-value ranking is used. If {bf:setorder} is empty, the original input ordering is used.
103103

104104
{pstd}
105105
{p_end}
@@ -115,7 +115,7 @@ To specify that the upper limit is +infinity just specify the missing value . in
115115
{pstd}
116116
{p_end}
117117
{phang}
118-
{opt intcol(string)} coloring of the intervals according to SGPV ranking. Default are the R-colours ("cornflowerblue","firebrick3","darkslateblue")} for SGPVs of {it:0}, in {it:(0,1)}, and {it:1} respectively.
118+
{opt intc:ol(string)} coloring of the intervals according to SGPV ranking. Default are the R-colours ("cornflowerblue","firebrick3","darkslateblue") for SGPVs of {it:0}, in {it:(0,1)}, and {it:1} respectively.
119119
You can see the colour before plotting via:
120120

121121
{stata palette color 100 149 237 } // cornflowerblue
@@ -125,12 +125,12 @@ You can see the colour before plotting via:
125125
{pstd}
126126
{p_end}
127127
{phang}
128-
{opt noploty_axis} deactive showing the y-axis.
128+
{opt noploty:_axis} deactivate showing the y-axis.
129129

130130
{pstd}
131131
{p_end}
132132
{phang}
133-
{opt noplotx_axis} deactive showing the x-axis.
133+
{opt noplotx:_axis} deactivate showing the x-axis.
134134

135135
{pstd}
136136
{p_end}
@@ -145,22 +145,22 @@ You can see the colour before plotting via:
145145
{pstd}
146146
{p_end}
147147
{phang}
148-
{opt title(string)} title of the plot.
148+
{opt t:itle(string)} title of the plot.
149149

150150
{pstd}
151151
{p_end}
152152
{phang}
153-
{opt xtitle(string)} label of the x-axis.
153+
{opt xt:itle(string)} label of the x-axis.
154154

155155
{pstd}
156156
{p_end}
157157
{phang}
158-
{opt ytitle(string)} label of the y-axis.
158+
{opt yt:itle(string)} label of the y-axis.
159159

160160
{pstd}
161161
{p_end}
162162
{phang}
163-
{opt nolegend} deactivate plotting the legend.
163+
{opt noleg:end} deactivate plotting the legend.
164164

165165
{pstd}
166166
{p_end}
@@ -180,7 +180,7 @@ You can see the colour before plotting via:
180180
{pstd}
181181
{p_end}
182182
{phang}
183-
{opt twoway_opt(string)} any additional options for the plotting go here. See {help twoway} for more information about the possible options. Options set here {bf:do not} override the values set in other options before.
183+
{opt two:way_opt(string asis)} any additional options for the plotting go here. See {help twoway} for more information about the possible options. Options set here {bf:do not} override the values set in other options before.
184184
{p_end}
185185

186186

@@ -189,7 +189,7 @@ You can see the colour before plotting via:
189189
{pstd}
190190
{stata sysuse leukstats} // Load the example dataset provided with this command
191191

192-
plotsgpv, esthi(ci_hi) estlo(ci_lo) nulllo(-0.3) nullhi(0.3) nomata replace noshow setorder(p_value) title("Leukemia Example") xtitle("Classical p-value ranking") ytitle("Fold Change (base 10)") ylabel(`=log10(1/1000)' "1/1000" `=log10(1/100)' "1/100" `=log10(1/10)' "1/10" `=log10(1/2)' "1/2" `=log10(2)' "2" `=log10(10)' "10" `=log10(100)' "100" `=log10(1000)' "1000") //Replicate the example plot from the R-code
192+
plotsgpv, esthi(ci_hi) estlo(ci_lo) nulllo(-0.3) nullhi(0.3) nomata replace noshow setorder(p_value) title("Leukemia Example") xtitle("Classical p-value ranking") ytitle("Fold Change (base 10)") twoway_opt(ylabel(`=log10(1/1000)' "1/1000" `=log10(1/100)' "1/100" `=log10(1/10)' "1/10" `=log10(1/2)' "1/2" `=log10(2)' "2" `=log10(10)' "10" `=log10(100)' "100" `=log10(1000)' "1000")) //Replicate the example plot from the R-code
193193

194194
{title:References}
195195
{pstd}

sgpv.ado

+15-5
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,21 @@
11
*! A wrapper program for calculating the Second-Generation P-Values and their associated diagnosis
22
*!Version 0.9: Initial Github release
3-
*!Version 0.95: Fixed minor mistakes in the documentation, minor bugfixes. changed the way the results are presented
3+
*!Version 0.95: Fixed minor mistakes in the documentation, added more information about SGPVs and more example use cases; minor bugfixes; changed the way the results are presented
4+
*!Version 0.96: Added an example how to calculate all statistics for the leukemia dataset; minor fixes in the documentation of all commands and better handling of the matrix option.
45

56
/*
7+
Bugfixes to implement for next release:
8+
- avoid conflicts between different options and prefix, replay modes
9+
-> what takes precedence: replaying results, matrix-based calculations or estimate-based calculations?
10+
- true replay function -> if previous results exist -> redisplay existing matrix
11+
612
To-Do (things that I wish to implement at some point or that I think might be interesting to have):
713
- support for more commands which do not report their results in a matrix named "r(table)".
814
- Make results exportable or change the command to an e-class command to allow processing in commands like esttab or estpost from Ben Jann
915
- Make matrix parsing more flexible and rely on the names of the rows for identifying the necessary numbers; allow calculations for more than one stored estimate
1016
- Return more infos
1117
- Allow plotting of the resulting SGPVs against the normal p-values directly after the calculations
1218
- Calculate automatically a null interval based on the statistical properties of the dependent variable of an estimation to encourage the usage of interval null-hypotheses.
13-
- Add a way to calculate the statistics for the whole leukemia dataset. Only parts of the code written yet -> might require some further rewrite of sgpvalue and fdrisk.
1419
- change the help file generation from makehlp to markdoc for more control over the layout of the help files -> currently requires a lot of manual tuning to get desired results.
1520
*/
1621
/* START HELP FILE
@@ -112,9 +117,14 @@ version 12.0
112117
*Parse the initial input -> The case that sgpv is called only with options (and similar situations) is not captured yet -> Should implement a replay function to avoid repeated calculations when only a selection on the matrix is required.
113118
capture _on_colon_parse `0'
114119

115-
116-
if _rc & "`e(cmd)'"=="" & !ustrregexm(`"`0'"',"matrix\(\w+\)") { // If the command was not prefixed and no previous estimation exists. -> needs changes to work with matrix option better
117-
disp as error "No last estimates for calculating SGPV found."
120+
/*How to avoid conflicts between different options and the prefix and replay modes?*/
121+
/*if _rc & (!ustrregexm(`"`0'"',"matrix\(\w+\)") | !ustrregexm(`"`0'"',"m\(\w+\)") ) { // If the command was not prefixed and no previous estimation exists. -> needs changes to work with matrix option better -> does not work yet with allowed abbreviation of matrix option
122+
disp as error "No matrix for calculating SGPV found. Make sure that the matrix option is correctly specified as 'matrix(matrixname)' or 'm(matrixname)' . "
123+
exit 198
124+
}
125+
*/
126+
if _rc & "`e(cmd)'"=="" & (!ustrregexm(`"`0'"',"matrix\(\w+\)") & !ustrregexm(`"`0'"',"m\(\w+\)") ) { // If the command was not prefixed and no previous estimation exists. -> needs changes to work with matrix option better -> does not work yet with allowed abbreviation of matrix option
127+
disp as error "No last estimate or matrix for calculating SGPV found. Make sure that the matrix option is correctly specified as 'matrix(matrixname)' or 'm(matrixname)' . "
118128
exit 301
119129
}
120130

sgpv.pkg

+3-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ d 'SGPV': Second Generation P-Values
33
d Based on: the original R-code for the sgpv-package from {browse "https://github.com/weltybiostat/sgpv"}
44
d An additional {cmd:sgpv}-command makes it easier to calculate the SGPVs after common estimation commands.
55
d Author: Sven-Kristjan Bormann
6-
d Distribution-Date: 20200306
6+
d Distribution-Date: 20200307
77
d License: MIT
88
d
99
f fdrisk.ado
@@ -16,4 +16,5 @@ f plotsgpv.sthlp
1616
f sgpower.sthlp
1717
f sgpv.sthlp
1818
f sgpvalue.sthlp
19-
f leukstats.dta
19+
f leukstats.dta
20+
f leukemia-example.do

0 commit comments

Comments
 (0)