diff --git a/.flake8 b/.flake8
index 4124b2b6..fed4715d 100644
--- a/.flake8
+++ b/.flake8
@@ -3,7 +3,8 @@ max-line-length = 120
 
 # F405 - ignore error if class is imported in `*` import.
 # W293 - blank line contains whitespace
-ignore = F405,W293
+# W503 - line break before binary operator
+ignore = F405,W293,W503
 
 exclude =
     .git,
diff --git a/docs/img/phenotype_group_counts.png b/docs/img/phenotype_group_counts.png
deleted file mode 100644
index e08a430a..00000000
Binary files a/docs/img/phenotype_group_counts.png and /dev/null differ
diff --git a/docs/img/rere_phenotype_score_boxplot.png b/docs/img/rere_phenotype_score_boxplot.png
new file mode 100644
index 00000000..684e57ae
Binary files /dev/null and b/docs/img/rere_phenotype_score_boxplot.png differ
diff --git a/docs/report/tbx5_frameshift_vs_missense.csv b/docs/report/tbx5_frameshift_vs_missense.csv
index db0572a0..e4a80ba2 100644
--- a/docs/report/tbx5_frameshift_vs_missense.csv
+++ b/docs/report/tbx5_frameshift_vs_missense.csv
@@ -1,18 +1,261 @@
 "Genotype group: Missense, Frameshift",Missense,Missense,Frameshift,Frameshift,,
-,Count,Percent,Count,Percent,p value,Corrected p value
-Ventricular septal defect [HP:0001629],31/60,52%,19/19,100%,5.6190936213143254e-05,0.0008990549794102921
-Abnormal atrioventricular conduction [HP:0005150],0/22,0%,3/3,100%,0.00043478260869565214,0.003478260869565217
-Atrioventricular block [HP:0001678],0/22,0%,2/2,100%,0.0036231884057971015,0.014492753623188406
-Heart block [HP:0012722],0/22,0%,2/2,100%,0.0036231884057971015,0.014492753623188406
-Absent thumb [HP:0009777],12/71,17%,14/31,45%,0.005628510156750059,0.018011232501600187
-Patent ductus arteriosus [HP:0001643],3/37,8%,2/2,100%,0.01349527665317139,0.03598740440845704
-Triphalangeal thumb [HP:0001199],13/72,18%,13/32,41%,0.02560162393963452,0.058517997576307476
-Cardiac conduction abnormality [HP:0031546],14/36,39%,3/3,100%,0.07440639019586388,0.14881278039172777
-Secundum atrial septal defect [HP:0001684],14/35,40%,4/22,18%,0.1424522583078588,0.2532484592139712
-Muscular ventricular septal defect [HP:0011623],6/59,10%,6/25,24%,0.1687456462342971,0.2699930339748754
-Pulmonary arterial hypertension [HP:0002092],4/6,67%,0/2,0%,0.42857142857142855,0.6233766233766234
-Hypoplasia of the ulna [HP:0003022],1/12,8%,2/10,20%,0.5714285714285713,0.7619047619047618
-Hypoplasia of the radius [HP:0002984],30/62,48%,6/14,43%,0.7735491022101784,0.9520604334894502
-Absent radius [HP:0003974],7/32,22%,6/25,24%,1.0,1.0
-Short humerus [HP:0005792],7/17,41%,4/9,44%,1.0,1.0
+,Count,Percent,Count,Percent,Corrected p values,p values
+Ventricular septal defect [HP:0001629],31/60,52%,19/19,100%,0.0008990549794102921,5.6190936213143254e-05
+Abnormal atrioventricular conduction [HP:0005150],0/22,0%,3/3,100%,0.003478260869565217,0.00043478260869565214
+Atrioventricular block [HP:0001678],0/22,0%,2/2,100%,0.014492753623188406,0.0036231884057971015
+Heart block [HP:0012722],0/22,0%,2/2,100%,0.014492753623188406,0.0036231884057971015
+Absent thumb [HP:0009777],12/71,17%,14/31,45%,0.018011232501600187,0.005628510156750059
+Patent ductus arteriosus [HP:0001643],3/37,8%,2/2,100%,0.03598740440845704,0.01349527665317139
+Triphalangeal thumb [HP:0001199],13/72,18%,13/32,41%,0.058517997576307476,0.02560162393963452
+Cardiac conduction abnormality [HP:0031546],14/36,39%,3/3,100%,0.14881278039172777,0.07440639019586388
+Secundum atrial septal defect [HP:0001684],14/35,40%,4/22,18%,0.2532484592139712,0.1424522583078588
+Muscular ventricular septal defect [HP:0011623],6/59,10%,6/25,24%,0.2699930339748754,0.1687456462342971
+Pulmonary arterial hypertension [HP:0002092],4/6,67%,0/2,0%,0.6233766233766234,0.42857142857142855
+Hypoplasia of the ulna [HP:0003022],1/12,8%,2/10,20%,0.7619047619047618,0.5714285714285713
+Hypoplasia of the radius [HP:0002984],30/62,48%,6/14,43%,0.9520604334894502,0.7735491022101784
 Atrial septal defect [HP:0001631],42/44,95%,20/20,100%,1.0,1.0
+Short humerus [HP:0005792],7/17,41%,4/9,44%,1.0,1.0
+Absent radius [HP:0003974],7/32,22%,6/25,24%,1.0,1.0
+Abnormal atrial septum morphology [HP:0011994],43/43,100%,20/20,100%,,
+Abnormal cardiac septum morphology [HP:0001671],62/62,100%,28/28,100%,,
+Abnormal heart morphology [HP:0001627],62/62,100%,30/30,100%,,
+Abnormal cardiovascular system morphology [HP:0030680],63/63,100%,30/30,100%,,
+Abnormality of the cardiovascular system [HP:0001626],65/65,100%,32/32,100%,,
+Phenotypic abnormality [HP:0000118],82/82,100%,38/38,100%,,
+All [HP:0000001],82/82,100%,38/38,100%,,
+Abnormal cardiac atrium morphology [HP:0005120],43/43,100%,20/20,100%,,
+Abnormal ventricular septum morphology [HP:0010438],31/31,100%,19/19,100%,,
+Abnormal cardiac ventricle morphology [HP:0001713],31/31,100%,19/19,100%,,
+Perimembranous ventricular septal defect [HP:0011682],3/59,5%,3/25,12%,,
+Short thumb [HP:0009778],11/41,27%,8/30,27%,,
+Aplasia/Hypoplasia of the thumb [HP:0009601],20/20,100%,19/19,100%,,
+Aplasia/Hypoplasia of fingers [HP:0006265],22/22,100%,19/19,100%,,
+Aplasia/hypoplasia involving bones of the hand [HP:0005927],22/22,100%,19/19,100%,,
+Aplasia/hypoplasia involving bones of the upper limbs [HP:0006496],55/55,100%,22/22,100%,,
+Aplasia/hypoplasia involving bones of the extremities [HP:0045060],55/55,100%,22/22,100%,,
+Aplasia/hypoplasia of the extremities [HP:0009815],55/55,100%,22/22,100%,,
+Abnormality of limbs [HP:0040064],73/73,100%,34/34,100%,,
+Aplasia/hypoplasia involving the skeleton [HP:0009115],56/56,100%,23/23,100%,,
+Abnormal skeletal morphology [HP:0011842],73/73,100%,35/35,100%,,
+Abnormality of the skeletal system [HP:0000924],73/73,100%,35/35,100%,,
+Abnormality of the musculoskeletal system [HP:0033127],74/74,100%,35/35,100%,,
+Abnormal limb bone morphology [HP:0002813],63/63,100%,34/34,100%,,
+Abnormality of limb bone [HP:0040068],63/63,100%,34/34,100%,,
+Abnormal appendicular skeleton morphology [HP:0011844],64/64,100%,34/34,100%,,
+Abnormality of the upper limb [HP:0002817],73/73,100%,34/34,100%,,
+Abnormal hand morphology [HP:0005922],53/53,100%,20/20,100%,,
+Abnormality of the hand [HP:0001155],60/60,100%,31/31,100%,,
+Abnormal finger morphology [HP:0001167],36/36,100%,31/31,100%,,
+Abnormal digit morphology [HP:0011297],38/38,100%,33/33,100%,,
+Abnormal thumb morphology [HP:0001172],30/30,100%,31/31,100%,,
+Short finger [HP:0009381],11/11,100%,8/8,100%,,
+Short digit [HP:0011927],11/11,100%,10/10,100%,,
+Abnormality of thumb phalanx [HP:0009602],13/13,100%,13/13,100%,,
+Forearm undergrowth [HP:0009821],30/30,100%,7/7,100%,,
+Upper limb undergrowth [HP:0009824],33/33,100%,7/7,100%,,
+Limb undergrowth [HP:0009826],33/33,100%,7/7,100%,,
+Aplasia/hypoplasia involving forearm bones [HP:0006503],37/37,100%,12/12,100%,,
+Abnormal forearm bone morphology [HP:0040072],37/37,100%,14/14,100%,,
+Abnormal upper limb bone morphology [HP:0040070],40/40,100%,14/14,100%,,
+Abnormal forearm morphology [HP:0002973],37/37,100%,14/14,100%,,
+Short long bone [HP:0003026],35/35,100%,9/9,100%,,
+Abnormal long bone morphology [HP:0011314],44/44,100%,13/13,100%,,
+Aplasia/Hypoplasia of the radius [HP:0006501],37/37,100%,11/11,100%,,
+Abnormal morphology of the radius [HP:0002818],37/37,100%,13/13,100%,,
+Aplasia of the fingers [HP:0009380],15/15,100%,14/14,100%,,
+Tricuspid regurgitation [HP:0005180],3/3,100%,0/0,0%,,
+Atrioventricular valve regurgitation [HP:0034376],4/4,100%,2/2,100%,,
+Abnormal atrioventricular valve physiology [HP:0031650],4/4,100%,2/2,100%,,
+Abnormal heart valve physiology [HP:0031653],4/4,100%,2/2,100%,,
+Abnormal cardiovascular system physiology [HP:0011025],23/23,100%,5/5,100%,,
+Abnormal tricuspid valve physiology [HP:0031651],3/3,100%,0/0,0%,,
+Deviation of the hand or of fingers of the hand [HP:0009484],2/2,100%,2/2,100%,,
+Cleft soft palate [HP:0000185],2/2,100%,0/0,0%,,
+Abnormal soft palate morphology [HP:0100736],2/2,100%,0/0,0%,,
+Abnormal palate morphology [HP:0000174],5/5,100%,0/0,0%,,
+Abnormal oral cavity morphology [HP:0000163],5/5,100%,1/1,100%,,
+Abnormal oral morphology [HP:0031816],5/5,100%,1/1,100%,,
+Abnormality of the mouth [HP:0000153],5/5,100%,1/1,100%,,
+Abnormality of the face [HP:0000271],5/5,100%,1/1,100%,,
+Abnormality of the head [HP:0000234],5/5,100%,2/2,100%,,
+Abnormality of head or neck [HP:0000152],5/5,100%,2/2,100%,,
+Cleft palate [HP:0000175],2/2,100%,0/0,0%,,
+Orofacial cleft [HP:0000202],2/2,100%,0/0,0%,,
+Abnormal carpal morphology [HP:0001191],30/32,94%,0/0,0%,,
+Abnormality of the wrist [HP:0003019],30/30,100%,0/0,0%,,
+Abnormality of upper limb joint [HP:0009810],30/30,100%,6/6,100%,,
+Abnormal joint morphology [HP:0001367],31/31,100%,6/6,100%,,
+Mitral valve prolapse [HP:0001634],0/0,0%,1/1,100%,,
+Abnormal mitral valve morphology [HP:0001633],0/0,0%,1/1,100%,,
+Abnormal atrioventricular valve morphology [HP:0006705],0/0,0%,1/1,100%,,
+Abnormal heart valve morphology [HP:0001654],0/0,0%,1/1,100%,,
+Mitral regurgitation [HP:0001653],1/1,100%,2/2,100%,,
+Abnormal mitral valve physiology [HP:0031481],1/1,100%,2/2,100%,,
+Limited pronation/supination of forearm [HP:0006394],0/0,0%,3/3,100%,,
+Limited elbow movement [HP:0002996],0/0,0%,4/4,100%,,
+Abnormality of the elbow [HP:0009811],0/0,0%,5/5,100%,,
+Limitation of joint mobility [HP:0001376],0/0,0%,4/4,100%,,
+Abnormality of joint mobility [HP:0011729],1/1,100%,5/5,100%,,
+Abnormal joint physiology [HP:0034430],1/1,100%,5/5,100%,,
+Abnormal musculoskeletal physiology [HP:0011843],1/1,100%,5/5,100%,,
+Abnormality of cardiovascular system electrophysiology [HP:0030956],15/15,100%,3/3,100%,,
+Upper limb phocomelia [HP:0009813],8/85,9%,2/37,5%,,
+Phocomelia [HP:0009829],8/8,100%,2/2,100%,,
+Pre-capillary pulmonary hypertension [HP:0033578],4/4,100%,0/0,0%,,
+Elevated pulmonary artery pressure [HP:0004890],4/4,100%,0/0,0%,,
+Abnormality of pulmonary circulation [HP:0030875],4/4,100%,0/0,0%,,
+Abnormal vascular physiology [HP:0030163],4/4,100%,0/0,0%,,
+Abnormality of the vasculature [HP:0002597],10/10,100%,2/2,100%,,
+Abnormal respiratory system physiology [HP:0002795],4/4,100%,0/0,0%,,
+Abnormality of the respiratory system [HP:0002086],4/4,100%,0/0,0%,,
+Aplasia of the 1st metacarpal [HP:0010035],0/0,0%,0/0,0%,,
+Aplasia of the proximal phalanges of the hand [HP:0010242],0/0,0%,0/0,0%,,
+Aplasia/Hypoplasia of the proximal phalanges of the hand [HP:0009851],0/0,0%,0/0,0%,,
+Abnormal proximal phalanx morphology of the hand [HP:0009834],0/0,0%,0/0,0%,,
+Abnormal finger phalanx morphology [HP:0005918],3/3,100%,0/0,0%,,
+Aplasia/Hypoplasia of the phalanges of the hand [HP:0009767],0/0,0%,0/0,0%,,
+Aplasia of the phalanges of the hand [HP:0009802],0/0,0%,0/0,0%,,
+Aplasia involving bones of the upper limbs [HP:0009823],0/0,0%,0/0,0%,,
+Aplasia involving bones of the extremities [HP:0009825],0/0,0%,0/0,0%,,
+Aplasia of metacarpal bones [HP:0010048],0/0,0%,0/0,0%,,
+Aplasia/Hypoplasia involving the metacarpal bones [HP:0005914],0/0,0%,0/0,0%,,
+Abnormal metacarpal morphology [HP:0005916],0/0,0%,0/0,0%,,
+Aplasia/Hypoplasia of the 1st metacarpal [HP:0010026],0/0,0%,0/0,0%,,
+Abnormal 1st metacarpal morphology [HP:0010009],0/0,0%,0/0,0%,,
+Aplasia/Hypoplasia of the phalanges of the thumb [HP:0009658],0/0,0%,0/0,0%,,
+Partial absence of thumb [HP:0009659],0/0,0%,0/0,0%,,
+1-2 finger syndactyly [HP:0010704],3/3,100%,1/1,100%,,
+Finger syndactyly [HP:0006101],4/4,100%,2/2,100%,,
+Syndactyly [HP:0001159],4/4,100%,2/2,100%,,
+Complete atrioventricular canal defect [HP:0001674],5/37,14%,3/36,8%,,
+Atrioventricular canal defect [HP:0006695],5/5,100%,3/3,100%,,
+Persistent left superior vena cava [HP:0005301],2/37,5%,0/0,0%,,
+Abnormal superior vena cava morphology [HP:0025575],2/2,100%,0/0,0%,,
+Abnormal vena cava morphology [HP:0005345],2/2,100%,0/0,0%,,
+Abnormal morphology of the great vessels [HP:0030962],6/6,100%,2/2,100%,,
+Abnormal blood vessel morphology [HP:0033353],6/6,100%,2/2,100%,,
+Abnormal vascular morphology [HP:0025015],6/6,100%,2/2,100%,,
+Abnormal venous morphology [HP:0002624],2/2,100%,0/0,0%,,
+Abnormal shoulder morphology [HP:0003043],1/1,100%,1/1,100%,,
+Abnormal thorax morphology [HP:0000765],6/6,100%,5/5,100%,,
+Abnormal axial skeleton morphology [HP:0009121],8/8,100%,5/5,100%,,
+Aplasia/hypoplasia of the humerus [HP:0006507],7/7,100%,4/4,100%,,
+Abnormality of the humerus [HP:0003063],7/7,100%,4/4,100%,,
+Abnormality of the upper arm [HP:0001454],7/7,100%,4/4,100%,,
+Right atrial enlargement [HP:0030718],4/4,100%,0/0,0%,,
+Abnormal right atrium morphology [HP:0025580],4/4,100%,0/0,0%,,
+Atrial septal dilatation [HP:0011995],4/4,100%,0/0,0%,,
+Pectus excavatum [HP:0000767],3/4,75%,2/2,100%,,
+Abnormal sternum morphology [HP:0000766],3/3,100%,2/2,100%,,
+Postaxial hand polydactyly [HP:0001162],3/4,75%,0/0,0%,,
+Postaxial polydactyly [HP:0100259],3/3,100%,0/0,0%,,
+Polydactyly [HP:0010442],3/3,100%,0/0,0%,,
+Abnormal 5th finger morphology [HP:0004207],4/4,100%,0/0,0%,,
+Hand polydactyly [HP:0001161],3/3,100%,0/0,0%,,
+Duplication of phalanx of hand [HP:0009997],3/3,100%,0/0,0%,,
+Duplication of hand bones [HP:0004275],3/3,100%,0/0,0%,,
+Duplication of bones involving the upper extremities [HP:0009142],3/3,100%,0/0,0%,,
+High palate [HP:0000218],3/3,100%,0/0,0%,,
+Short neck [HP:0000470],3/3,100%,0/0,0%,,
+Abnormal neck morphology [HP:0025668],3/3,100%,0/0,0%,,
+Abnormality of the neck [HP:0000464],3/3,100%,0/0,0%,,
+Abnormality of the cervical spine [HP:0003319],3/3,100%,0/0,0%,,
+Abnormality of the vertebral column [HP:0000925],4/4,100%,1/1,100%,,
+Shield chest [HP:0000914],3/3,100%,0/0,0%,,
+Enlarged thorax [HP:0100625],3/3,100%,0/0,0%,,
+Abnormal rib cage morphology [HP:0001547],4/4,100%,0/0,0%,,
+Y-shaped metatarsals [HP:0010567],3/3,100%,0/0,0%,,
+Abnormal metatarsal morphology [HP:0001832],3/3,100%,0/0,0%,,
+Abnormal lower limb bone morphology [HP:0040069],3/3,100%,0/0,0%,,
+Abnormality of the lower limb [HP:0002814],3/3,100%,0/0,0%,,
+Abnormal foot morphology [HP:0001760],3/3,100%,0/0,0%,,
+Upper extremity joint dislocation [HP:0030310],0/0,0%,2/2,100%,,
+Joint dislocation [HP:0001373],0/0,0%,2/2,100%,,
+Clinodactyly of the 5th finger [HP:0004209],1/1,100%,0/0,0%,,
+Finger clinodactyly [HP:0040019],1/1,100%,0/0,0%,,
+Clinodactyly [HP:0030084],1/1,100%,0/0,0%,,
+Deviation of finger [HP:0004097],1/1,100%,2/2,100%,,
+Deviation of the 5th finger [HP:0009179],1/1,100%,0/0,0%,,
+Proximal placement of thumb [HP:0009623],0/0,0%,0/0,0%,,
+Deviation of the thumb [HP:0009603],0/0,0%,2/2,100%,,
+Abnormal 2nd finger morphology [HP:0004100],1/1,100%,0/0,0%,,
+Aplasia/Hypoplasia of the 2nd finger [HP:0006264],1/1,100%,0/0,0%,,
+Short middle phalanx of finger [HP:0005819],0/0,0%,0/0,0%,,
+Aplasia/Hypoplasia of the middle phalanges of the hand [HP:0009843],0/0,0%,0/0,0%,,
+Abnormal middle phalanx morphology of the hand [HP:0009833],0/0,0%,0/0,0%,,
+Short phalanx of finger [HP:0009803],0/0,0%,0/0,0%,,
+Short middle phalanx of the 5th finger [HP:0004220],0/0,0%,0/0,0%,,
+Type A brachydactyly [HP:0009370],0/0,0%,0/0,0%,,
+Brachydactyly [HP:0001156],0/0,0%,0/0,0%,,
+Short 5th finger [HP:0009237],0/0,0%,0/0,0%,,
+Aplasia/Hypoplasia of the 5th finger [HP:0006262],0/0,0%,0/0,0%,,
+Aplasia/Hypoplasia of the middle phalanx of the 5th finger [HP:0009161],0/0,0%,0/0,0%,,
+Aplasia/Hypoplasia of the phalanges of the 5th finger [HP:0009376],0/0,0%,0/0,0%,,
+Abnormal 5th finger phalanx morphology [HP:0004213],0/0,0%,0/0,0%,,
+Abnormality of the middle phalanx of the 5th finger [HP:0004219],0/0,0%,0/0,0%,,
+Congenital malformation of the great arteries [HP:0011603],4/4,100%,2/2,100%,,
+Aplasia involving forearm bones [HP:0009822],7/7,100%,6/6,100%,,
+Absent forearm bone [HP:0003953],7/7,100%,6/6,100%,,
+Aplasia/Hypoplasia of the ulna [HP:0006495],2/2,100%,2/2,100%,,
+Abnormal morphology of ulna [HP:0040071],2/2,100%,4/4,100%,,
+Hypoplasia of deltoid muscle [HP:0030241],0/0,0%,0/0,0%,,
+Shoulder muscle hypoplasia [HP:0008952],0/0,0%,0/0,0%,,
+Hypoplasia of the musculature [HP:0009004],0/0,0%,0/0,0%,,
+Aplasia/Hypoplasia involving the skeletal musculature [HP:0001460],0/0,0%,0/0,0%,,
+Abnormality of muscle size [HP:0030236],0/0,0%,0/0,0%,,
+Abnormal skeletal muscle morphology [HP:0011805],2/2,100%,0/0,0%,,
+Abnormality of the musculature [HP:0003011],2/2,100%,0/0,0%,,
+Aplasia/Hypoplasia involving the shoulder musculature [HP:0001464],0/0,0%,0/0,0%,,
+Aplasia/Hypoplasia involving the musculature of the upper limbs [HP:0001467],0/0,0%,0/0,0%,,
+Aplasia/Hypoplasia involving the musculature of the extremities [HP:0009128],0/0,0%,0/0,0%,,
+Abnormality of the musculature of the limbs [HP:0009127],2/2,100%,0/0,0%,,
+Abnormality of the musculature of the upper limbs [HP:0001446],2/2,100%,0/0,0%,,
+Abnormality of the shoulder girdle musculature [HP:0001435],0/0,0%,0/0,0%,,
+Atrioventricular dissociation [HP:0011709],0/22,0%,1/1,100%,,
+Abnormal toe morphology [HP:0001780],0/0,0%,0/0,0%,,
+Abnormal toe phalanx morphology [HP:0010161],0/0,0%,0/0,0%,,
+Abnormality of the distal phalanges of the toes [HP:0010182],0/0,0%,0/0,0%,,
+Patent foramen ovale [HP:0001655],4/40,10%,0/36,0%,,
+Synostosis of joints [HP:0100240],1/1,100%,1/1,100%,,
+Short 1st metacarpal [HP:0010034],0/30,0%,0/22,0%,,
+Short phalanx of the thumb [HP:0009660],0/30,0%,0/22,0%,,
+11 pairs of ribs [HP:0000878],1/1,100%,0/0,0%,,
+Missing ribs [HP:0000921],1/1,100%,0/0,0%,,
+Aplasia/Hypoplasia of the ribs [HP:0006712],1/1,100%,0/0,0%,,
+Aplasia/Hypoplasia involving bones of the thorax [HP:0006711],1/1,100%,2/2,100%,,
+Aplasia/hypoplasia affecting bones of the axial skeleton [HP:0009122],2/2,100%,2/2,100%,,
+Abnormal rib morphology [HP:0000772],1/1,100%,0/0,0%,,
+First degree atrioventricular block [HP:0011705],0/22,0%,1/1,100%,,
+Micrognathia [HP:0000347],1/1,100%,1/1,100%,,
+Aplasia/Hypoplasia of the mandible [HP:0009118],1/1,100%,1/1,100%,,
+Aplasia/Hypoplasia involving bones of the skull [HP:0009116],1/1,100%,1/1,100%,,
+Abnormal skull morphology [HP:0000929],1/1,100%,2/2,100%,,
+Abnormal mandible morphology [HP:0000277],1/1,100%,1/1,100%,,
+Abnormal jaw morphology [HP:0030791],1/1,100%,1/1,100%,,
+Abnormal facial skeleton morphology [HP:0011821],1/1,100%,1/1,100%,,
+Bowed forearm bones [HP:0003956],0/0,0%,1/1,100%,,
+Bowing of the arm [HP:0006488],0/0,0%,1/1,100%,,
+Bowing of the long bones [HP:0006487],0/0,0%,1/1,100%,,
+Abnormal diaphysis morphology [HP:0000940],0/0,0%,1/1,100%,,
+Common atrium [HP:0011565],0/83,0%,0/38,0%,,
+Unroofed coronary sinus [HP:0031297],0/85,0%,0/38,0%,,
+Amelia involving the upper limbs [HP:0009812],0/83,0%,1/37,3%,,
+Third degree atrioventricular block [HP:0001709],0/22,0%,1/1,100%,,
+Small hypothenar eminence [HP:0010487],2/2,100%,0/0,0%,,
+Abnormality of the hypothenar eminence [HP:0010486],2/2,100%,0/0,0%,,
+Abnormality of the musculature of the hand [HP:0001421],2/2,100%,0/0,0%,,
+Hypoplastic scapulae [HP:0000882],1/1,100%,1/1,100%,,
+Aplasia/Hypoplasia of the scapulae [HP:0006713],1/1,100%,1/1,100%,,
+Abnormal scapula morphology [HP:0000782],1/1,100%,1/1,100%,,
+Left ventricular noncompaction cardiomyopathy [HP:0011664],0/1,0%,1/5,20%,,
+Noncompaction cardiomyopathy [HP:0012817],0/0,0%,1/1,100%,,
+Cardiomyopathy [HP:0001638],0/0,0%,1/1,100%,,
+Abnormal myocardium morphology [HP:0001637],0/0,0%,1/1,100%,,
+Sinus bradycardia [HP:0001688],0/0,0%,1/1,100%,,
+Abnormal electrophysiology of sinoatrial node origin [HP:0011702],0/0,0%,1/1,100%,,
+Arrhythmia [HP:0011675],1/1,100%,1/1,100%,,
+Bradycardia [HP:0001662],0/0,0%,1/1,100%,,
+Abnormal skin morphology [HP:0011121],0/0,0%,1/1,100%,,
+Abnormality of the skin [HP:0000951],0/0,0%,1/1,100%,,
+Abnormality of the integument [HP:0001574],0/0,0%,1/1,100%,,
+Sinus venosus atrial septal defect [HP:0011567],0/2,0%,1/1,100%,,
diff --git a/docs/report/tbx5_mtc_report.html b/docs/report/tbx5_frameshift_vs_missense.mtc_report.html
similarity index 91%
rename from docs/report/tbx5_mtc_report.html
rename to docs/report/tbx5_frameshift_vs_missense.mtc_report.html
index 175a0245..78345301 100644
--- a/docs/report/tbx5_mtc_report.html
+++ b/docs/report/tbx5_frameshift_vs_missense.mtc_report.html
@@ -48,9 +48,9 @@
   <h1>Phenotype testing report</h1>
   <p>Phenotype MTC filter: <em>HPO MTC filter</em></p>
   <p>Multiple testing correction: <em>fdr_bh</em></p>
-  <p>Performed statistical tests for 15 out of the total of 259 HPO terms.</p>
+  <p>Performed statistical tests for 16 out of the total of 259 HPO terms.</p>
     <table>
-      <caption>Using <em>HPO MTC filter</em>, 244 term(s) were omitted from statistical analysis.</caption>
+      <caption>Using <em>HPO MTC filter</em>, 243 term(s) were omitted from statistical analysis.</caption>
         <tbody>
           <tr>
             <th>Code</th>
@@ -61,15 +61,15 @@ <h1>Phenotype testing report</h1>
           <tr>
             <!-- TODO: plug the real reason code here -->
             <td>TODO</td>
-            <td>Skipping term because all genotypes have same HPO observed proportions</td>
-            <td>42</td>
+            <td>Skipping general term</td>
+            <td>43</td>
           </tr>
           
           <tr>
             <!-- TODO: plug the real reason code here -->
             <td>TODO</td>
-            <td>Skipping general term</td>
-            <td>41</td>
+            <td>Skipping term because all genotypes have same HPO observed proportions</td>
+            <td>42</td>
           </tr>
           
           <tr>
@@ -135,13 +135,6 @@ <h1>Phenotype testing report</h1>
             <td>3</td>
           </tr>
           
-          <tr>
-            <!-- TODO: plug the real reason code here -->
-            <td>TODO</td>
-            <td>Skipping non-target term</td>
-            <td>3</td>
-          </tr>
-          
           <tr>
             <!-- TODO: plug the real reason code here -->
             <td>TODO</td>
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index 9fad88dd..a2bc3230 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -9,16 +9,16 @@ The tutorial illustrates just one of the many ways GPSEA can be used to characte
 The :ref:`user-guide` contains details about additional methods and functionalities.
 
 
-The tutorial presents an analysis of a cohort of individuals with pathogenic variants in *TBX5* leading to 
-`Holt-Oram syndrome MIM:142900 <https://omim.org/entry/142900>`_. 
+The tutorial presents an analysis of a cohort of individuals with pathogenic variants in *TBX5* leading to
+`Holt-Oram syndrome MIM:142900 <https://omim.org/entry/142900>`_.
 
-Holt-Oram syndrome is an autosomal dominant disorder characterized by 
+Holt-Oram syndrome is an autosomal dominant disorder characterized by
 upper limb defects, congenital heart defects, and arrhythmias (`PMID:38336121 <https://pubmed.ncbi.nlm.nih.gov/38336121/>`_).
 It has been observed in the literature that congenital defects of the ventricular and atrial septum are more
 common in the truncating than in the missense variants (`PMID:30552424 <https://pubmed.ncbi.nlm.nih.gov/30552424/>`_).
 Additionally, upper limb defects are more frequent in patients with protein-truncating variants (`PMID:38336121 <https://pubmed.ncbi.nlm.nih.gov/38336121/>`_).
 
-To perform the analysis, we curated the literature and created on `GA4GH phenopacket <https://pubmed.ncbi.nlm.nih.gov/35705716/>`_ for each 
+To perform the analysis, we curated the literature and created on `GA4GH phenopacket <https://pubmed.ncbi.nlm.nih.gov/35705716/>`_ for each
 affected individual. The phenopackets are made available in `Phenopacket Store <https://github.com/monarch-initiative/phenopacket-store>`_.
 
 
@@ -26,9 +26,9 @@ affected individual. The phenopackets are made available in `Phenopacket Store <
 The analysis
 ~~~~~~~~~~~~
 
-For the analysis, the `MANE <https://www.ncbi.nlm.nih.gov/refseq/MANE/>`_ transcript 
+For the analysis, the `MANE <https://www.ncbi.nlm.nih.gov/refseq/MANE/>`_ transcript
 (i.e., the "main" biomedically relevant transcript of a gene) should be chosen unless
-there is a specific reason not to (which should occur rarely if at all). 
+there is a specific reason not to (which should occur rarely if at all).
 
 In the case of *TBX5* the MANE transcript is `NM_181486.4`. Note that the trascript identifier (`NM_181486`) and the version (`4`) are both required.
 A good way to find the MANE transcript is to search on the gene symbol (e.g., *TBX5*) in `ClinVar <https://www.ncbi.nlm.nih.gov/clinvar/>`_ and to
@@ -49,7 +49,7 @@ corresponding protein accession `NP_852259.1`.
 Load HPO
 ^^^^^^^^
 
-GPSEA needs HPO to do the analysis. 
+GPSEA needs HPO to do the analysis.
 We use HPO toolkit to load HPO version `v2023-10-09`:
 
 >>> import hpotk
@@ -75,13 +75,13 @@ and stored in `Phenopacket Store <https://github.com/monarch-initiative/phenopac
 156
 
 We loaded 156 phenopackets which need further preprocessing to prepare for the analysis.
-We will compute functional annotations for the mutations and then include the individuals into 
+We will compute functional annotations for the mutations and then include the individuals into
 a :class:`~gpsea.model.Cohort`:
 
 >>> from gpsea.preprocessing import configure_caching_cohort_creator, load_phenopackets
 >>> cohort_creator = configure_caching_cohort_creator(hpo)
 >>> cohort, validation = load_phenopackets(  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
-...     phenopackets=phenopackets, 
+...     phenopackets=phenopackets,
 ...     cohort_creator=cohort_creator,
 ... )
 Patients Created: ...
@@ -96,7 +96,7 @@ We loaded the patient data into a `cohort` which is ready for the next steps.
 
 .. seealso::
 
-  Here we show how to create a :class:`~gpsea.model.Cohort` from phenopackets. 
+  Here we show how to create a :class:`~gpsea.model.Cohort` from phenopackets.
   See :ref:`input-data` section to learn how to create a cohort from another inputs.
 
 
@@ -113,7 +113,7 @@ We can now explore the cohort to see how many patients are included.
 
 .. raw:: html
   :file: report/tbx5_cohort_info.html
-  
+
 .. note::
 
   The report can also be displayed directly in a Jupyter notebook by running::
@@ -123,14 +123,14 @@ We can now explore the cohort to see how many patients are included.
 
 Now we can show the distribution of variants with respect to the encoded protein.
 We first obtain `tx_coordinates` (:class:`~gpsea.model.TranscriptCoordinates`)
-and `protein_meta` (:class:`~gpsea.model.ProteinMetadata`) 
+and `protein_meta` (:class:`~gpsea.model.ProteinMetadata`)
 with information about the transcript and protein "anatomy":
 
 >>> from gpsea.model.genome import GRCh38
 >>> from gpsea.preprocessing import configure_protein_metadata_service, VVMultiCoordinateService
 >>> txc_service = VVMultiCoordinateService(genome_build=GRCh38)
 >>> pms = configure_protein_metadata_service()
->>> tx_coordinates = txc_service.fetch(tx_id) 
+>>> tx_coordinates = txc_service.fetch(tx_id)
 >>> protein_meta = pms.annotate(px_id)
 
 and we follow with plotting the diagram of the mutations on the protein:
@@ -158,7 +158,7 @@ Prepare genotype and phenotype predicates
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 We will create a predicate to bin patients into group
-depending on presence of a missense and frameshift variant to test 
+depending on presence of a missense and frameshift variant to test
 if there is a difference between frameshift and non-frameshift variants
 in the individuals of the *TBX5* cohort.
 
@@ -176,74 +176,99 @@ in the individuals of the *TBX5* cohort.
 
 .. note::
 
-  There are many other ways to set up a predicate for testing 
+  There are many other ways to set up a predicate for testing
   for a GP correlation.
   See the :ref:`predicates` section to learn more about building
   a predicate of interest.
 
+The phenotype grouping is based on presence or absence of an HPO term.
+We take advantage of the ontology "true path rule" to infer presence
+of the ancestor terms for all present HPO terms
+(e.g. presence of `Abnormal ventricular septum morphology <https://hpo.jax.org/browse/term/HP:0010438>`_
+in an individual annotated with `Ventricular septal defect <https://hpo.jax.org/browse/term/HP:0001629>`_)
+and exclusion of the descendant terms for all excluded terms (e.g. exclusion of
+`Motor seizure <https://hpo.jax.org/browse/term/HP:0020219>`_
+in an individual where `Seizure <https://hpo.jax.org/browse/term/HP:0001250>`_
+was excluded):
+
+>>> from gpsea.analysis.predicate.phenotype import prepare_predicates_for_terms_of_interest
+>>> pheno_predicates = prepare_predicates_for_terms_of_interest(
+...     cohort=cohort,
+...     hpo=hpo,
+...     min_n_of_patients_with_term=2,
+... )
 
-By default, GPSEA will perform one hypothesis test for each HPO term used to annotate more than one individual in the cohort.
-This also includes the terms implied by the ontology "true path rule", 
-which states that presence of a term 
-(e.g., `Ventricular septal defect <https://hpo.jax.org/browse/term/HP:0001629>`_)
-implies presence of all its ancestor terms 
-(e.g., `Abnormal ventricular septum morphology <https://hpo.jax.org/browse/term/HP:0010438>`_, 
-`Abnormal cardiac septum morphology <https://hpo.jax.org/browse/term/HP:0001671>`_,
-`Abnormal cardiac ventricle morphology <https://hpo.jax.org/browse/term/HP:0001713>`_, ...).
-However, testing multiple hypothesis increases the chance of receiving false positive result,
-and multiple testing correction must be applied.
-See :ref:`mtc` for information about how to perform multiple testing correction with GPSEA. 
+By default, GPSEA will perform one hypothesis test for each HPO term used to annotate two or more individuals in the cohort
+(see ``min_n_of_patients_with_term=2`` above).
+Testing multiple hypothesis on the same dataset increases the chance of receiving false positive result.
+However, GPSEA simplifies the application of an appropriate multiple testing correction.
 
 For general use, we recommend using a combination
 of a *Phenotype MTC filter* (:class:`~gpsea.analysis.PhenotypeMtcFilter`) with a *multiple testing correction*.
-Phenotype MTC filter chooses the HPO terms to test according to several heuristics, which 
+Phenotype MTC filter chooses the HPO terms to test according to several heuristics, which
 reduce the multiple testing burden and focus the analysis
 on the most interesting terms (see :ref:`HPO MTC filter <hpo-mtc-filter-strategy>` for more info).
 Then the multiple testing correction, such as Bonferroni or Benjamini-Hochberg,
 is used to control the family-wise error rate or the false discovery rate.
+See :ref:`mtc` for more information.
 
-Here we use HPO MTC filter (:meth:`~gpsea.analysis.CohortAnalysisConfiguration.hpo_mtc_strategy`)
-along with Benjamini-Hochberg procedure (:meth:`~gpsea.analysis.CohortAnalysisConfiguration.pval_correction`):
+In this example, we will use a combination of the HPO MTC filter (:class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`)
+with Benjamini-Hochberg procedure (``mtc_correction='fdr_bh'``)
+with a false discovery control level at (``mtc_alpha=0.05``):
 
->>> from gpsea.analysis import configure_cohort_analysis, CohortAnalysisConfiguration
->>> config = CohortAnalysisConfiguration()
->>> config.hpo_mtc_strategy()
->>> config.pval_correction = 'fdr_bh'
->>> analysis = configure_cohort_analysis(
-...     cohort=cohort,
-...     hpo=hpo,
-...     config=config,
+>>> from gpsea.analysis.mtc_filter import HpoMtcFilter
+>>> mtc_filter = HpoMtcFilter.default_filter(hpo, term_frequency_threshold=0.2)
+>>> mtc_correction = 'fdr_bh'
+>>> mtc_alpha = 0.05
+
+Choosing the statistical procedure for assessment of association between genotype and phenotype
+groups is the last missing piece of the analysis. We will use Fisher Exact Test:
+
+>>> from gpsea.analysis.pcats.stats import ScipyFisherExact
+>>> count_statistic = ScipyFisherExact()
+
+and we finalize the analysis setup by putting all components together
+into :class:`~gpsea.analysis.pcats.HpoTermAnalysis`:
+
+>>> from gpsea.analysis.pcats import HpoTermAnalysis
+>>> analysis = HpoTermAnalysis(
+...     count_statistic=count_statistic,
+...     mtc_filter=mtc_filter,
+...     mtc_correction=mtc_correction,
+...     mtc_alpha=mtc_alpha,
 ... )
 
 Now we can perform the analysis and investigate the results.
 
->>> result = analysis.compare_genotype_vs_cohort_phenotypes(gt_predicate)
+>>> result = analysis.compare_genotype_vs_phenotypes(
+...     cohort=cohort,
+...     gt_predicate=gt_predicate,
+...     pheno_predicates=pheno_predicates,
+... )
 >>> result.total_tests
 16
 
 We only tested 16 HPO terms. This is despite the individuals being collectively annotated with
 259 direct and indirect HPO terms
 
->>> from gpsea.analysis import prepare_hpo_terms_of_interest
->>> terms = prepare_hpo_terms_of_interest(hpo, cohort.all_patients, min_n_of_patients_with_term=2)
->>> len(terms)
+>>> len(result.phenotypes)
 259
 
 We can show the reasoning behind *not* testing 243 (`259 - 16`) HPO terms
 by exploring the phenotype MTC filtering report.
 
 >>> from gpsea.view import MtcStatsViewer
->>> mtc_viewer = MtcStatsViewer() 
->>> mtc_report = mtc_viewer.process(result.mtc_filter_report)
->>> with open('docs/report/tbx5_mtc_report.html', 'w') as fh:  # doctest: +SKIP
+>>> mtc_viewer = MtcStatsViewer()
+>>> mtc_report = mtc_viewer.process(result)
+>>> with open('docs/report/tbx5_frameshift_vs_missense.mtc_report.html', 'w') as fh:  # doctest: +SKIP
 ...     _ = fh.write(mtc_report)
 
 .. raw:: html
-  :file: report/tbx5_mtc_report.html
+  :file: report/tbx5_frameshift_vs_missense.mtc_report.html
 
 ..
-  
-  TODO: 
+
+  TODO:
   Show how to write out the tested HPO terms.
 
 and these are the HPO terms ordered by the p value corrected with the Benjamini-Hochberg procedure:
@@ -259,8 +284,8 @@ and these are the HPO terms ordered by the p value corrected with the Benjamini-
 We see that several HPO terms are significantly associated
 with presence of a frameshift variant in *TBX5*.
 For example, `Ventricular septal defect <https://hpo.jax.org/browse/term/HP:0001629>`_
-was observed in 31/60 (52%) patients with a missense variant 
+was observed in 31/60 (52%) patients with a missense variant
 but it was observed in 19/19 (100%) patients with a frameshift variant.
-Fisher exact test computed a p value of `~0.0000562` 
-and the p value corrected by Benjamini-Hochberg procedure 
+Fisher exact test computed a p value of `~0.0000562`
+and the p value corrected by Benjamini-Hochberg procedure
 is `~0.00112`.
diff --git a/docs/user-guide/glossary.rst b/docs/user-guide/glossary.rst
new file mode 100644
index 00000000..bf8f9d90
--- /dev/null
+++ b/docs/user-guide/glossary.rst
@@ -0,0 +1,89 @@
+.. _glossary:
+
+========
+Glossary
+========
+
+The glossary summarizes several frequently used concepts.
+
+.. _true-path-rule:
+
+True path rule
+~~~~~~~~~~~~~~
+
+The true path rule of ontologies states that an item (e.g. an individual) annotated with an ontology term
+(e.g. `Focal-onset seizure <https://hpo.jax.org/browse/term/HP:0007359>`_)
+is implicitly annotated with all its *ancestor* terms
+(`Seizure <https://hpo.jax.org/browse/term/HP:0001250>`_,
+`Abnormal nervous system physiology <https://hpo.jax.org/browse/term/HP:0012638>`_, ...).
+Conversely, exclusion of a term (e.g. `Abnormal ventricular septum morphology <https://hpo.jax.org/browse/term/HP:0010438>`_)
+implies exclusion of all its *descendants*
+(`Ventricular septal defect <https://hpo.jax.org/browse/term/HP:0001629>`_,
+`Ventricular septal aneurysm <https://hpo.jax.org/browse/term/HP:0030957>`_, ...).
+
+
+.. _length-of-the-reference-allele:
+
+Length of the reference allele
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The length of the REF allele corresponds to the length of the genomic region affected by the variant.
+
+Let's show a few examples.
+
+>>> from gpsea.model import VariantCoordinates
+>>> from gpsea.model.genome import GRCh38
+>>> chr1 = GRCh38.contig_by_name("chr1")
+
+The length of the reference allele of a missense variant is 1
+because the variant affects a 1-bp region spanned by the ``C`` nucleotide:
+
+>>> missense = VariantCoordinates.from_vcf_literal(chr1, 1001, 'C', 'T')
+>>> len(missense)
+1
+
+The length of a "small" deletion is the same as the length of the ref allele `str`:
+(``'CCC'`` in the example below):
+
+>>> deletion = VariantCoordinates.from_vcf_literal(chr1, 1001, 'CCC', 'C')
+>>> len(deletion)
+3
+
+This is because the literal notation spells out the alleles.
+However, this simple rule does not apply in symbolic notation.
+Here, the REF length corresponds to the length of the allele region.
+
+For instance, for the following structural variant::
+
+    #CHROM   POS    ID   REF  ALT     QUAL   FILTER   INFO
+    1        1001   .    C    <DEL>   6      PASS     SVTYPE=DEL;END=1003;SVLEN=-3
+
+the length of the REF allele is `3`:
+
+>>> sv_deletion = VariantCoordinates.from_vcf_symbolic(
+...     chr1, pos=1001, end=1003,
+...     ref='C', alt='<DEL>', svlen=-3,
+... )
+>>> len(sv_deletion)
+3
+
+because the deletion removes 3 base pairs at the coordinates :math:`[1001, 1003]`.
+
+
+.. _change-length-of-an-allele:
+
+Change length of an allele
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Change length is defined as the difference between lengths of the *alt* and *ref* alleles.
+SNVs lead to change length of zero, deletions and insertions/duplications lead to negative
+and positive change lengths, respectively. See the table below for examples.
+
+==================  ==================  ===============
+ Reference allele    Alternate allele    Change length
+==================  ==================  ===============
+ C                   T                    0
+ CG                  AT                   0
+ ACTG                A                   -3
+ A                   AAT                  2
+==================  ==================  ===============
diff --git a/docs/user-guide/index.rst b/docs/user-guide/index.rst
index 923b1834..9f7933ee 100644
--- a/docs/user-guide/index.rst
+++ b/docs/user-guide/index.rst
@@ -13,6 +13,7 @@ TODO - write high level overview and bridge to individual sections.
   input-data
   exploratory
   predicates
-  mtc
   stats
+  mtc
+  glossary
   autosomal_recessive
diff --git a/docs/user-guide/mtc.rst b/docs/user-guide/mtc.rst
index 30ee95df..7aa3e23e 100644
--- a/docs/user-guide/mtc.rst
+++ b/docs/user-guide/mtc.rst
@@ -104,6 +104,8 @@ This is how we can set an alternative MTC correction procedure:
 'fdr_bh'
 
 
+.. _mtc-filters:
+
 MTC filters: Choosing which terms to test
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/docs/user-guide/report/tbx5_frameshift.csv b/docs/user-guide/report/tbx5_frameshift.csv
new file mode 100644
index 00000000..d794cec4
--- /dev/null
+++ b/docs/user-guide/report/tbx5_frameshift.csv
@@ -0,0 +1,262 @@
+FRAMESHIFT_VARIANT on NM_181486.4,Yes,Yes,No,No,,
+,Count,Percent,Count,Percent,Corrected p values,p values
+Ventricular septal defect [HP:0001629],19/19,100%,42/71,59%,0.004112753923262261,0.00024192670136836828
+Abnormal atrioventricular conduction [HP:0005150],3/3,100%,1/23,4%,0.013076923076923076,0.0015384615384615385
+Absent thumb [HP:0009777],14/31,45%,18/100,18%,0.021044138590779585,0.0037136715160199273
+Atrioventricular block [HP:0001678],2/2,100%,1/23,4%,0.034,0.010000000000000002
+Heart block [HP:0012722],2/2,100%,1/23,4%,0.034,0.010000000000000002
+Patent ductus arteriosus [HP:0001643],2/2,100%,6/40,15%,0.09214092140921408,0.03252032520325203
+Secundum atrial septal defect [HP:0001684],4/22,18%,23/55,42%,0.1440020479198931,0.06544319142266644
+Triphalangeal thumb [HP:0001199],13/32,41%,23/99,23%,0.1440020479198931,0.06932119159387057
+Cardiac conduction abnormality [HP:0031546],3/3,100%,15/37,41%,0.1440020479198931,0.08259109311740892
+Muscular ventricular septal defect [HP:0011623],6/25,24%,8/84,10%,0.1440020479198931,0.08470708701170182
+Pulmonary arterial hypertension [HP:0002092],0/2,0%,8/14,57%,0.6899307928951144,0.4666666666666667
+Short thumb [HP:0009778],8/30,27%,25/69,36%,0.6899307928951144,0.4870099714553749
+Absent radius [HP:0003974],6/25,24%,9/43,21%,1.0,0.7703831604944444
+Atrial septal defect [HP:0001631],20/20,100%,63/65,97%,1.0,1.0
+Hypoplasia of the radius [HP:0002984],6/14,43%,34/75,45%,1.0,1.0
+Short humerus [HP:0005792],4/9,44%,8/21,38%,1.0,1.0
+Hypoplasia of the ulna [HP:0003022],2/10,20%,3/17,18%,1.0,1.0
+Aplasia/Hypoplasia of the thumb [HP:0009601],19/19,100%,40/40,100%,,
+Aplasia/Hypoplasia of fingers [HP:0006265],19/19,100%,44/44,100%,,
+Aplasia/hypoplasia involving bones of the hand [HP:0005927],19/19,100%,44/44,100%,,
+Aplasia/hypoplasia involving bones of the upper limbs [HP:0006496],22/22,100%,78/78,100%,,
+Aplasia/hypoplasia involving bones of the extremities [HP:0045060],22/22,100%,78/78,100%,,
+Aplasia/hypoplasia of the extremities [HP:0009815],22/22,100%,78/78,100%,,
+Abnormality of limbs [HP:0040064],34/34,100%,102/102,100%,,
+Phenotypic abnormality [HP:0000118],38/38,100%,114/114,100%,,
+All [HP:0000001],38/38,100%,114/114,100%,,
+Aplasia/hypoplasia involving the skeleton [HP:0009115],23/23,100%,80/80,100%,,
+Abnormal skeletal morphology [HP:0011842],35/35,100%,103/103,100%,,
+Abnormality of the skeletal system [HP:0000924],35/35,100%,103/103,100%,,
+Abnormality of the musculoskeletal system [HP:0033127],35/35,100%,104/104,100%,,
+Abnormal limb bone morphology [HP:0002813],34/34,100%,92/92,100%,,
+Abnormality of limb bone [HP:0040068],34/34,100%,92/92,100%,,
+Abnormal appendicular skeleton morphology [HP:0011844],34/34,100%,93/93,100%,,
+Abnormality of the upper limb [HP:0002817],34/34,100%,102/102,100%,,
+Abnormal hand morphology [HP:0005922],20/20,100%,75/75,100%,,
+Abnormality of the hand [HP:0001155],31/31,100%,88/88,100%,,
+Abnormal finger morphology [HP:0001167],31/31,100%,64/64,100%,,
+Abnormal digit morphology [HP:0011297],33/33,100%,67/67,100%,,
+Abnormal thumb morphology [HP:0001172],31/31,100%,58/58,100%,,
+Short finger [HP:0009381],8/8,100%,27/27,100%,,
+Short digit [HP:0011927],10/10,100%,28/28,100%,,
+Abnormality of thumb phalanx [HP:0009602],13/13,100%,26/26,100%,,
+Congenital malformation of the great arteries [HP:0011603],2/2,100%,7/7,100%,,
+Abnormal morphology of the great vessels [HP:0030962],2/2,100%,10/10,100%,,
+Abnormal blood vessel morphology [HP:0033353],2/2,100%,11/11,100%,,
+Abnormal vascular morphology [HP:0025015],2/2,100%,11/11,100%,,
+Abnormal cardiovascular system morphology [HP:0030680],30/30,100%,92/92,100%,,
+Abnormality of the cardiovascular system [HP:0001626],32/32,100%,94/94,100%,,
+Abnormality of the vasculature [HP:0002597],2/2,100%,17/17,100%,,
+Aplasia of the 1st metacarpal [HP:0010035],0/0,0%,3/3,100%,,
+Aplasia of the proximal phalanges of the hand [HP:0010242],0/0,0%,3/3,100%,,
+Aplasia/Hypoplasia of the proximal phalanges of the hand [HP:0009851],0/0,0%,3/3,100%,,
+Abnormal proximal phalanx morphology of the hand [HP:0009834],0/0,0%,3/3,100%,,
+Abnormal finger phalanx morphology [HP:0005918],0/0,0%,9/9,100%,,
+Aplasia/Hypoplasia of the phalanges of the hand [HP:0009767],0/0,0%,6/6,100%,,
+Aplasia of the phalanges of the hand [HP:0009802],0/0,0%,3/3,100%,,
+Aplasia involving bones of the upper limbs [HP:0009823],0/0,0%,3/3,100%,,
+Aplasia involving bones of the extremities [HP:0009825],0/0,0%,3/3,100%,,
+Finger aplasia [HP:0009380],14/14,100%,23/23,100%,,
+Aplasia of metacarpal bones [HP:0010048],0/0,0%,3/3,100%,,
+Aplasia/Hypoplasia involving the metacarpal bones [HP:0005914],0/0,0%,4/4,100%,,
+Abnormal metacarpal morphology [HP:0005916],0/0,0%,4/4,100%,,
+Abnormal upper limb bone morphology [HP:0040070],14/14,100%,50/50,100%,,
+Aplasia/Hypoplasia of the 1st metacarpal [HP:0010026],0/0,0%,4/4,100%,,
+Abnormal 1st metacarpal morphology [HP:0010009],0/0,0%,4/4,100%,,
+Aplasia/Hypoplasia of the phalanges of the thumb [HP:0009658],0/0,0%,4/4,100%,,
+Partial absence of thumb [HP:0009659],0/0,0%,3/3,100%,,
+Abnormal atrial septum morphology [HP:0011994],20/20,100%,64/64,100%,,
+Abnormal cardiac septum morphology [HP:0001671],28/28,100%,89/89,100%,,
+Abnormal heart morphology [HP:0001627],30/30,100%,89/89,100%,,
+Abnormal cardiac atrium morphology [HP:0005120],20/20,100%,64/64,100%,,
+Perimembranous ventricular septal defect [HP:0011682],3/25,12%,6/84,7%,,
+Abnormal ventricular septum morphology [HP:0010438],19/19,100%,42/42,100%,,
+Abnormal cardiac ventricle morphology [HP:0001713],19/19,100%,43/43,100%,,
+Forearm undergrowth [HP:0009821],7/7,100%,35/35,100%,,
+Upper limb undergrowth [HP:0009824],7/7,100%,38/38,100%,,
+Limb undergrowth [HP:0009826],7/7,100%,38/38,100%,,
+Aplasia/hypoplasia involving forearm bones [HP:0006503],12/12,100%,43/43,100%,,
+Abnormal forearm bone morphology [HP:0040072],14/14,100%,43/43,100%,,
+Abnormal forearm morphology [HP:0002973],14/14,100%,43/43,100%,,
+Short long bone [HP:0003026],9/9,100%,41/41,100%,,
+Abnormal long bone morphology [HP:0011314],13/13,100%,50/50,100%,,
+Aplasia/Hypoplasia of the radius [HP:0006501],11/11,100%,43/43,100%,,
+Abnormal morphology of the radius [HP:0002818],13/13,100%,43/43,100%,,
+Abnormal carpal morphology [HP:0001191],0/0,0%,30/32,94%,,
+Abnormality of the wrist [HP:0003019],0/0,0%,30/30,100%,,
+Abnormality of upper limb joint [HP:0009810],6/6,100%,32/32,100%,,
+Abnormal joint morphology [HP:0001367],6/6,100%,33/33,100%,,
+Persistent left superior vena cava [HP:0005301],0/0,0%,4/39,10%,,
+Abnormal superior vena cava morphology [HP:0025575],0/0,0%,4/4,100%,,
+Abnormal vena cava morphology [HP:0005345],0/0,0%,4/4,100%,,
+Abnormal venous morphology [HP:0002624],0/0,0%,4/4,100%,,
+Tricuspid regurgitation [HP:0005180],0/0,0%,5/5,100%,,
+Atrioventricular valve regurgitation [HP:0034376],2/2,100%,7/7,100%,,
+Abnormal atrioventricular valve physiology [HP:0031650],2/2,100%,7/7,100%,,
+Abnormal heart valve physiology [HP:0031653],2/2,100%,7/7,100%,,
+Abnormal cardiovascular system physiology [HP:0011025],5/5,100%,30/30,100%,,
+Abnormal tricuspid valve physiology [HP:0031651],0/0,0%,5/5,100%,,
+Micrognathia [HP:0000347],1/1,100%,1/1,100%,,
+Aplasia/Hypoplasia of the mandible [HP:0009118],1/1,100%,1/1,100%,,
+Aplasia/Hypoplasia involving bones of the skull [HP:0009116],1/1,100%,1/1,100%,,
+Aplasia/hypoplasia affecting bones of the axial skeleton [HP:0009122],2/2,100%,3/3,100%,,
+Abnormal axial skeleton morphology [HP:0009121],5/5,100%,9/9,100%,,
+Abnormal skull morphology [HP:0000929],2/2,100%,1/1,100%,,
+Abnormality of the head [HP:0000234],2/2,100%,5/5,100%,,
+Abnormality of head or neck [HP:0000152],2/2,100%,5/5,100%,,
+Abnormal mandible morphology [HP:0000277],1/1,100%,1/1,100%,,
+Abnormal jaw morphology [HP:0030791],1/1,100%,1/1,100%,,
+Abnormal facial skeleton morphology [HP:0011821],1/1,100%,1/1,100%,,
+Cleft soft palate [HP:0000185],0/0,0%,2/2,100%,,
+Abnormal soft palate morphology [HP:0100736],0/0,0%,2/2,100%,,
+Abnormal palate morphology [HP:0000174],0/0,0%,5/5,100%,,
+Abnormal oral cavity morphology [HP:0000163],1/1,100%,5/5,100%,,
+Abnormal oral morphology [HP:0031816],1/1,100%,5/5,100%,,
+Abnormality of the mouth [HP:0000153],1/1,100%,5/5,100%,,
+Abnormality of the face [HP:0000271],1/1,100%,5/5,100%,,
+Cleft palate [HP:0000175],0/0,0%,2/2,100%,,
+Orofacial cleft [HP:0000202],0/0,0%,2/2,100%,,
+Craniofacial cleft [HP:5201015],0/0,0%,2/2,100%,,
+Aplasia involving forearm bones [HP:0009822],6/6,100%,9/9,100%,,
+Absent forearm bone [HP:0003953],6/6,100%,9/9,100%,,
+Aplasia/hypoplasia of the humerus [HP:0006507],4/4,100%,8/8,100%,,
+Abnormality of the humerus [HP:0003063],4/4,100%,8/8,100%,,
+Abnormality of the upper arm [HP:0001454],4/4,100%,8/8,100%,,
+1-2 finger syndactyly [HP:0010704],1/1,100%,4/4,100%,,
+Finger syndactyly [HP:0006101],2/2,100%,5/5,100%,,
+Syndactyly [HP:0001159],2/2,100%,5/5,100%,,
+Complete atrioventricular canal defect [HP:0001674],3/36,8%,6/67,9%,,
+Atrioventricular canal defect [HP:0006695],3/3,100%,6/6,100%,,
+Upper limb phocomelia [HP:0009813],2/37,5%,8/116,7%,,
+Phocomelia [HP:0009829],2/2,100%,8/8,100%,,
+Abnormality of cardiovascular system electrophysiology [HP:0030956],3/3,100%,18/18,100%,,
+Atrioventricular dissociation [HP:0011709],1/1,100%,0/22,0%,,
+Abnormal morphology of ulna [HP:0040071],4/4,100%,4/4,100%,,
+Aplasia/Hypoplasia of the ulna [HP:0006495],2/2,100%,4/4,100%,,
+Mitral valve prolapse [HP:0001634],1/1,100%,1/1,100%,,
+Abnormal mitral valve morphology [HP:0001633],1/1,100%,1/1,100%,,
+Abnormal atrioventricular valve morphology [HP:0006705],1/1,100%,1/1,100%,,
+Abnormal heart valve morphology [HP:0001654],1/1,100%,1/1,100%,,
+Proximal placement of thumb [HP:0009623],0/0,0%,3/3,100%,,
+Deviation of the thumb [HP:0009603],2/2,100%,3/3,100%,,
+Deviation of finger [HP:0004097],2/2,100%,4/4,100%,,
+Deviation of the hand or of fingers of the hand [HP:0009484],2/2,100%,5/5,100%,,
+Short 1st metacarpal [HP:0010034],0/22,0%,1/45,2%,,
+Short phalanx of the thumb [HP:0009660],0/22,0%,1/45,2%,,
+Sinus bradycardia [HP:0001688],1/1,100%,2/2,100%,,
+Abnormal electrophysiology of sinoatrial node origin [HP:0011702],1/1,100%,2/2,100%,,
+Arrhythmia [HP:0011675],1/1,100%,3/3,100%,,
+Bradycardia [HP:0001662],1/1,100%,2/2,100%,,
+Hypoplasia of deltoid muscle [HP:0030241],0/0,0%,6/6,100%,,
+Shoulder muscle hypoplasia [HP:0008952],0/0,0%,6/6,100%,,
+Hypoplasia of the musculature [HP:0009004],0/0,0%,6/6,100%,,
+Aplasia/Hypoplasia involving the skeletal musculature [HP:0001460],0/0,0%,6/6,100%,,
+Abnormality of muscle size [HP:0030236],0/0,0%,6/6,100%,,
+Abnormal skeletal muscle morphology [HP:0011805],0/0,0%,8/8,100%,,
+Abnormality of the musculature [HP:0003011],0/0,0%,8/8,100%,,
+Aplasia/Hypoplasia involving the shoulder musculature [HP:0001464],0/0,0%,6/6,100%,,
+Aplasia/Hypoplasia involving the musculature of the upper limbs [HP:0001467],0/0,0%,6/6,100%,,
+Aplasia/Hypoplasia involving the musculature of the extremities [HP:0009128],0/0,0%,6/6,100%,,
+Abnormality of the musculature of the limbs [HP:0009127],0/0,0%,8/8,100%,,
+Abnormality of the musculature of the upper limbs [HP:0001446],0/0,0%,8/8,100%,,
+Abnormality of the shoulder girdle musculature [HP:0001435],0/0,0%,6/6,100%,,
+Patent foramen ovale [HP:0001655],0/36,0%,4/69,6%,,
+Synostosis of joints [HP:0100240],1/1,100%,1/1,100%,,
+Abnormality of joint mobility [HP:0011729],5/5,100%,3/3,100%,,
+Abnormal joint physiology [HP:0034430],5/5,100%,3/3,100%,,
+Abnormal musculoskeletal physiology [HP:0011843],5/5,100%,3/3,100%,,
+Abnormality of the vertebral column [HP:0000925],1/1,100%,4/4,100%,,
+Upper extremity joint dislocation [HP:0030310],2/2,100%,0/0,0%,,
+Joint dislocation [HP:0001373],2/2,100%,0/0,0%,,
+Abnormal shoulder morphology [HP:0003043],1/1,100%,1/1,100%,,
+Abnormal thorax morphology [HP:0000765],5/5,100%,7/7,100%,,
+Limited elbow movement [HP:0002996],4/4,100%,2/2,100%,,
+Abnormality of the elbow [HP:0009811],5/5,100%,2/2,100%,,
+Limitation of joint mobility [HP:0001376],4/4,100%,2/2,100%,,
+Mitral regurgitation [HP:0001653],2/2,100%,2/2,100%,,
+Abnormal mitral valve physiology [HP:0031481],2/2,100%,2/2,100%,,
+Pre-capillary pulmonary hypertension [HP:0033578],0/0,0%,8/8,100%,,
+Elevated pulmonary artery pressure [HP:0004890],0/0,0%,8/8,100%,,
+Abnormality of pulmonary circulation [HP:0030875],0/0,0%,8/8,100%,,
+Abnormal vascular physiology [HP:0030163],0/0,0%,8/8,100%,,
+Abnormal respiratory system physiology [HP:0002795],0/0,0%,8/8,100%,,
+Abnormality of the respiratory system [HP:0002086],0/0,0%,8/8,100%,,
+Right atrial enlargement [HP:0030718],0/0,0%,4/4,100%,,
+Abnormal right atrium morphology [HP:0025580],0/0,0%,4/4,100%,,
+Atrial septal dilatation [HP:0011995],0/0,0%,4/4,100%,,
+Pectus excavatum [HP:0000767],2/2,100%,3/4,75%,,
+Abnormal sternum morphology [HP:0000766],2/2,100%,3/3,100%,,
+Postaxial hand polydactyly [HP:0001162],0/0,0%,3/4,75%,,
+Postaxial polydactyly [HP:0100259],0/0,0%,3/3,100%,,
+Polydactyly [HP:0010442],0/0,0%,3/3,100%,,
+Abnormal 5th finger morphology [HP:0004207],0/0,0%,6/6,100%,,
+Hand polydactyly [HP:0001161],0/0,0%,3/3,100%,,
+Duplication of phalanx of hand [HP:0009997],0/0,0%,3/3,100%,,
+Duplication of hand bones [HP:0004275],0/0,0%,3/3,100%,,
+Duplication of bones involving the upper extremities [HP:0009142],0/0,0%,3/3,100%,,
+High palate [HP:0000218],0/0,0%,3/3,100%,,
+Short neck [HP:0000470],0/0,0%,3/3,100%,,
+Abnormal neck morphology [HP:0025668],0/0,0%,3/3,100%,,
+Abnormality of the neck [HP:0000464],0/0,0%,3/3,100%,,
+Abnormality of the cervical spine [HP:0003319],0/0,0%,3/3,100%,,
+Shield chest [HP:0000914],0/0,0%,3/3,100%,,
+Enlarged thorax [HP:0100625],0/0,0%,3/3,100%,,
+Abnormal rib cage morphology [HP:0001547],0/0,0%,5/5,100%,,
+Y-shaped metatarsals [HP:0010567],0/0,0%,3/3,100%,,
+Abnormal metatarsal morphology [HP:0001832],0/0,0%,3/3,100%,,
+Abnormal lower limb bone morphology [HP:0040069],0/0,0%,4/4,100%,,
+Abnormality of the lower limb [HP:0002814],0/0,0%,4/4,100%,,
+Abnormal foot morphology [HP:0001760],0/0,0%,4/4,100%,,
+Common atrium [HP:0011565],0/38,0%,1/115,1%,,
+Unroofed coronary sinus [HP:0031297],0/38,0%,1/117,1%,,
+Limited pronation/supination of forearm [HP:0006394],3/3,100%,2/2,100%,,
+First degree atrioventricular block [HP:0011705],1/1,100%,1/23,4%,,
+Hypoplastic scapulae [HP:0000882],1/1,100%,1/1,100%,,
+Aplasia/Hypoplasia of the scapulae [HP:0006713],1/1,100%,1/1,100%,,
+Aplasia/Hypoplasia involving bones of the thorax [HP:0006711],2/2,100%,2/2,100%,,
+Abnormal scapula morphology [HP:0000782],1/1,100%,1/1,100%,,
+Clinodactyly of the 5th finger [HP:0004209],0/0,0%,2/2,100%,,
+Finger clinodactyly [HP:0040019],0/0,0%,2/2,100%,,
+Clinodactyly [HP:0030084],0/0,0%,2/2,100%,,
+Deviation of the 5th finger [HP:0009179],0/0,0%,2/2,100%,,
+Abnormal 2nd finger morphology [HP:0004100],0/0,0%,2/2,100%,,
+Aplasia/Hypoplasia of the 2nd finger [HP:0006264],0/0,0%,2/2,100%,,
+Short middle phalanx of finger [HP:0005819],0/0,0%,2/2,100%,,
+Aplasia/Hypoplasia of the middle phalanges of the hand [HP:0009843],0/0,0%,2/2,100%,,
+Abnormal middle phalanx morphology of the hand [HP:0009833],0/0,0%,2/2,100%,,
+Short phalanx of finger [HP:0009803],0/0,0%,2/2,100%,,
+Short middle phalanx of the 5th finger [HP:0004220],0/0,0%,2/2,100%,,
+Type A brachydactyly [HP:0009370],0/0,0%,2/2,100%,,
+Brachydactyly [HP:0001156],0/0,0%,2/2,100%,,
+Short 5th finger [HP:0009237],0/0,0%,2/2,100%,,
+Aplasia/Hypoplasia of the 5th finger [HP:0006262],0/0,0%,2/2,100%,,
+Aplasia/Hypoplasia of the middle phalanx of the 5th finger [HP:0009161],0/0,0%,2/2,100%,,
+Aplasia/Hypoplasia of the phalanges of the 5th finger [HP:0009376],0/0,0%,2/2,100%,,
+Abnormal 5th finger phalanx morphology [HP:0004213],0/0,0%,2/2,100%,,
+Abnormality of the middle phalanx of the 5th finger [HP:0004219],0/0,0%,2/2,100%,,
+11 pairs of ribs [HP:0000878],0/0,0%,2/2,100%,,
+Missing ribs [HP:0000921],0/0,0%,2/2,100%,,
+Aplasia/Hypoplasia of the ribs [HP:0006712],0/0,0%,2/2,100%,,
+Abnormal rib morphology [HP:0000772],0/0,0%,2/2,100%,,
+Bowed forearm bones [HP:0003956],1/1,100%,0/0,0%,,
+Bowing of the arm [HP:0006488],1/1,100%,0/0,0%,,
+Bowing of the long bones [HP:0006487],1/1,100%,0/0,0%,,
+Abnormal diaphysis morphology [HP:0000940],1/1,100%,0/0,0%,,
+Abnormal skin morphology [HP:0011121],1/1,100%,0/0,0%,,
+Abnormality of the skin [HP:0000951],1/1,100%,0/0,0%,,
+Abnormality of the integument [HP:0001574],1/1,100%,0/0,0%,,
+Sinus venosus atrial septal defect [HP:0011567],1/1,100%,0/2,0%,,
+Left ventricular noncompaction cardiomyopathy [HP:0011664],1/5,20%,1/7,14%,,
+Noncompaction cardiomyopathy [HP:0012817],1/1,100%,1/1,100%,,
+Cardiomyopathy [HP:0001638],1/1,100%,1/1,100%,,
+Abnormal myocardium morphology [HP:0001637],1/1,100%,1/1,100%,,
+Abnormal toe morphology [HP:0001780],0/0,0%,1/1,100%,,
+Abnormal toe phalanx morphology [HP:0010161],0/0,0%,1/1,100%,,
+Abnormality of the distal phalanges of the toes [HP:0010182],0/0,0%,1/1,100%,,
+Amelia involving the upper limbs [HP:0009812],1/37,3%,0/114,0%,,
+Third degree atrioventricular block [HP:0001709],1/1,100%,0/22,0%,,
+Small hypothenar eminence [HP:0010487],0/0,0%,2/2,100%,,
+Abnormality of the hypothenar eminence [HP:0010486],0/0,0%,2/2,100%,,
+Abnormality of the musculature of the hand [HP:0001421],0/0,0%,2/2,100%,,
diff --git a/docs/user-guide/report/tbx5_frameshift.mtc_report.html b/docs/user-guide/report/tbx5_frameshift.mtc_report.html
new file mode 100644
index 00000000..3d00300a
--- /dev/null
+++ b/docs/user-guide/report/tbx5_frameshift.mtc_report.html
@@ -0,0 +1,141 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <title>Cohort</title>
+  <style>table {
+    border-collapse: collapse;
+    margin: 25px 0;
+      font-size: 0.9em;
+      font-family: sans-serif;
+      min-width: 400px;
+      box-shadow: 0 0 20px rgba(0, 0, 0, 0.15);
+  }
+
+
+  th {
+    background-color: #f2f2f2;
+    border: 1px solid #dddddd;
+    text-align: left;
+    padding: 2px;
+  }
+
+  tr {
+    border: 1px solid #dddddd;
+  }
+
+  td {
+    padding: 2px;
+    font-weight: bold;
+  }
+
+  tr:nth-child(even) {
+    background-color: #f2f2f2;
+  }
+
+  .table td,tr {
+    text-align: left;
+}
+
+.table td:first-child, tr:first-child {
+    text-align: left;
+}
+
+  </style>
+</head>
+
+<body>
+  <h1>Phenotype testing report</h1>
+  <p>Phenotype MTC filter: <em>HPO MTC filter</em></p>
+  <p>Multiple testing correction: <em>fdr_bh</em></p>
+  <p>Performed statistical tests for 17 out of the total of 260 HPO terms.</p>
+    <table>
+      <caption>Using <em>HPO MTC filter</em>, 243 term(s) were omitted from statistical analysis.</caption>
+        <tbody>
+          <tr>
+            <th>Code</th>
+            <th>Reason</th>
+            <th>Count</th>
+          </tr>
+          
+          <tr>
+            <!-- TODO: plug the real reason code here -->
+            <td>TODO</td>
+            <td>Skipping term with only 2 observations (not powered for 2x2)</td>
+            <td>51</td>
+          </tr>
+          
+          <tr>
+            <!-- TODO: plug the real reason code here -->
+            <td>TODO</td>
+            <td>Skipping term because all genotypes have same HPO observed proportions</td>
+            <td>50</td>
+          </tr>
+          
+          <tr>
+            <!-- TODO: plug the real reason code here -->
+            <td>TODO</td>
+            <td>Skipping general term</td>
+            <td>44</td>
+          </tr>
+          
+          <tr>
+            <!-- TODO: plug the real reason code here -->
+            <td>TODO</td>
+            <td>Skipping term with only 3 observations (not powered for 2x2)</td>
+            <td>27</td>
+          </tr>
+          
+          <tr>
+            <!-- TODO: plug the real reason code here -->
+            <td>TODO</td>
+            <td>Skipping term with only 6 observations (not powered for 2x2)</td>
+            <td>19</td>
+          </tr>
+          
+          <tr>
+            <!-- TODO: plug the real reason code here -->
+            <td>TODO</td>
+            <td>Skipping term with only 4 observations (not powered for 2x2)</td>
+            <td>16</td>
+          </tr>
+          
+          <tr>
+            <!-- TODO: plug the real reason code here -->
+            <td>TODO</td>
+            <td>Skipping term with maximum frequency that was less than threshold 0.2</td>
+            <td>10</td>
+          </tr>
+          
+          <tr>
+            <!-- TODO: plug the real reason code here -->
+            <td>TODO</td>
+            <td>Skipping term with only 5 observations (not powered for 2x2)</td>
+            <td>10</td>
+          </tr>
+          
+          <tr>
+            <!-- TODO: plug the real reason code here -->
+            <td>TODO</td>
+            <td>Skipping term with only 1 observations (not powered for 2x2)</td>
+            <td>7</td>
+          </tr>
+          
+          <tr>
+            <!-- TODO: plug the real reason code here -->
+            <td>TODO</td>
+            <td>Skipping term because one genotype had zero observations</td>
+            <td>5</td>
+          </tr>
+          
+          <tr>
+            <!-- TODO: plug the real reason code here -->
+            <td>TODO</td>
+            <td>Skipping term because no genotype has more than one observed HPO count</td>
+            <td>4</td>
+          </tr>
+          
+        </tbody>
+    </table>
+</body>
+</html>
\ No newline at end of file
diff --git a/docs/user-guide/stats.rst b/docs/user-guide/stats.rst
index 054763d5..993dd592 100644
--- a/docs/user-guide/stats.rst
+++ b/docs/user-guide/stats.rst
@@ -4,25 +4,32 @@
 Statistical tests
 =================
 
-There are many different ways of statistically testing for genotype-phenotype correlations, 
-and the appropriate statistical test depends on the question. 
-This document provides an overview of the tests offered by the GPSEA library 
+There are many different ways of statistically testing for genotype-phenotype correlations,
+and the appropriate statistical test depends on the question.
+This document provides an overview of the tests offered by the GPSEA library
 and explanations of how they are implemented by our software.
 
+*************************************
+Compare genotype and phenotype groups
+*************************************
+
+TODO
+
+.. _fisher-exact-test:
 
 Fisher exact test (FET)
-~~~~~~~~~~~~~~~~~~~~~~~
+=======================
 
-The Fisher exact test (FET) calculates the exact probability value 
-for the relationship between two dichotomous variables. 
+The Fisher exact test (FET) calculates the exact probability value
+for the relationship between two dichotomous variables.
 In our implementation, the two dichotomous variables are the genotype and the phenotype.
-For instance, the individuals of the cohort may be divided 
-according to whether or not they have a nonsense variant 
+For instance, the individuals of the cohort may be divided
+according to whether or not they have a nonsense variant
 and according to whether or not they have Strabismus (`HP:0000486 <https://hpo.jax.org/browse/term/HP:0000486>`_).
 
 
-The results of FET are expressed in terms of an exact probability (P-value), varying within 0 and 1. 
-Two groups are considered statistically significant if the P-value is less 
+The results of FET are expressed in terms of an exact probability (P-value), varying within 0 and 1.
+Two groups are considered statistically significant if the P-value is less
 than the chosen significance level (usually :math:`\alpha = 0.05`).
 
 The following graphic shows an example contingency table that is used to conduct a Fisher exact test.
@@ -32,12 +39,12 @@ We are comparing the frequency of *strabismus* in individuals with missense and
    :alt: Fisher exact text example
    :align: center
    :width: 600px
- 
+
 To perform the corresponding test in Python, we would use the following code.
 
 >>> import scipy.stats as stats
 >>> contingency_table = [
-... #   Missense,    Nonsense    
+... #   Missense,    Nonsense
 ...    [17,          6       ],  # Strabismus observed
 ...    [1,          19       ],  # Strabismus excluded
 ... ]
@@ -49,27 +56,213 @@ To perform the corresponding test in Python, we would use the following code.
 
 The ``p_value`` evaluates to `5.432292015291845e-06`, meaning there is a significant difference between the groups.
 
-The Fisher exact test evaluates whether the observed frequencies in a contingency table significantly 
+The Fisher exact test evaluates whether the observed frequencies in a contingency table significantly
 deviate from the frequencies we would expect if there were no association between the variables.
-We want to test whether the frequency of `HP:0000486`` is significantly higher or lower in 
-one genotype group compared to what would be expected if there were no association. 
-Note that by default, the *two-tailed* Fisher exact test is performed, meaning we have no 
-hypothesis as to whether there is a higher or lower frequency in one of the genotype groups. 
+We want to test whether the frequency of `HP:0000486`` is significantly higher or lower in
+one genotype group compared to what would be expected if there were no association.
+Note that by default, the *two-tailed* Fisher exact test is performed, meaning we have no
+hypothesis as to whether there is a higher or lower frequency in one of the genotype groups.
+
+However, we are typically interested in testing the associations between the genotype and multiple phenotypic features at once.
+GPSEA takes advantage of the HPO structure and simplifies the testing for all HPO terms encoded in the cohort.
+
+Example
+-------
+
+Let's illustrate this in a real-life example of the analysis of the association between frameshift variants in *TBX5* gene
+and congenital heart defects in the dataset of 156 individuals with mutations in *TBX5* whose signs and symptoms were
+encoded into HPO terms, stored as phenopackets of the `GA4GH Phenopacket Schema <https://pubmed.ncbi.nlm.nih.gov/35705716>`_,
+and deposited in `Phenopacket Store <https://github.com/monarch-initiative/phenopacket-store>`_ (version `0.1.18`).
+
+.. note::
+
+   The shorter version of the same analysis has been presented in the :ref:`tutorial`.
+
+
+Create cohort
+^^^^^^^^^^^^^
+
+We will load and transform the phenopackets into a :class:`~gpsea.model.Cohort`,
+as described in :ref:`input-data` section. Briefly, we will load the phenopackets:
+
+>>> from ppktstore.registry import configure_phenopacket_registry
+>>> registry = configure_phenopacket_registry()
+>>> with registry.open_phenopacket_store(release='0.1.18') as ps:
+...     phenopackets = tuple(ps.iter_cohort_phenopackets('TBX5'))
+>>> len(phenopackets)
+156
+
+followed by loading HPO release `v2024-07-01`:
+
+>>> import hpotk
+>>> store = hpotk.configure_ontology_store()
+>>> hpo = store.load_minimal_hpo(release='v2024-07-01')
+
+and we will perform Q/C and functional annotations for the mutations
+with the default cohort creator:
+
+>>> from gpsea.preprocessing import configure_caching_cohort_creator, load_phenopackets
+>>> cohort_creator = configure_caching_cohort_creator(hpo)
+>>> cohort, qc_results = load_phenopackets(phenopackets, cohort_creator)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+Patients Created: ...
+>>> qc_results.summarize()  # doctest: +SKIP
+Validated under none policy
+No errors or warnings were found
+
+Configure analysis
+^^^^^^^^^^^^^^^^^^
+
+We want to test the association between frameshift *TBX5* variants and phenotypic abnormalities.
+GPSEA exposes a flexible predicate API that lets us create genotype and phenotype predicates
+to assign the cohort members into genotype and phenotype categories based on the variants
+and the HPO terms. We need to create one genotype predicate and one or more phenotype predicates.
+
+
+**Genotype predicate**
+
+We want to separate the patients into two groups: a group *with* a frameshift variant
+and a group *without* a frameshift variant, based on the functional annotation.
+We will use the *MANE* transcript for the analysis:
+
+>>> from gpsea.model import VariantEffect
+>>> from gpsea.analysis.predicate.genotype import VariantPredicates, boolean_predicate
+>>> tx_id = 'NM_181486.4'
+>>> gt_predicate = boolean_predicate(VariantPredicates.variant_effect(VariantEffect.FRAMESHIFT_VARIANT, tx_id))
+>>> gt_predicate.get_question()
+'FRAMESHIFT_VARIANT on NM_181486.4'
+
+
+**Phenotype predicates**
+
+We recommend testing the genotype phenotype association for all HPO terms that are present in 2 or more cohort members,
+while taking advantage of the HPO graph structure and of the :ref:`true-path-rule`.
+We will use the :func:`~gpsea.analysis.predicate.phenotype.prepare_predicates_for_terms_of_interest`
+utility function to generate phenotype predicates for all HPO terms:
+
+>>> from gpsea.analysis.predicate.phenotype import prepare_predicates_for_terms_of_interest
+>>> pheno_predicates = prepare_predicates_for_terms_of_interest(
+...     cohort=cohort,
+...     hpo=hpo,
+...     min_n_of_patients_with_term=2,
+... )
+>>> len(pheno_predicates)
+260
+
+The function finds all HPO terms that annotate at least *n* (``min_n_of_patients_with_term=2`` above) individuals,
+including the *indirect* annotations whose presence is implied by the true path rule.
+
+
+**Statistical test**
+
+We will use :ref:<fisher-exact-test> to test the association
+between genotype and phenotype groups, as described previously.
+
+>>> from gpsea.analysis.pcats.stats import ScipyFisherExact
+>>> count_statistic = ScipyFisherExact()
+
+FET will compute a p value for each genotype phenotype group.
+
+
+**Multiple testing correction**
+
+In the case of this cohort, we could test association between having a frameshift variant and one of 260 HPO terms.
+However, testing multiple hypotheses on the same dataset increases the risk of finding a significant association
+by chance.
+GPSEA uses a two-pronged strategy to mitigate this risk - use Phenotype MTC filter and multiple testing correction.
+
+.. note::
+
+   See the :ref:`mtc` section for more info on multiple testing procedures.
+
+Here we will use a combination of the HPO MTC filter (:class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`)
+with Benjamini-Hochberg procedure (``mtc_correction='fdr_bh'``)
+with a false discovery control level set to `0.05` (``mtc_alpha=0.05``):
+
+>>> from gpsea.analysis.mtc_filter import HpoMtcFilter
+>>> mtc_filter = HpoMtcFilter.default_filter(hpo, term_frequency_threshold=0.2)
+>>> mtc_correction = 'fdr_bh'
+>>> mtc_alpha = 0.05
+
+
+**Final analysis**
+
+We finalize the analysis setup by putting all components together
+into :class:`~gpsea.analysis.pcats.HpoTermAnalysis`:
+
+>>> from gpsea.analysis.pcats import HpoTermAnalysis
+>>> analysis = HpoTermAnalysis(
+...     count_statistic=count_statistic,
+...     mtc_filter=mtc_filter,
+...     mtc_correction=mtc_correction,
+...     mtc_alpha=mtc_alpha,
+... )
+
+
+Analysis
+^^^^^^^^
+
+We can now execute the analysis:
+
+>>> result = analysis.compare_genotype_vs_phenotypes(
+...     cohort=cohort,
+...     gt_predicate=gt_predicate,
+...     pheno_predicates=pheno_predicates,
+... )
+>>> len(result.phenotypes)
+260
+>>> result.total_tests
+17
+
+
+Thanks to Phenotype MTC filter, we only tested 16 out of 260 terms.
+We can learn more by showing the MTC filter report:
+
+>>> from gpsea.view import MtcStatsViewer
+>>> mtc_viewer = MtcStatsViewer()
+>>> mtc_report = mtc_viewer.process(result)
+>>> with open('docs/user-guide/report/tbx5_frameshift.mtc_report.html', 'w') as fh:  # doctest: +SKIP
+...     _ = fh.write(mtc_report)
+
+
+.. raw:: html
+  :file: report/tbx5_frameshift.mtc_report.html
+
+
+Genotype phenotype associations
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Last, let's explore the associations. This is a table of the tested HPO terms
+ordered by the corrected p value (Benjamini-Hochberg FDR):
+
+>>> from gpsea.analysis.predicate import PatientCategories
+>>> summary_df = result.summarize(hpo, PatientCategories.YES)
+>>> summary_df.to_csv('docs/user-guide/report/tbx5_frameshift.csv')  # doctest: +SKIP
+
+.. csv-table:: *TBX5* frameshift vs rest
+   :file: report/tbx5_frameshift.csv
+   :header-rows: 2
 
 
 .. _phenotype-score-stats:
 
-Mann-Whitney U Test 
-~~~~~~~~~~~~~~~~~~~
+***************
+Phenotype score
+***************
+
+
+.. _mann-whitney-u-test:
+
+Mann-Whitney U Test
+===================
 
 We may want to compare the total number of occurences of a specific set of phenotypic features between two different genotype groups.
-For instance, `Jordan et al (2018) <https://pubmed.ncbi.nlm.nih.gov/29330883/>`_ found that the total number of structural defects 
+For instance, `Jordan et al (2018) <https://pubmed.ncbi.nlm.nih.gov/29330883/>`_ found that the total number of structural defects
 of the brain, eye, heart, and kidney and sensorineural hearing loss seen in individuals with point mutations in the Atrophin-1 domain of the RERE gene
 is significantly higher than expected based on the number of similar defects seen in individuals with putative loss-of-function variants.
-Since there are five potential defects, each individual has a count ranging between 0 and 5. 
+Since there are five potential defects, each individual has a count ranging between 0 and 5.
 
 We perform a Mann-Whitney U Test (or Wilcoxon Rank-Sum Test) to compare the distribution of such counts between genotype groups.
-This is a non-parametric test that compares the medians of the two groups to determine if they come from the same distribution. 
+This is a non-parametric test that compares the medians of the two groups to determine if they come from the same distribution.
 
 >>> import scipy.stats as stats
 >>> group1 = [0, 0, 1, 0, 2, 0, 1, 1, 1, 0, 2, 0, 0, 3, 1, 1, 1, 0]
@@ -79,14 +272,20 @@ This is a non-parametric test that compares the medians of the two groups to det
 >>> float(p_value)
 6.348081479150902e-06
 
+
 ``p_value`` evaluates to `6.348081479150901e-06`, meaning there is a significant difference between the groups.
 
 
 Example
-^^^^^^^
+-------
+
+Let's now analyze the subjects reported in *Jordan et al*.
+We will load 19 phenopackets that represent individuals with mutations in *RERE*
+whose signs and symptoms were encoded into HPO terms and deposited into Phenopacket Store.
+The phenopackets will be processed into :class:`~gpsea.model.Cohort`
+as described in the :ref:`input-data` section.
 
-Let's now analyze the subjects reported in *Jordan et al*. 
-We start by loading the cohort from Phenopacket Store (version `0.1.18`):
+Briefly, we will first load 19 phenopackets
 
 >>> from ppktstore.registry import configure_phenopacket_registry
 >>> registry = configure_phenopacket_registry()
@@ -95,37 +294,55 @@ We start by loading the cohort from Phenopacket Store (version `0.1.18`):
 >>> len(phenopackets)
 19
 
-We loaded 19 phenopackets. 
 
-Now, we need to prepare the phenopackets for using with GPSEA.
-We will need HPO (version `v2024-07-01`)
+and load HPO (version `v2024-07-01`)
 
 >>> import hpotk
 >>> store = hpotk.configure_ontology_store()
 >>> hpo = store.load_minimal_hpo(release='v2024-07-01')
 
-to create cohort creator
+
+to create a :class:`~gpsea.preprocessing.CohortCreator`
 
 >>> from gpsea.preprocessing import configure_caching_cohort_creator
 >>> cohort_creator = configure_caching_cohort_creator(hpo)
 
+
 which we will use to preprocess the cohort
 
 >>> from gpsea.preprocessing import load_phenopackets
 >>> cohort, _ = load_phenopackets(phenopackets, cohort_creator)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
 Patients Created: ...
+
+
+resulting in a cohort consisting of 19 individuals
+
 >>> len(cohort)
 19
 
-Now we can set up the phenotype and genotype predicates. Jordan et al tests ...
 
-.. todo: improve the text
+Configure analysis
+^^^^^^^^^^^^^^^^^^
+
+Now we can set up the analysis of genotype and phenotype.
+We will perform the analysis using the *RERE* transcript selected
+as the "main" biologically relevant by the `MANE` consortium.
 
->>> rere_mane_tx_id = 'NM_001042681.2'
+>>> tx_id = 'NM_001042681.2'
 
-Now let's create a predicate for testing if the variant is a point mutation or a loss of function mutation.
-The point mutation predicate is defined as ... 
-TODO: improve!
+
+**Genotype predicate**
+
+*Jordan et al.* compare phenotype of individuals harboring point mutations
+with the individuals carrying loss of function mutations. 
+Let's create a predicate for testing if the variant 
+is a point mutation or a loss of function mutation.
+
+In this example, the point mutation is a mutation that meets the following conditions:
+
+* predicted to lead to a missense variant on the `MANE` transcript
+* the :ref:`length-of-the-reference-allele` is equal to `1`
+* the :ref:`change-length-of-an-allele` is equal to `0`
 
 >>> from gpsea.model import VariantEffect
 >>> from gpsea.analysis.predicate.genotype import VariantPredicates
@@ -134,11 +351,12 @@ TODO: improve!
 ... )
 >>> point_mutation = VariantPredicates.change_length('==', 0) \
 ...     & VariantPredicates.ref_length('==', 1) \
-...     & VariantPredicates.any(VariantPredicates.variant_effect(effect, rere_mane_tx_id) for effect in point_mutation_effects)
+...     & VariantPredicates.any(VariantPredicates.variant_effect(effect, tx_id) for effect in point_mutation_effects)
 >>> point_mutation.get_question()
 '((change length == 0 AND ref allele length == 1) AND MISSENSE_VARIANT on NM_001042681.2)'
 
-For the loss of function predicate, these variant effects are considered loss of function:
+
+For the loss of function predicate, the following variant effects are considered loss of function:
 
 >>> lof_effects = (
 ...     VariantEffect.TRANSCRIPT_ABLATION,
@@ -146,10 +364,11 @@ For the loss of function predicate, these variant effects are considered loss of
 ...     VariantEffect.START_LOST,
 ...     VariantEffect.STOP_GAINED,
 ... )
->>> lof_mutation = VariantPredicates.any(VariantPredicates.variant_effect(eff, rere_mane_tx_id) for eff in lof_effects)
+>>> lof_mutation = VariantPredicates.any(VariantPredicates.variant_effect(eff, tx_id) for eff in lof_effects)
 >>> lof_mutation.get_question()
 '(TRANSCRIPT_ABLATION on NM_001042681.2 OR FRAMESHIFT_VARIANT on NM_001042681.2 OR START_LOST on NM_001042681.2 OR STOP_GAINED on NM_001042681.2)'
 
+
 The genotype predicate will bin the patient into two groups: a point mutation group or the loss of function group:
 
 >>> from gpsea.analysis.predicate.genotype import groups_predicate
@@ -160,8 +379,24 @@ The genotype predicate will bin the patient into two groups: a point mutation gr
 >>> gt_predicate.get_question()
 'Genotype group: Point, LoF'
 
-Now phenotype predicate. The authors divide the patients into groups according to the count of structural defects
-in these groups:
+
+**Phenotype score**
+
+The authors score the individuals based on the number of structural defects
+from the following 5 categories:
+
+* Brain anomalies
+* Eye anomalies
+* Congenital heart defects
+* Renal anomalies
+* Sensorineural hearing loss
+
+and they assign each individual a score based on the number structural defects.
+For example, an individual with a congenital heart defect would be assigned a score of `1`,
+an individual with congenital heart defect and a renal anomaly would be assigned a score of `2`,
+and so on.
+
+We automatize this scoring method by encoding the categories into HPO terms
 
 >>> structural_defects = (
 ...     'HP:0012443',  # Abnormal brain morphology (Brain anomalies)
@@ -171,58 +406,109 @@ in these groups:
 ...     'HP:0000407',  # Sensorineural hearing impairment (Sensorineural hearing loss)
 ... )
 
-Let's run the analysis.
 
->>> from gpsea.analysis import configure_cohort_analysis
->>> analysis = configure_cohort_analysis(
-...     cohort, hpo,
+and then test the individuals for presence of at least one HPO term
+that corresponds to the structural defect category
+(e.g. `Abnormal brain morphology <https://hpo.jax.org/browse/term/HP:0012443>`_)
+or that is its descendant
+(e.g. `Cerebellar atrophy <https://hpo.jax.org/browse/term/HP:0001272>`_).
+
+GPSEA implements this scoring method in :class:`~gpsea.analysis.pscore.CountingPhenotypeScorer`.
+
+>>> from gpsea.analysis.pscore import CountingPhenotypeScorer
+>>> pheno_scorer = CountingPhenotypeScorer.from_query_curies(
+...     hpo=hpo,
+...     query=structural_defects,   
 ... )
->>> result = analysis.compare_genotype_vs_phenotype_group_count(
-...     gt_predicate=gt_predicate,   
-...     phenotype_group_terms=structural_defects,
+
+
+**Statistical test**
+
+We will use :ref:`mann-whitney-u-test` as described above.
+
+>>> from gpsea.analysis.pscore.stats import MannWhitneyStatistic
+>>> score_statistic = MannWhitneyStatistic()
+
+
+**Final analysis**
+
+We will put the final analysis together into :class:`~gpsea.analysis.pscore.PhenotypeScoreAnalysis`.
+
+>>> from gpsea.analysis.pscore import PhenotypeScoreAnalysis
+>>> score_analysis = PhenotypeScoreAnalysis(
+...     score_statistic=score_statistic,   
 ... )
->>> round(result.p_value, 9)
-0.027066902
 
 
-We have the counts:
+Analysis
+^^^^^^^^
+
+We execute the analysis by running
 
->>> counts = result.genotype_phenotype_scores
->>> counts.head()  # doctest: +NORMALIZE_WHITESPACE
+>>> result = score_analysis.compare_genotype_vs_phenotype_score(
+...     cohort=cohort,
+...     gt_predicate=gt_predicate,
+...     pheno_scorer=pheno_scorer,
+... )
+
+
+The analysis shows a significant difference between the number of structural defects
+in individuals with point vs. loss-of-function mutations.
+
+>>> result.pval
+0.012074957610483744
+
+
+To explore further, we can access a data frame with genotype categories and phenotype counts:
+
+>>> scores = result.genotype_phenotype_scores.sort_index()
+>>> scores.head()  # doctest: +NORMALIZE_WHITESPACE
                                      genotype phenotype
-patient_id                                             
+patient_id
 Subject 10[PMID_27087320_Subject_10]        1         0
 Subject 1[PMID_27087320_Subject_1]          0         4
+Subject 1[PMID_29330883_Subject_1]          1         0
 Subject 2[PMID_27087320_Subject_2]       None         4
 Subject 2[PMID_29330883_Subject_2]          1         1
-Subject 3[PMID_27087320_Subject_3]          0         4
 
-The data frame provides a genotype category and a phenotype score for each patient.
+
+The data frame provides a `genotype` category and a `phenotype` score for each patient.
 The genotype category should be interpreted in the context of the genotype predicate:
 
 >>> gt_id_to_name = {c.category.cat_id: c.category.name for c in gt_predicate.get_categorizations()}
 >>> gt_id_to_name
 {0: 'Point', 1: 'LoF'}
 
-Let's plot the data:
+
+The genotype code `0` is assigned to patients with a point mutation, `1` corresponds to the loss-of-function mutations,
+and `None` is assigned to patients who cannot be assigned into any of the groups.
+
+Last, let's use :meth:`~gpsea.analysis.pscore.PhenotypeScoreAnalysisResult.plot_boxplots` method
+to show a box plot of the phenotype score distributions:
 
 >>> import matplotlib.pyplot as plt
 >>> fig, ax = plt.subplots(figsize=(6, 4), dpi=120)
->>> data = counts.loc[counts['genotype'].notna()]  # skip the patients with unassigned genotype group
->>> x = [data.loc[data['genotype'] == c.category.cat_id, 'phenotype'].to_list() for c in gt_predicate.get_categorizations()]
->>> gt_cat_labels = [gt_id_to_name[c.category.cat_id] for c in gt_predicate.get_categorizations()]
->>> bplot = ax.boxplot(
-...     x=x, 
-...     patch_artist=True, tick_labels=gt_cat_labels,
+>>> result.plot_boxplots(
+...     gt_predicate=gt_predicate,
+...     ax=ax,
+... )
+>>> _ = ax.grid(axis="y")
+>>> _ = ax.set(
+...     ylabel="Phenotype score", ylim=(-0.5, len(structural_defects) + 0.5)
 ... )
->>> _ = ax.grid(axis='y')
->>> _ = ax.set(ylabel='Phenotype group count', ylim=(-.5, len(structural_defects) + .5))
->>> for patch, color in zip(bplot['boxes'], ['darksalmon', 'honeydew']):
-...     patch.set_facecolor(color)
->>> fig.savefig('docs/img/phenotype_group_counts.png')  # doctest: +SKIP
+>>> fig.savefig('docs/img/rere_phenotype_score_boxplot.png')  # doctest: +SKIP
 
 
-.. image:: /img/phenotype_group_counts.png
-   :alt: Phenotype group counts
+.. image:: /img/rere_phenotype_score_boxplot.png
+   :alt: Phenotype score distribution
    :align: center
    :width: 600px
+
+
+We see that the individuals with the point mutations feature structural defects
+than the individuals with the loss-of-function mutations.
+
+The box extends from the first quartile (Q1) to the third quartile (Q3) of the data,
+with a red line at the median.
+The whiskers extend from the box to the farthest data point
+lying within 1.5x the inter-quartile range (IQR) from the box.
diff --git a/src/gpsea/analysis/__init__.py b/src/gpsea/analysis/__init__.py
index 0b4f157b..1adf3c2e 100644
--- a/src/gpsea/analysis/__init__.py
+++ b/src/gpsea/analysis/__init__.py
@@ -1,9 +1,7 @@
-from . import predicate
-
 from ._api import CohortAnalysis, GenotypePhenotypeAnalysisResult, HpoMtcReport
+# TODO This should go away
 from ._config import CohortAnalysisConfiguration, configure_cohort_analysis, configure_default_protein_metadata_service, MtcStrategy
 from ._gp_analysis import apply_predicates_on_patients
-from ._mtc_filter import PhenotypeMtcFilter, UseAllTermsMtcFilter, SpecifiedTermsMtcFilter, HpoMtcFilter
 from ._util import prepare_hpo_terms_of_interest
 
 __all__ = [
@@ -11,7 +9,6 @@
     'CohortAnalysis', 'GenotypePhenotypeAnalysisResult',
     'CohortAnalysisConfiguration', 'MtcStrategy',
     'HpoMtcReport',
-    'PhenotypeMtcFilter', 'UseAllTermsMtcFilter', 'SpecifiedTermsMtcFilter', 'HpoMtcFilter',
     'apply_predicates_on_patients',
     'configure_default_protein_metadata_service',
     'prepare_hpo_terms_of_interest',
diff --git a/src/gpsea/analysis/_api.py b/src/gpsea/analysis/_api.py
index 5555e225..ca7bd8e4 100644
--- a/src/gpsea/analysis/_api.py
+++ b/src/gpsea/analysis/_api.py
@@ -7,9 +7,10 @@
 
 from gpsea.model import Patient
 from gpsea.preprocessing import ProteinMetadataService
-from .predicate import PolyPredicate, GenotypePolyPredicate, PatientCategory
-from .predicate.genotype import VariantPredicate, ProteinPredicates
-from .predicate.phenotype import P, PhenotypePolyPredicate, CountingPhenotypeScorer
+from .predicate import PolyPredicate, PatientCategory
+from .predicate.genotype import GenotypePolyPredicate, VariantPredicate, ProteinPredicates
+from .predicate.phenotype import P, PhenotypePolyPredicate
+from .pscore import PhenotypeScorer, CountingPhenotypeScorer
 
 PatientsByHPO = namedtuple('PatientsByHPO', field_names=['all_with_hpo', 'all_without_hpo'])
 
@@ -18,6 +19,7 @@ class HpoMtcReport:
     """
     Class to simplify reporting results of multiple testing filtering by HpoMtcFilter subclasses.
     """
+    # TODO: delete with no replacement.
 
     def __init__(
             self,
@@ -74,6 +76,7 @@ class GenotypePhenotypeAnalysisResult:
     """
     `GenotypePhenotypeAnalysisResult` summarizes results of genotype-phenotype correlation analysis of a cohort.
     """
+    # TODO: delete and use `gpsea.analysis.pcats.MultiPhenotypeAnalysisResult`.
 
     def __init__(
             self,
@@ -142,7 +145,7 @@ def phenotype_categories(self) -> typing.Sequence[PatientCategory]:
         Get a sequence of phenotype patient categories that can be investigated.
         """
         return self._phenotype_categories
-    
+
     @property
     def total_tests(self) -> int:
         """
@@ -239,6 +242,7 @@ class PhenotypeScoreAnalysisResult:
 
     See :ref:`Mann Whitney U Test for phenotype score <phenotype-score-stats>` for more background.
     """
+    # TODO: delete and use `gpsea.analysis.pscore.PhenotypeScoreAnalysisResult`
 
     def __init__(
         self,
@@ -292,6 +296,7 @@ class CohortAnalysis(metaclass=abc.ABCMeta):
     The class provides various methods to test genotype-phenotype correlations. All methods wrap results
     into :class:`GenotypePhenotypeAnalysisResult`.
     """
+    # TODO: remove and use the analyses described in `User Guide > Statistical tests`.
 
     def __init__(
         self,
@@ -308,7 +313,7 @@ def compare_hpo_vs_genotype(
             predicate: VariantPredicate,
     ) -> GenotypePhenotypeAnalysisResult:
         """
-        Bin patients according to a presence of at least one allele that matches `predicate` 
+        Bin patients according to a presence of at least one allele that matches `predicate`
         and test for genotype-phenotype correlations.
         """
         pass
@@ -352,6 +357,7 @@ def compare_genotype_vs_phenotype_group_count(
         gt_predicate: GenotypePolyPredicate,
         phenotype_group_terms: typing.Iterable[typing.Union[str, hpotk.TermId]],
     ) -> PhenotypeScoreAnalysisResult:
+        # TODO: separate into pscore module
         assert isinstance(gt_predicate, GenotypePolyPredicate)
         assert gt_predicate.n_categorizations() == 2
 
@@ -369,7 +375,7 @@ def compare_genotype_vs_phenotype_group_count(
     def compare_genotype_vs_phenotype_score(
         self,
         gt_predicate: GenotypePolyPredicate,
-        phenotype_scorer: typing.Callable[[Patient,], float],
+        phenotype_scorer: PhenotypeScorer,
     ) -> PhenotypeScoreAnalysisResult:
         """
         Score the patients with a phenotype scoring method and test for correlation between the genotype group
@@ -386,8 +392,6 @@ def compare_genotype_vs_cohort_phenotypes(
         self,
         gt_predicate: GenotypePolyPredicate,
     ) -> GenotypePhenotypeAnalysisResult:
-        # TODO: if we had access to the cohort, it would be trivial to prepare
-        # all phenotype predicates and to call `self.compare_genotype_vs_phenotypes`.
         pass
 
     @abc.abstractmethod
diff --git a/src/gpsea/analysis/_config.py b/src/gpsea/analysis/_config.py
index cbbed148..3fb67fc7 100644
--- a/src/gpsea/analysis/_config.py
+++ b/src/gpsea/analysis/_config.py
@@ -8,8 +8,8 @@
 from gpsea.model import Cohort
 from gpsea.preprocessing import ProteinMetadataService, UniprotProteinMetadataService, ProteinAnnotationCache, \
     ProtCachingMetadataService
+from .mtc_filter import PhenotypeMtcFilter, UseAllTermsMtcFilter, SpecifiedTermsMtcFilter, HpoMtcFilter
 from ._api import CohortAnalysis
-from ._mtc_filter import PhenotypeMtcFilter, UseAllTermsMtcFilter, SpecifiedTermsMtcFilter, HpoMtcFilter
 from ._gp_analysis import FisherExactAnalyzer
 from ._gp_impl import GpCohortAnalysis
 
diff --git a/src/gpsea/analysis/_gp_analysis.py b/src/gpsea/analysis/_gp_analysis.py
index 63a43f18..35d14305 100644
--- a/src/gpsea/analysis/_gp_analysis.py
+++ b/src/gpsea/analysis/_gp_analysis.py
@@ -6,11 +6,12 @@
 from statsmodels.stats import multitest
 
 from gpsea.model import Patient
+from .mtc_filter import PhenotypeMtcFilter
+from .predicate import PatientCategory
+from .predicate.genotype import GenotypePolyPredicate
+from .predicate.phenotype import PhenotypePolyPredicate, P
 from ._api import GenotypePhenotypeAnalysisResult, HpoMtcReport
-from ._mtc_filter import PhenotypeMtcFilter
 from ._stats import run_fisher_exact, run_recessive_fisher_exact
-from .predicate import GenotypePolyPredicate, PatientCategory
-from .predicate.phenotype import PhenotypePolyPredicate, P
 
 
 def apply_predicates_on_patients(
@@ -44,33 +45,31 @@ def apply_predicates_on_patients(
         - a mapping from phenotype :class:`P` to a data frame with counts of patients
           in i-th phenotype category and j-th genotype category where i and j are rows and columns of the data frame
     """
+    # TODO: delete with no replacement.
     phenotypes = set()
     categories = set()
     for predicate in pheno_predicates:
         categories.update(predicate.get_categories())
-        phenotypes.update(predicate.phenotypes)
+        phenotypes.add(predicate.phenotype)
 
     n_usable_patients = pd.Series(data=0, index=pd.Index(phenotypes))
 
     # Apply genotype and phenotype predicates
     counts = {}
     for ph_predicate in pheno_predicates:
-        phenotypes = ph_predicate.phenotypes
-
-        for phenotype in phenotypes:
-            if phenotype not in counts:
-                # Make an empty frame for keeping track of the counts.
-                counts[phenotype] = pd.DataFrame(
-                    data=0,
-                    index=pd.Index(
-                        data=ph_predicate.get_categories(),
-                        name=ph_predicate.get_question(),
-                    ),
-                    columns=pd.Index(
-                        data=gt_predicate.get_categories(),
-                        name=gt_predicate.get_question(),
-                    ),
-                )
+        if ph_predicate.phenotype not in counts:
+            # Make an empty frame for keeping track of the counts.
+            counts[ph_predicate.phenotype] = pd.DataFrame(
+                data=0,
+                index=pd.Index(
+                    data=ph_predicate.get_categories(),
+                    name=ph_predicate.get_question(),
+                ),
+                columns=pd.Index(
+                    data=gt_predicate.get_categories(),
+                    name=gt_predicate.get_question(),
+                ),
+            )
 
         for patient in patients:
             pheno_cat = ph_predicate.test(patient)
@@ -87,6 +86,7 @@ class GPAnalyzer(typing.Generic[P], metaclass=abc.ABCMeta):
     """
     `GPAnalyzer` calculates p values for genotype-phenotype correlation of phenotypic features of interest.
     """
+    # TODO: delete with no replacement.
 
     @abc.abstractmethod
     def analyze(
@@ -115,6 +115,7 @@ class FisherExactAnalyzer(typing.Generic[P], GPAnalyzer[P]):
       should be applied.
     :param mtc_alpha: a `float` in range :math:`(0, 1]` with the multiple testing correction alpha value.
     """
+    # TODO: delete and use `gpsea.analysis.pcats.MultiPhenotypeAnalysis`.
 
     def __init__(
         self,
diff --git a/src/gpsea/analysis/_gp_impl.py b/src/gpsea/analysis/_gp_impl.py
index a35c6b96..c6251f8b 100644
--- a/src/gpsea/analysis/_gp_impl.py
+++ b/src/gpsea/analysis/_gp_impl.py
@@ -6,10 +6,10 @@
 
 from scipy.stats import mannwhitneyu
 
-from gpsea.model import Cohort, Patient
+from gpsea.model import Cohort
 from gpsea.preprocessing import ProteinMetadataService
-from .predicate import GenotypePolyPredicate
-from .predicate.genotype import VariantPredicate
+from .pscore import PhenotypeScorer
+from .predicate.genotype import GenotypePolyPredicate, VariantPredicate
 from .predicate.genotype import boolean_predicate as wrap_as_boolean_predicate
 from .predicate.genotype import groups_predicate as wrap_as_groups_predicate
 from .predicate.genotype import recessive_predicate as wrap_as_recessive_predicate
@@ -131,7 +131,7 @@ def _prepare_disease_predicates(
     def compare_genotype_vs_phenotype_score(
         self,
         gt_predicate: GenotypePolyPredicate,
-        phenotype_scorer: typing.Callable[[Patient,], float],
+        phenotype_scorer: PhenotypeScorer,
     ) -> PhenotypeScoreAnalysisResult:
         idx = pd.Index(tuple(p.patient_id for p in self._patient_list), name='patient_id')
         data = pd.DataFrame(
@@ -144,7 +144,7 @@ def compare_genotype_vs_phenotype_score(
         for patient in self._patient_list:
             gt_cat = gt_predicate.test(patient)
             data.loc[patient.patient_id, 'genotype'] = None if gt_cat is None else gt_cat.category.cat_id
-            data.loc[patient.patient_id, 'phenotype'] = phenotype_scorer(patient)
+            data.loc[patient.patient_id, 'phenotype'] = phenotype_scorer.score(patient)
 
         # To improve the determinism
         data.sort_index(inplace=True)
diff --git a/src/gpsea/analysis/_inheritance.py b/src/gpsea/analysis/_inheritance.py
deleted file mode 100644
index 5c0c4236..00000000
--- a/src/gpsea/analysis/_inheritance.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import abc
-import typing
-
-from gpsea.model import Variant
-
-
-class InheritanceCompatibilityChecker(metaclass=abc.ABCMeta):
-
-    @abc.abstractmethod
-    def check_compatibility(self, patient: typing.Sequence[Variant]) -> bool:
-        pass
-
-
-class AutosomalDominantChecker(InheritanceCompatibilityChecker):
-
-    def check_compatibility(self, patient: typing.Sequence[Variant]) -> bool:
-        pass
-
-
-class AutosomalRecessiveChecker(InheritanceCompatibilityChecker):
-
-    def check_compatibility(self, patient: typing.Sequence[Variant]) -> bool:
-        pass
-
-
-class MitochondrialChecker(InheritanceCompatibilityChecker):
-
-    def check_compatibility(self, patient: typing.Sequence[Variant]) -> bool:
-        pass
diff --git a/src/gpsea/analysis/_stats.py b/src/gpsea/analysis/_stats.py
index f2a480b9..1364e7d5 100644
--- a/src/gpsea/analysis/_stats.py
+++ b/src/gpsea/analysis/_stats.py
@@ -6,6 +6,8 @@
 import numpy as np
 import scipy
 
+# TODO: delete the module
+
 
 class MultiFisherExact(metaclass=abc.ABCMeta):
 
diff --git a/src/gpsea/analysis/_test_fisherExact.py b/src/gpsea/analysis/_test_fisherExact.py
index aba6ff26..fd530f5c 100644
--- a/src/gpsea/analysis/_test_fisherExact.py
+++ b/src/gpsea/analysis/_test_fisherExact.py
@@ -8,6 +8,8 @@
 def MultiExact() -> PythonMultiFisherExact:
     return PythonMultiFisherExact()
 
+# TODO: remove
+
 @pytest.mark.parametrize('table, raise_error, pVal', 
                         ([[[0,0,0],[0,0,0]], pytest.raises(ValueError), None],
                         [[[2, 1, 0],[3, 0, 2]], does_not_raise(), 0.6429],
diff --git a/src/gpsea/analysis/_util.py b/src/gpsea/analysis/_util.py
index 1e0bdc77..2c3445b5 100644
--- a/src/gpsea/analysis/_util.py
+++ b/src/gpsea/analysis/_util.py
@@ -24,6 +24,7 @@ def prepare_hpo_terms_of_interest(
     :param min_n_of_patients_with_term: the minimum number of patients that must feature an HPO term
         (either directly or indirectly) for the term to be included in the analysis.
     """
+    # TODO remove in favor of `gpsea.analysis.predicate.phenotype`
     present_count = Counter()
     excluded_count = Counter()
 
diff --git a/src/gpsea/analysis/mtc_filter/__init__.py b/src/gpsea/analysis/mtc_filter/__init__.py
new file mode 100644
index 00000000..4497ac34
--- /dev/null
+++ b/src/gpsea/analysis/mtc_filter/__init__.py
@@ -0,0 +1,14 @@
+"""
+The `gpsea.analysis.mtc_filter` provides the strategies for reducing multiple testing burden
+by pre-filtering the phenotype terms to test in advance.
+
+See :ref:`MTC filters <mtc-filters>` section for more info.
+"""
+
+from ._impl import PhenotypeMtcFilter, PhenotypeMtcResult
+from ._impl import UseAllTermsMtcFilter, SpecifiedTermsMtcFilter, HpoMtcFilter
+
+__all__ = [
+    'PhenotypeMtcFilter', 'PhenotypeMtcResult',
+    'UseAllTermsMtcFilter', 'SpecifiedTermsMtcFilter', 'HpoMtcFilter',
+]
diff --git a/src/gpsea/analysis/_mtc_filter.py b/src/gpsea/analysis/mtc_filter/_impl.py
similarity index 68%
rename from src/gpsea/analysis/_mtc_filter.py
rename to src/gpsea/analysis/mtc_filter/_impl.py
index 2025c0ed..b15e4bc6 100644
--- a/src/gpsea/analysis/_mtc_filter.py
+++ b/src/gpsea/analysis/mtc_filter/_impl.py
@@ -6,10 +6,62 @@
 import hpotk
 import pandas as pd
 
-from .predicate import PatientCategories, PatientCategory, GenotypePolyPredicate
+from ..predicate import PatientCategories, PatientCategory
+from ..predicate.genotype import GenotypePolyPredicate
+from ..predicate.phenotype import P
 
 
-class PhenotypeMtcFilter(metaclass=abc.ABCMeta):
+class PhenotypeMtcResult:
+    """
+    `PhenotypeMtcResult` represents a result of :class:`PhenotypeMtcFilter` for a single phenotype.
+
+    The phenotype can either pass the filter, in order to be included in the downstream analysis (:meth:`is_passed`)
+    of be filtered out (:meth:`is_filtered_out`) in which case :attr:`reason` must be available.
+    """
+
+    @staticmethod
+    def ok() -> "PhenotypeMtcResult":
+        # A singleton would be nice here...
+        return PhenotypeMtcResult(status=True, reason=None)
+
+    @staticmethod
+    def fail(reason: str) -> "PhenotypeMtcResult":
+        return PhenotypeMtcResult(status=False, reason=reason)
+
+    def __init__(
+        self,
+        status: bool,
+        reason: typing.Optional[str],
+    ):
+        self._status = status
+        self._reason = reason
+
+    def is_passed(self) -> bool:
+        return self._status
+    
+    def is_filtered_out(self) -> bool:
+        return not self._status
+    
+    @property
+    def reason(self) -> typing.Optional[str]:
+        return self._reason
+    
+    def __eq__(self, value: object) -> bool:
+        return isinstance(value, PhenotypeMtcResult) \
+            and self._status == value._status \
+            and self._reason == value._reason
+
+    def __hash__(self) -> int:
+        return hash((self._status, self._reason))
+    
+    def __str__(self) -> str:
+        return f'PhenotypeMtcResult(status={self._status}, reason={self._reason})'
+    
+    def __repr__(self) -> str:
+        return str(self)
+
+
+class PhenotypeMtcFilter(typing.Generic[P], metaclass=abc.ABCMeta):
     """
     `PhenotypeMtcFilter` decides which phenotypes should be tested and which phenotypes
     are not worth testing in order to reduce the multiple testing burden.
@@ -20,6 +72,24 @@ class PhenotypeMtcFilter(metaclass=abc.ABCMeta):
     categorized according to the HPO term.
     """
 
+    @abc.abstractmethod
+    def filter(
+        self,
+        gt_predicate: GenotypePolyPredicate,
+        phenotypes: typing.Sequence[P],
+        counts: typing.Sequence[pd.DataFrame],
+    ) -> typing.Sequence[PhenotypeMtcResult]:
+        """
+        Test if the phenotype with given counts should be included in the downstream analysis.
+
+        :param gt_predicate: the predicate that produced the columns of the `count` data frame.
+        :param phenotypes: the tested phenotypes.
+        :param counts: a sequence of 2D data frames for the tested phenotypes.
+            Each data frame corrresponds to a genotype/phenotype contingency matrix.
+        :returns: a sequence of filter results for the input `phenotypes`.
+        """
+        pass
+
     @abc.abstractmethod
     def filter_terms_to_test(
         self,
@@ -58,13 +128,25 @@ def filter_method_name(self) -> str:
         pass
 
 
-class UseAllTermsMtcFilter(PhenotypeMtcFilter):
+class UseAllTermsMtcFilter(PhenotypeMtcFilter[typing.Any]):
     """
     `UseAllTermsMtcFilter` filters out *no* phenotype terms.
 
     See :ref:`use-all-terms-strategy` section for more info.
     """
 
+    def __init__(self):
+        self._ok = PhenotypeMtcResult.ok()
+
+    def filter(
+        self,
+        gt_predicate: GenotypePolyPredicate,
+        phenotypes: typing.Sequence[P],
+        counts: typing.Sequence[pd.DataFrame],
+    ) -> typing.Sequence[PhenotypeMtcResult]:
+        # Always OK! 😏
+        return tuple(self._ok for _ in phenotypes)
+
     def filter_terms_to_test(
         self,
         gt_predicate: GenotypePolyPredicate,
@@ -85,7 +167,7 @@ def filter_method_name(self) -> str:
         return "All HPO terms"
 
 
-class SpecifiedTermsMtcFilter(PhenotypeMtcFilter):
+class SpecifiedTermsMtcFilter(PhenotypeMtcFilter[hpotk.TermId]):
     """
     `SpecifiedTermsMtcFilter` limits the HPO terms to be tested to a selection of provided terms.
 
@@ -108,9 +190,25 @@ def __init__(
             hpo: reference to HPO ontology object
             terms_to_test: an iterable of TermIds representing the terms to test
         """
+        self._ok = PhenotypeMtcResult.ok()
+        self._fail = PhenotypeMtcResult.fail("Non-specified term")
         self._hpo = hpo
         self._terms_to_test_set = set(terms_to_test)
 
+    def filter(
+        self,
+        gt_predicate: GenotypePolyPredicate,
+        phenotypes: typing.Sequence[P],
+        counts: typing.Sequence[pd.DataFrame],
+    ) -> typing.Sequence[PhenotypeMtcResult]:
+        results = []
+        for phenotype in phenotypes:
+            if phenotype in self._terms_to_test_set:
+                results.append(self._ok)
+            else:
+                results.append(self._fail)
+        return tuple(results)
+
     def filter_terms_to_test(
         self,
         gt_predicate: GenotypePolyPredicate,
@@ -153,12 +251,14 @@ def filter_method_name(self) -> str:
         return "Specified terms MTC filter"
 
 
-class HpoMtcFilter(PhenotypeMtcFilter):
+class HpoMtcFilter(PhenotypeMtcFilter[hpotk.TermId]):
     """
     `HpoMtcFilter` decides which phenotypes should be tested and which phenotypes are not worth testing.
 
     The class leverages a number of heuristics and domain decisions.
     See :ref:`hpo-mtc-filter-strategy` section for more info.
+
+    We recommend creating an instance using the :func:`default_filter` static factory method.
     """
 
     # TODO: this has been here before. Is it still valid?
@@ -176,6 +276,7 @@ class HpoMtcFilter(PhenotypeMtcFilter):
     def default_filter(
         hpo: hpotk.MinimalOntology,
         term_frequency_threshold: float,
+        phenotypic_abnormality: hpotk.TermId = hpotk.constants.hpo.base.PHENOTYPIC_ABNORMALITY,
     ):
         """
         Args:
@@ -184,21 +285,25 @@ def default_filter(
               for an HPO term to have in at least one of the genotype groups
               (e.g., 22% in missense and 3% in nonsense genotypes would be OK,
               but not 13% missense and 10% nonsense genotypes if the threshold is 0.2)
+            phenotypic_abnormality: a :class:`~hpotk.TermId` corresponding to the root of HPO phenotype hierarchy.
+              Having to specify this option should be very rarely, if ever.
         """
-        # ## Collect sets of top-level and second-level terms
+        # The very top of the ontology is completely uninteresting.
+        # Knowing about association between `All`, or `Phenotypic abnormality` tells us very little
+        top_level_terms = {phenotypic_abnormality, }
+        top_level_terms.update(hpo.graph.get_ancestors(phenotypic_abnormality))
+
+        # Collect sets of the 1st level terms
         # e.g., Abnormality of the cardiovascular system (HP:0001626)
-        top_level_terms = set(
-            hpo.graph.get_children(hpotk.constants.hpo.base.PHENOTYPIC_ABNORMALITY)
+        first_level_terms = set(
+            hpo.graph.get_children(phenotypic_abnormality)
         )
 
-        # e.g. Abnormality of the vasculature (HP:0002597)
-        second_level_terms = set()
-        third_level_terms = set()
-
         # the second level (children of the top level), contains terms we want to keep,
         # such as HP:0001611 Hypernasal speech, but it also contains "general" terms that
         # we skip according to this heuristic, e.g., HP:0030680	Abnormal cardiovascular system morphology
-        for t1 in top_level_terms:
+        second_level_terms = set()
+        for t1 in first_level_terms:
             for t2 in hpo.graph.get_children(t1):
                 label = hpo.get_term_name(t2)
                 if label is not None and label.startswith("Abnormal"):
@@ -207,6 +312,7 @@ def default_filter(
         # The third level (children of the second level), contains terms we want to keep,
         # such as HP:0031109 Agalactia, but it also contains "general" terms
         # that we skip according to this heuristic, e.g., HP:0006500 Abnormal lower limb epiphysis morphology
+        third_level_terms = set()
         for t2 in second_level_terms:
             for t3 in hpo.graph.get_children(t2):
                 label = hpo.get_term_name(t3)
@@ -217,6 +323,7 @@ def default_filter(
         # In the future, one could do this on a per-level basis.
         general_hpo_term_set = set()
         general_hpo_term_set.update(top_level_terms)
+        general_hpo_term_set.update(first_level_terms)
         general_hpo_term_set.update(second_level_terms)
         general_hpo_term_set.update(third_level_terms)
 
@@ -248,6 +355,107 @@ def __init__(
         # thus if the total count is 6 or less, there is no power - CAN WE SIMPLIFY?
 
         self._general_hpo_terms = set(general_hpo_terms)
+        self._ok = PhenotypeMtcResult.ok()
+
+    def filter(
+        self,
+        gt_predicate: GenotypePolyPredicate,
+        phenotypes: typing.Sequence[hpotk.TermId],
+        counts: typing.Sequence[pd.DataFrame],
+    ) -> typing.Sequence[PhenotypeMtcResult]:
+        p_to_idx = {p: i for i, p in enumerate(phenotypes)}
+        
+        results = [None for _ in range(len(phenotypes))]
+        categories = tuple(gt_predicate.get_categories())
+        for term_id in self._get_ordered_terms(phenotypes):
+            try:
+                idx = p_to_idx[term_id]
+            except KeyError:
+                # The procedure for getting the ordered terms produced a term
+                # that is not included in `phenotypes`.
+                # This is normal and we just move on.
+                continue
+
+            if term_id in self._general_hpo_terms:
+                results[idx] = PhenotypeMtcResult.fail("Skipping general term")
+                continue
+            
+            if not self._hpo.graph.is_ancestor_of(
+                hpotk.constants.hpo.base.PHENOTYPIC_ABNORMALITY, term_id
+            ):
+                results[idx] = PhenotypeMtcResult.fail("Skipping non phenotype term")
+                continue
+
+            # get total number of observations
+            # if term_id not in all_counts:
+            #     reason_for_filtering_out["Skipping non-target term"] += 1
+            #     continue
+
+            contingency_matrix = counts[idx]
+            total = contingency_matrix.sum().sum()
+            max_freq = HpoMtcFilter.get_maximum_group_observed_HPO_frequency(
+                contingency_matrix
+            )
+            if max_freq < self._hpo_term_frequency_filter:
+                reason = (
+                    "Skipping term with maximum frequency "
+                    f"that was less than threshold {self._hpo_term_frequency_filter}"
+                )
+                results[idx] = PhenotypeMtcResult.fail(reason)
+                continue
+
+            if contingency_matrix.shape == (2, 2) and total < 7:
+                reason = f"Skipping term with only {total} observations (not powered for 2x2)"
+                results[idx] = PhenotypeMtcResult.fail(reason)
+                continue
+
+            # todo -- similar for (3,2)
+            if not HpoMtcFilter.some_cell_has_greater_than_one_count(contingency_matrix):
+                reason = "Skipping term because no genotype has more than one observed HPO count"
+                results[idx] = PhenotypeMtcResult.fail(reason)
+                continue
+
+            elif HpoMtcFilter.genotypes_have_same_hpo_proportions(
+                contingency_matrix,
+                categories,
+            ):
+                reason = "Skipping term because all genotypes have same HPO observed proportions"
+                results[idx] = PhenotypeMtcResult.fail(reason)
+                continue
+
+            elif HpoMtcFilter.one_genotype_has_zero_hpo_observations(
+                contingency_matrix,
+                categories,
+            ):
+                reason = "Skipping term because one genotype had zero observations"
+                results[idx] = PhenotypeMtcResult.fail(reason)
+                continue
+
+            # TODO: the code below actually did not work.
+            # for child_term_id in self._hpo.graph.get_children(term_id):
+            #     if child_term_id in tested_counts_pf:
+            #         if tested_counts_pf[child_term_id].equals(contingency_matrix):
+            #             # TODO: should we make the match fuzzier by adding a tolerance instead of the exact matches?
+            #             reason = "Child term with same counts previously tested"
+            #             reason_for_filtering_out[reason] += 1
+            #             continue
+
+            # The term should be tested if we get here.
+            results[idx] = self._ok
+
+        # There should be no `None` elements in `results` at this point.
+        if any(r is None for r in results):
+            missing = []
+            for i, result in enumerate(results):
+                if result is None:
+                    term_name = self._hpo.get_term_name(phenotypes[i])
+                    missing.append(term_name)
+            
+            print('BLAAAAAA')
+            msg = 'Missing results for {}'.format(', '.join(missing))
+            raise ValueError(msg)
+
+        return tuple(results)
 
     def filter_terms_to_test(
         self,
@@ -275,6 +483,7 @@ def filter_terms_to_test(
         )  # key is an HP id, value is a tuple with counts, i.e.,
 
         # iterate through the terms in the sorted order, starting from the leaves of the induced graph.
+        categories = tuple(gt_predicate.get_categories())
         for term_id in self._get_ordered_terms(all_counts.keys()):
             if term_id in self._general_hpo_terms:
                 reason_for_filtering_out["Skipping general term"] += 1
@@ -317,7 +526,7 @@ def filter_terms_to_test(
 
             elif HpoMtcFilter.genotypes_have_same_hpo_proportions(
                 counts_frame,
-                gt_predicate.get_categories(),
+                categories,
             ):
                 reason = "Skipping term because all genotypes have same HPO observed proportions"
                 reason_for_filtering_out[reason] += 1
@@ -325,7 +534,7 @@ def filter_terms_to_test(
 
             elif HpoMtcFilter.one_genotype_has_zero_hpo_observations(
                 counts_frame,
-                gt_predicate.get_categories(),
+                categories,
             ):
                 reason = "Skipping term because one genotype had zero observations"
                 reason_for_filtering_out[reason] += 1
@@ -454,26 +663,12 @@ def _get_ordered_terms(
         ("leaves" in the graph of observed terms, but not necessarily leaves in the entire HPO graph)
         to the most general terms.
         """
-        # When building the induced graph based on the `term_ids`, we do not want to
-        # include the term for *Phenotypic abnormality* and *All*.
-        ancestors_to_ignore = set(
-            self._hpo.graph.get_ancestors(
-                hpotk.constants.hpo.base.PHENOTYPIC_ABNORMALITY
-            )
-        )
-        ancestors_to_ignore.add(hpotk.constants.hpo.base.PHENOTYPIC_ABNORMALITY)
-
         # A "leaf" refers to an annotated term none of whose children is annotated.
         # May or may not be leaf in HPO graph
         all_term_ids = set()
         for term_id in term_ids:
             all_term_ids.add(term_id)
-            all_term_ids.update(
-                filter(
-                    lambda t: t not in ancestors_to_ignore,
-                    self._hpo.graph.get_ancestors(term_id),
-                )
-            )
+            all_term_ids.update(self._hpo.graph.get_ancestors(term_id))
 
         # get all of the leaf terms
         hpo_leaf_term_id_set = set()
@@ -489,7 +684,7 @@ def _get_ordered_terms(
 
         ordered_term_list = list()  # reset, if needed
         queue = deque(hpo_leaf_term_id_set)
-        seen_terms = set(ancestors_to_ignore)
+        seen_terms = set()
         while len(queue) > 0:
             term_id = queue.popleft()
             if term_id not in seen_terms:
diff --git a/src/gpsea/analysis/pcats/__init__.py b/src/gpsea/analysis/pcats/__init__.py
new file mode 100644
index 00000000..6a82d92f
--- /dev/null
+++ b/src/gpsea/analysis/pcats/__init__.py
@@ -0,0 +1,29 @@
+"""
+The `gpsea.analysis.pcats` tests the association between genotype and phenotype groups,
+if the groups can be defined in terms of discrete, unique, and non-overlapping categories.
+
+Each individual is assigned into a genotype and phenotype group
+using :class:`~gpsea.analysis.predicate.genotype.GenotypePolyPredicate`
+and :class:`~gpsea.analysis.predicate.phenotype.PhenotypePolyPredicate` respectively.
+
+A contingency matrix with group counts is prepared
+and the counts are tested for association using :class:`~gpsea.analysis.pcats.stats.CountStatistic`,
+such as :class:`~gpsea.analysis.pcats.stats.ScipyFisherExact`.
+
+It is typical to test several phenotype groups at the same time.
+Therefore, we must correct for multiple testing to prevent false positive findings.
+See :ref:`MTC section <mtc>` for more info.
+
+The results are provided as :class:`MultiPhenotypeAnalysisResult`
+(or more specific :class:`HpoTermAnalysisResult` for :class:`HpoTermAnalysis`).
+"""
+
+from ._impl import MultiPhenotypeAnalysis, MultiPhenotypeAnalysisResult
+from ._impl import DiseaseAnalysis
+from ._impl import HpoTermAnalysis, HpoTermAnalysisResult
+
+__all__ = [
+    'MultiPhenotypeAnalysis', 'MultiPhenotypeAnalysisResult',
+    'DiseaseAnalysis',
+    'HpoTermAnalysis', 'HpoTermAnalysisResult',
+]
diff --git a/src/gpsea/analysis/pcats/_impl.py b/src/gpsea/analysis/pcats/_impl.py
new file mode 100644
index 00000000..61841a51
--- /dev/null
+++ b/src/gpsea/analysis/pcats/_impl.py
@@ -0,0 +1,561 @@
+import abc
+import math
+import typing
+
+from collections import Counter
+
+import hpotk
+import numpy as np
+import pandas as pd
+
+from statsmodels.stats import multitest
+
+from gpsea.model import Patient
+from gpsea.analysis.pcats.stats import CountStatistic
+from ..predicate import PatientCategory
+from ..predicate.genotype import GenotypePolyPredicate
+from ..predicate.phenotype import P, PhenotypePolyPredicate, prepare_predicates_for_terms_of_interest
+from ..mtc_filter import PhenotypeMtcFilter, PhenotypeMtcResult
+
+
+def apply_predicates_on_patients(
+    patients: typing.Iterable[Patient],
+    gt_predicate: GenotypePolyPredicate,
+    pheno_predicates: typing.Sequence[PhenotypePolyPredicate[P]],
+) -> typing.Tuple[
+    typing.Sequence[int],
+    typing.Sequence[pd.DataFrame],
+]:
+    """
+    Apply the phenotype predicates `pheno_predicates` and the genotype predicate `gt_predicate`
+    to bin the `patients` into categories.
+
+    Note, it may not be possible to bin *all* patients with a genotype/phenotype pair,
+    since a predicate is allowed to return `None` (e.g. if it bins the patient into MISSENSE or NONSENSE groups
+    but the patient has no MISSENSE or NONSENSE variants). If this happens, the patient will not be "usable"
+    for the phenotype `P`.
+
+    Args:
+        patients: a sequence of the patients to bin into categories
+        gt_predicate: a genotype predicate to apply
+        pheno_predicates: a sequence with the phenotype predicates to apply
+
+    Returns:
+        a tuple with 2 items:
+        - a sequence with counts of patients that could be binned according to the phenotype `P`.
+        - a sequence with data frames with counts of patients in i-th phenotype category
+          and j-th genotype category where i and j are rows and columns of the data frame.
+    """
+    n_usable_patient_counter = Counter()
+
+    # Apply genotype and phenotype predicates
+    count_dict = {}
+    for ph_predicate in pheno_predicates:
+        if ph_predicate.phenotype not in count_dict:
+            # Make an empty frame for keeping track of the counts.
+            count_dict[ph_predicate.phenotype] = pd.DataFrame(
+                data=0,
+                index=pd.Index(
+                    data=ph_predicate.get_categories(),
+                    name=ph_predicate.get_question(),
+                ),
+                columns=pd.Index(
+                    data=gt_predicate.get_categories(),
+                    name=gt_predicate.get_question(),
+                ),
+            )
+
+        for patient in patients:
+            pheno_cat = ph_predicate.test(patient)
+            geno_cat = gt_predicate.test(patient)
+
+            if pheno_cat is not None and geno_cat is not None:
+                count_dict[pheno_cat.phenotype].loc[
+                    pheno_cat.category, geno_cat.category
+                ] += 1
+                n_usable_patient_counter[pheno_cat.phenotype] += 1
+
+    # Convert dicts to numpy arrays
+    n_usable_patients = [
+        n_usable_patient_counter[ph_predicate.phenotype]
+        for ph_predicate in pheno_predicates
+    ]
+
+    counts = [count_dict[ph_predicate.phenotype] for ph_predicate in pheno_predicates]
+
+    return n_usable_patients, counts
+
+
+class MultiPhenotypeAnalysisResult(typing.Generic[P], metaclass=abc.ABCMeta):
+    """
+    `MultiPhenotypeAnalysisResult` reports the outcome of :class:`MultiPhenotypeAnalysis`.
+
+    The result consists of several arrays with items
+    with the order determined by the phenotype order.
+    """
+
+    @property
+    @abc.abstractmethod
+    def phenotypes(self) -> typing.Sequence[P]:
+        """
+        Get the tested phenotypes.
+        """
+        pass
+
+    @property
+    @abc.abstractmethod
+    def n_usable(self) -> typing.Sequence[int]:
+        """
+        Get a sequence of numbers of patients where the phenotype was assessable,
+        and are, thus, usable for genotype-phenotype correlation analysis.
+        """
+        pass
+
+    @property
+    @abc.abstractmethod
+    def all_counts(self) -> typing.Sequence[pd.DataFrame]:
+        """
+        Get a :class:`~pandas.DataFrame` sequence where each `DataFrame` includes the counts of patients
+        in genotype and phenotype groups.
+
+        An example for a genotype predicate that bins into two categories (`Yes` and `No`) based on presence
+        of a missense variant in transcript `NM_123456.7`, and phenotype predicate that checks
+        presence/absence of `HP:0001166` (a phenotype term)::
+
+                       Has MISSENSE_VARIANT in NM_123456.7
+                       No       Yes
+            Present
+            Yes        1        13
+            No         7        5
+
+        The rows correspond to the phenotype categories, and the columns represent the genotype categories.
+        """
+        pass
+
+    @property
+    @abc.abstractmethod
+    def pvals(self) -> typing.Sequence[float]:
+        """
+        Get a sequence of nominal p values for each tested HPO term.
+        The sequence includes a `NaN` value for each input phenotype that was *not* tested.
+        """
+        pass
+
+    @property
+    @abc.abstractmethod
+    def corrected_pvals(self) -> typing.Optional[typing.Sequence[float]]:
+        """
+        Get a sequence with p values for each tested HPO term after multiple testing correction
+        or `None` if the correction was not applied.
+        The sequence includes a `NaN` value for each input phenotype that was *not* tested.
+        """
+        pass
+
+    @property
+    def total_tests(self) -> int:
+        """
+        Get total count of tests that were run for this analysis.
+        """
+        return sum(1 for pval in self.pvals if not math.isnan(pval))
+
+    def summarize(
+        self,
+        hpo: hpotk.MinimalOntology,
+        target_phenotype_category: PatientCategory,
+    ) -> pd.DataFrame:
+        """
+        Create a data frame with summary of the genotype phenotype analysis.
+
+        The *rows* of the frame correspond to the analyzed HPO terms.
+
+        The columns of the data frame have `Count` and `Percentage` per used genotype predicate.
+
+        **Example**
+
+        If we use :class:`~gpsea.analysis.predicate.genotype.VariantEffectPredicate`
+        which can compare phenotype with and without a missense variant, we will have a data frame
+        that looks like this::
+
+            MISSENSE_VARIANT on `NM_1234.5`                       No                  Yes
+                                                                  Count    Percent    Count    Percent   Corrected p value  p value
+            Arachnodactyly [HP:0001166]                           1/10         10%    13/16        81%   0.020299           0.000781
+            Abnormality of the musculature [HP:0003011]           6/6         100%    11/11       100%   1.000000           1.000000
+            Abnormal nervous system physiology [HP:0012638]       9/9         100%    15/15       100%   1.000000           1.000000
+            ...                                                   ...      ...        ...      ...       ...                ...
+        """
+
+        # TODO: this should be plotted by a formatter.
+        # Row index: a list of tested HPO terms
+        pheno_idx = pd.Index(self.phenotypes)
+        # Column index: multiindex of counts and percentages for all genotype predicate groups
+        gt_idx = pd.MultiIndex.from_product(
+            # TODO: fix the below
+            iterables=(self._gt_predicate.get_categories(), ("Count", "Percent")),
+            names=(self._gt_predicate.get_question(), None),
+        )
+
+        # We'll fill this frame with data
+        df = pd.DataFrame(index=pheno_idx, columns=gt_idx)
+
+        for phenotype, count in zip(self.phenotypes, self.all_counts):
+            gt_totals = (
+                count.sum()
+            )  # Sum across the phenotype categories (collapse the rows).
+            for gt_cat in count.columns:
+                cnt = count.loc[target_phenotype_category, gt_cat]
+                total = gt_totals[gt_cat]
+                df.loc[phenotype, (gt_cat, "Count")] = f"{cnt}/{total}"
+                pct = 0 if total == 0 else round(cnt * 100 / total)
+                df.loc[phenotype, (gt_cat, "Percent")] = f"{pct}%"
+
+        # Add columns with p values and corrected p values (if present)
+        p_val_col_name = "p values"
+        corrected_p_val_col_name = "Corrected p values"
+        if self.corrected_pvals is not None:
+            df.insert(
+                df.shape[1], ("", corrected_p_val_col_name), self.corrected_pvals
+            )
+        df.insert(df.shape[1], ("", p_val_col_name), self.pvals)
+
+        # Format the index values: `HP:0001250` -> `Seizure [HP:0001250]` if the index members are HPO terms
+        # or just use the term ID CURIE otherwise (e.g. `OMIM:123000`).
+        labeled_idx = df.index.map(
+            lambda term_id: MultiPhenotypeAnalysisResult._format_term_id(hpo, term_id)
+        )
+
+        # Last, sort by corrected p value or just p value
+        df = df.set_index(labeled_idx)
+        if self.corrected_pvals is not None:
+            return df.sort_values(
+                by=[("", corrected_p_val_col_name), ("", p_val_col_name)]
+            )
+        else:
+            return df.sort_values(by=("", p_val_col_name))
+
+    @staticmethod
+    def _format_term_id(
+        hpo: hpotk.MinimalOntology,
+        term_id: hpotk.TermId,
+    ) -> str:
+        """
+        Format a `term_id` as a `str`. HPO term ID is formatted as `<name> [<term_id>]` whereas other term IDs
+        are formatted as CURIEs (e.g. `OMIM:123000`).
+        """
+        if term_id.prefix == "HP":
+            term_name = hpo.get_term_name(term_id)
+            return f"{term_name} [{term_id.value}]"
+        else:
+            return term_id.value
+
+
+class MultiPhenotypeAnalysis(typing.Generic[P], metaclass=abc.ABCMeta):
+
+    def __init__(
+        self,
+        count_statistic: CountStatistic,
+        mtc_correction: typing.Optional[str] = None,
+        mtc_alpha: float = 0.05,
+    ):
+        assert isinstance(count_statistic, CountStatistic)
+        self._count_statistic = count_statistic
+        self._mtc_correction = mtc_correction
+        self._mtc_alpha = mtc_alpha
+
+    @abc.abstractmethod
+    def compare_genotype_vs_phenotypes(
+        self,
+        cohort: typing.Iterable[Patient],
+        gt_predicate: GenotypePolyPredicate,
+        pheno_predicates: typing.Iterable[PhenotypePolyPredicate[P]],
+    ) -> MultiPhenotypeAnalysisResult[P]:
+        pass
+
+    def _compute_nominal_pvals(
+        self,
+        n_usable: typing.Iterable[int],
+        all_counts: typing.Iterable[pd.DataFrame],
+    ) -> np.ndarray:
+        pvals = []
+        for i, (usable, count) in enumerate(zip(n_usable, all_counts)):
+            if usable == 0:
+                pvals.append(np.nan)
+            else:
+                try:
+                    pval = self._count_statistic.compute_pval(count)
+                    pvals.append(pval)
+                except ValueError as ve:
+                    # TODO: add more context to the exception?
+                    raise ve
+
+        return np.array(pvals)
+
+    def _apply_mtc(
+        self,
+        pvals: typing.Sequence[float],
+    ) -> typing.Optional[typing.Sequence[float]]:
+        _, corrected_pvals, _, _ = multitest.multipletests(
+            pvals=pvals,
+            alpha=self._mtc_alpha,
+            method=self._mtc_correction,
+            is_sorted=False,
+            returnsorted=False,
+        )
+        return corrected_pvals
+
+
+class BaseMultiPhenotypeAnalysisResult(typing.Generic[P], MultiPhenotypeAnalysisResult[P]):
+
+    def __init__(
+        self,
+        phenotypes: typing.Sequence[P],
+        n_usable: typing.Sequence[int],
+        all_counts: typing.Sequence[pd.DataFrame],
+        pvals: typing.Sequence[float],
+        corrected_pvals: typing.Optional[typing.Sequence[float]],
+        gt_predicate: GenotypePolyPredicate,
+    ):
+        self._phenotypes = tuple(phenotypes)
+        self._n_usable = tuple(n_usable)
+        self._all_counts = tuple(all_counts)
+        self._pvals = tuple(pvals)
+        self._corrected_pvals = (
+            None if corrected_pvals is None else tuple(corrected_pvals)
+        )
+        assert isinstance(gt_predicate, GenotypePolyPredicate)
+        self._gt_predicate = gt_predicate
+
+    @property
+    def phenotypes(self) -> typing.Sequence[P]:
+        return self._phenotypes
+
+    @property
+    def n_usable(self) -> typing.Sequence[int]:
+        return self._n_usable
+
+    @property
+    def all_counts(self) -> typing.Sequence[pd.DataFrame]:
+        return self._all_counts
+
+    @property
+    def pvals(self) -> typing.Sequence[float]:
+        return self._pvals
+
+    @property
+    def corrected_pvals(self) -> typing.Optional[typing.Sequence[float]]:
+        return self._corrected_pvals
+
+    def __eq__(self, other):
+        return isinstance(other, BaseMultiPhenotypeAnalysisResult) \
+            and self._phenotypes == other._phenotypes \
+            and self._n_usable == other._n_usable \
+            and self._all_counts == other._all_counts \
+            and self._pvals == other._pvals \
+            and self._corrected_pvals == other._corrected_pvals\
+            and self._gt_predicate == other._gt_predicate
+
+    def __hash__(self):
+        # NOTE: if a field is added, the hash method of the subclasses must be updated as well.
+        return hash((
+            self._phenotypes, self._n_usable, self._all_counts,
+            self._pvals, self._corrected_pvals, self._gt_predicate,
+        ))
+
+
+class DiseaseAnalysis(MultiPhenotypeAnalysis[hpotk.TermId]):
+
+    def compare_genotype_vs_phenotypes(
+        self,
+        cohort: typing.Iterable[Patient],
+        gt_predicate: GenotypePolyPredicate,
+        pheno_predicates: typing.Iterable[PhenotypePolyPredicate[hpotk.TermId]],
+    ) -> MultiPhenotypeAnalysisResult[hpotk.TermId]:
+        pheno_predicates = tuple(pheno_predicates)
+        if len(pheno_predicates) == 0:
+            raise ValueError("No phenotype predicates were provided")
+
+        # 1 - Count the patients
+        n_usable, all_counts = apply_predicates_on_patients(
+            patients=cohort,
+            gt_predicate=gt_predicate,
+            pheno_predicates=pheno_predicates,
+        )
+
+        # 2 - Compute nominal p values
+        pvals = self._compute_nominal_pvals(n_usable=n_usable, all_counts=all_counts)
+
+        # 3 - Apply Multiple Testing Correction
+        if self._mtc_correction is None:
+            corrected_pvals = None
+        else:
+            corrected_pvals = self._apply_mtc(pvals=pvals)
+
+        phenotypes = tuple(p.phenotype for p in pheno_predicates)
+
+        return BaseMultiPhenotypeAnalysisResult(
+            phenotypes=phenotypes,
+            n_usable=n_usable,
+            all_counts=all_counts,
+            pvals=pvals,
+            corrected_pvals=corrected_pvals,
+            gt_predicate=gt_predicate,
+        )
+
+
+class HpoTermAnalysisResult(BaseMultiPhenotypeAnalysisResult[hpotk.TermId]):
+    """
+    `HpoTermAnalysisResult` includes the :class:`HpoTermAnalysis` results.
+
+    On top of the attributes of :class:`MultiPhenotypeAnalysisResult` parent,
+    the results include the outcome of :class:`PhenotypeMtcFilter`.
+    """
+
+    def __init__(
+        self,
+        phenotypes: typing.Sequence[hpotk.TermId],
+        n_usable: typing.Sequence[int],
+        all_counts: typing.Sequence[pd.DataFrame],
+        pvals: typing.Sequence[float],
+        corrected_pvals: typing.Optional[typing.Sequence[float]],
+        gt_predicate: GenotypePolyPredicate,
+        mtc_filter_name: str,
+        mtc_filter_results: typing.Sequence[PhenotypeMtcResult],
+        mtc_name: typing.Optional[str],
+    ):
+        super().__init__(
+            phenotypes=phenotypes,
+            n_usable=n_usable,
+            all_counts=all_counts,
+            pvals=pvals,
+            corrected_pvals=corrected_pvals,
+            gt_predicate=gt_predicate,
+        )
+        self._mtc_filter_name = mtc_filter_name
+        self._mtc_filter_results = tuple(mtc_filter_results)
+        self._mtc_name = mtc_name
+
+    @property
+    def mtc_filter_name(self) -> str:
+        """
+        Get the MTC filter name.
+        """
+        return self._mtc_filter_name
+
+    @property
+    def mtc_filter_results(self) -> typing.Sequence[PhenotypeMtcResult]:
+        """
+        Get a :class:`PhenotypeMtcResult` for each of the :attr:`phenotypes`.
+        """
+        return self._mtc_filter_results
+
+    @property
+    def mtc_name(self) -> typing.Optional[str]:
+        """
+        Get the name of the multiple testing correction (MTC) procedure (e.g. `bonferroni`, `fdr_bh`, ...)
+        or `None` if no MTC was performed.
+        """
+        return self._mtc_name
+
+    def __eq__(self, other):
+        return isinstance(other, HpoTermAnalysisResult) \
+            and BaseMultiPhenotypeAnalysisResult.__eq__(self, other) \
+            and self._mtc_filter_name == other._mtc_filter_name \
+            and self._mtc_filter_results == other._mtc_filter_results \
+            and self._mtc_name == other._mtc_name
+
+    def __hash__(self):
+        return hash((
+            self._phenotypes, self._n_usable, self._all_counts,
+            self._pvals, self._corrected_pvals, self._gt_predicate,
+            self._mtc_filter_name, self._mtc_filter_results, self._mtc_name,
+        ))
+
+
+class HpoTermAnalysis(MultiPhenotypeAnalysis[hpotk.TermId]):
+    """
+    `HpoTermAnalysis` can be applied if the individual phenotypes are represented as HPO terms.
+
+    The analysis applies the genotype and phenotype predicates, computes the nominal p values,
+    and addresses the multiple testing burden by applying the :class:`PhenotypeMtcFilter`
+    followed by the multiple testing correction `mtc_correction` method.
+
+    `PhenotypeMtcFilter` is applied even if no MTC should be applied.
+    """
+
+    def __init__(
+        self,
+        count_statistic: CountStatistic,
+        mtc_filter: PhenotypeMtcFilter,
+        mtc_correction: typing.Optional[str] = None,
+        mtc_alpha: float = 0.05,
+    ):
+        super().__init__(
+            count_statistic=count_statistic,
+            mtc_correction=mtc_correction,
+            mtc_alpha=mtc_alpha,
+        )
+        assert isinstance(mtc_filter, PhenotypeMtcFilter)
+        self._mtc_filter = mtc_filter
+
+    def compare_genotype_vs_phenotypes(
+        self,
+        cohort: typing.Iterable[Patient],
+        gt_predicate: GenotypePolyPredicate,
+        pheno_predicates: typing.Iterable[PhenotypePolyPredicate[hpotk.TermId]],
+    ) -> HpoTermAnalysisResult:
+        pheno_predicates = tuple(pheno_predicates)
+        if len(pheno_predicates) == 0:
+            raise ValueError("No phenotype predicates were provided")
+        phenotypes = tuple(p.phenotype for p in pheno_predicates)
+
+        # 1 - Count the patients
+        n_usable, all_counts = apply_predicates_on_patients(
+            cohort,
+            gt_predicate,
+            pheno_predicates,
+        )
+
+        # 2 - Apply MTC filter and select p values to MTC
+        mtc_filter_results = self._mtc_filter.filter(
+            gt_predicate=gt_predicate,
+            phenotypes=phenotypes,
+            counts=all_counts,
+        )
+        mtc_mask = np.array([r.is_passed() for r in mtc_filter_results])
+
+        # 3 - Compute nominal p values
+        pvals = np.full(shape=(len(n_usable),), fill_value=np.nan)
+        pvals[mtc_mask] = self._compute_nominal_pvals(
+            n_usable=slice_list_in_numpy_style(n_usable, mtc_mask),
+            all_counts=slice_list_in_numpy_style(all_counts, mtc_mask),
+        )
+
+        # 4 - Apply Multiple Testing Correction
+        if self._mtc_correction is None:
+            corrected_pvals = None
+        else:
+            corrected_pvals = np.full(shape=pvals.shape, fill_value=np.nan)
+            # Do not test the p values that have been filtered out.
+            corrected_pvals[mtc_mask] = self._apply_mtc(pvals=pvals[mtc_mask])
+
+        return HpoTermAnalysisResult(
+            phenotypes=phenotypes,
+            n_usable=n_usable,
+            all_counts=all_counts,
+            pvals=pvals,
+            corrected_pvals=corrected_pvals,
+            gt_predicate=gt_predicate,
+            mtc_filter_name=self._mtc_filter.filter_method_name(),
+            mtc_filter_results=mtc_filter_results,
+            mtc_name=self._mtc_correction,
+        )
+
+
+WHATEVER = typing.TypeVar('WHATEVER')
+
+
+def slice_list_in_numpy_style(
+    a: typing.Sequence[WHATEVER],
+    mask: typing.Sequence[bool],
+) -> typing.Sequence[WHATEVER]:
+    assert len(a) == len(mask)
+    return [item for item, mask_is_set in zip(a, mask) if mask_is_set]
diff --git a/src/gpsea/analysis/pcats/stats/__init__.py b/src/gpsea/analysis/pcats/stats/__init__.py
new file mode 100644
index 00000000..4d7e0543
--- /dev/null
+++ b/src/gpsea/analysis/pcats/stats/__init__.py
@@ -0,0 +1,5 @@
+from ._stats import CountStatistic, ScipyFisherExact, PythonMultiFisherExact
+
+__all__ = [
+    'CountStatistic', 'ScipyFisherExact', 'PythonMultiFisherExact',
+]
diff --git a/src/gpsea/analysis/pcats/stats/_stats.py b/src/gpsea/analysis/pcats/stats/_stats.py
new file mode 100644
index 00000000..8df07fdc
--- /dev/null
+++ b/src/gpsea/analysis/pcats/stats/_stats.py
@@ -0,0 +1,177 @@
+import abc
+import math
+from decimal import Decimal
+
+
+import numpy as np
+import pandas as pd
+
+import scipy
+
+
+class CountStatistic(metaclass=abc.ABCMeta):
+    """
+    `CountStatistic` calculates a p value for a contingency table
+    produced by a pair of discrete random variables.
+
+    The `counts` table is usually `2x2` or `2x3`.
+    """
+
+    @abc.abstractmethod
+    def compute_pval(
+        self,
+        counts: pd.DataFrame,
+    ) -> float:
+        pass
+
+
+class ScipyFisherExact(CountStatistic):
+    """
+    `ScipyFisherExact` performs Fisher Exact Test on a `2x2` contingency table.
+
+    The class is a thin wrapper around Scipy :func:`~scipy.stats.fisher_exact` function.
+    The two-sided :math:`H_1` is considered.
+    """
+
+    def compute_pval(
+        self,
+        counts: pd.DataFrame,
+    ) -> float:
+        assert counts.shape == (
+            2,
+            2,
+        ), f"Cannot run Fisher exact test on an array with {counts.shape} shape"
+        _, pval = scipy.stats.fisher_exact(counts.values, alternative="two-sided")
+        return pval
+
+
+class PythonMultiFisherExact(CountStatistic):
+    """
+    `PythonMultiFisherExact` is a Python implementation of Fisher Exact Test to compute
+    p value for a `2x3` contingency table.
+    """
+
+    def compute_pval(
+        self,
+        counts: pd.DataFrame,
+    ) -> float:
+        PythonMultiFisherExact._check_input(counts)
+        return self._fisher_exact(counts.values)
+
+    @staticmethod
+    def _check_input(a: pd.DataFrame):
+        if not isinstance(a, pd.DataFrame):
+            raise ValueError(f"Expected a pandas DataFrame but got {type(a)}")
+        if not a.shape == (2, 3):
+            raise ValueError(f"Shape of the array must be (2, 3) but got {a.shape}")
+        if np.array_equal(a.values, np.zeros_like(a)):
+            raise ValueError("Data frame is all zeros, cannot run analysis")
+
+    def _fisher_exact(
+        self,
+        table: np.ndarray,
+    ):
+        row_sum = []
+        col_sum = []
+
+        for i in range(len(table)):
+            temp = 0
+            for j in range(len(table[0])):
+                temp += table[i][j]
+            row_sum.append(temp)
+
+        for j in range(len(table[0])):
+            temp = 0
+            for i in range(len(table)):
+                temp += table[i][j]
+            col_sum.append(temp)
+
+        mat = [[0] * len(col_sum)] * len(row_sum)
+        pos = (0, 0)
+
+        p_0 = 1
+
+        for x in row_sum:
+            p_0 *= math.factorial(x)
+        for y in col_sum:
+            p_0 *= math.factorial(y)
+
+        n = 0
+        for x in row_sum:
+            n += x
+        p_0 /= Decimal(math.factorial(n))
+
+        for i in range(len(table)):
+            for j in range(len(table[0])):
+                p_0 /= Decimal(math.factorial(table[i][j]))
+
+        p = [0]
+        self._dfs(mat, pos, row_sum, col_sum, p_0, p)
+
+        return float(p[0])
+
+    def _dfs(self, mat, pos, r_sum, c_sum, p_0, p):
+
+        (xx, yy) = pos
+        (r, c) = (len(r_sum), len(c_sum))
+
+        mat_new = []
+
+        for i in range(len(mat)):
+            temp = []
+            for j in range(len(mat[0])):
+                temp.append(mat[i][j])
+            mat_new.append(temp)
+
+        if xx == -1 and yy == -1:
+            for i in range(r - 1):
+                temp = r_sum[i]
+                for j in range(c - 1):
+                    temp -= mat_new[i][j]
+                mat_new[i][c - 1] = temp
+            for j in range(c - 1):
+                temp = c_sum[j]
+                for i in range(r - 1):
+                    temp -= mat_new[i][j]
+                mat_new[r - 1][j] = temp
+            temp = r_sum[r - 1]
+            for j in range(c - 1):
+                temp -= mat_new[r - 1][j]
+            if temp < 0:
+                return
+            mat_new[r - 1][c - 1] = temp
+
+            p_1 = 1
+            for x in r_sum:
+                p_1 *= math.factorial(x)
+            for y in c_sum:
+                p_1 *= math.factorial(y)
+
+            n = 0
+            for x in r_sum:
+                n += x
+            p_1 /= Decimal(math.factorial(n))
+
+            for i in range(len(mat_new)):
+                for j in range(len(mat_new[0])):
+                    p_1 /= Decimal(math.factorial(mat_new[i][j]))
+            if p_1 <= p_0 + Decimal(0.00000001):
+                # print(mat_new)
+                # print(p_1)
+                p[0] += p_1
+        else:
+            max_1 = r_sum[xx]
+            max_2 = c_sum[yy]
+            for j in range(c):
+                max_1 -= mat_new[xx][j]
+            for i in range(r):
+                max_2 -= mat_new[i][yy]
+            for k in range(min(max_1, max_2) + 1):
+                mat_new[xx][yy] = k
+                if xx == r - 2 and yy == c - 2:
+                    pos_new = (-1, -1)
+                elif xx == r - 2:
+                    pos_new = (0, yy + 1)
+                else:
+                    pos_new = (xx + 1, yy)
+                self._dfs(mat_new, pos_new, r_sum, c_sum, p_0, p)
diff --git a/src/gpsea/analysis/pcats/stats/_test__stats.py b/src/gpsea/analysis/pcats/stats/_test__stats.py
new file mode 100644
index 00000000..aa254306
--- /dev/null
+++ b/src/gpsea/analysis/pcats/stats/_test__stats.py
@@ -0,0 +1,33 @@
+import numpy as np
+import pandas as pd
+import pytest
+
+from ._stats import PythonMultiFisherExact
+
+
+class TestPythonMultiFisherExact:
+
+    @pytest.fixture
+    def fisher_exact(self) -> PythonMultiFisherExact:
+        return PythonMultiFisherExact()
+
+    @pytest.mark.parametrize(
+        "counts, expected",
+        (
+            [[[2, 1, 0], [3, 0, 2]], 0.6428571428571429],
+            [[[5, 5, 5], [5, 5, 5]], 1],
+            [[[10, 2, 3], [1, 3, 4]], 0.03952977071835599],
+            [[[6, 0, 0], [6, 5, 8]], 0.02334274421230943],
+        ),
+    )
+    def test_compute_pval(
+        self,
+        counts,
+        expected,
+        fisher_exact: PythonMultiFisherExact,
+    ):
+        contingency_matrix = pd.DataFrame(np.array(counts, dtype=np.int64))
+
+        final_pval = fisher_exact.compute_pval(contingency_matrix)
+        assert final_pval == pytest.approx(expected)
+
diff --git a/src/gpsea/analysis/predicate/__init__.py b/src/gpsea/analysis/predicate/__init__.py
index a2e0cb54..b4f3b28b 100644
--- a/src/gpsea/analysis/predicate/__init__.py
+++ b/src/gpsea/analysis/predicate/__init__.py
@@ -1,12 +1,7 @@
-from . import genotype
-from . import phenotype
-
 from ._api import PatientCategory, PatientCategories, Categorization, C
 from ._api import PolyPredicate
-from ._api import GenotypePolyPredicate, GenotypeBooleanPredicate
 
 __all__ = [
     'PatientCategory', 'PatientCategories', 'Categorization', 'C',
     'PolyPredicate',
-    'GenotypePolyPredicate', 'GenotypeBooleanPredicate',
 ]
diff --git a/src/gpsea/analysis/predicate/_api.py b/src/gpsea/analysis/predicate/_api.py
index 2f1a62ea..8c62e191 100644
--- a/src/gpsea/analysis/predicate/_api.py
+++ b/src/gpsea/analysis/predicate/_api.py
@@ -148,11 +148,38 @@ def get_categorizations(self) -> typing.Sequence[C]:
         """
         pass
 
-    def get_categories(self) -> typing.Sequence[PatientCategory]:
+    def get_categories(self) -> typing.Iterator[PatientCategory]:
         """
-        Get a sequence with :class:`PatientCategory` instances that the predicate can produce.
+        Get an iterator with :class:`PatientCategory` instances that the predicate can produce.
         """
-        return tuple(c.category for c in self.get_categorizations())
+        return (c.category for c in self.get_categorizations())
+
+    def get_category(
+        self,
+        cat_id: int,
+    ) -> PatientCategory:
+        """
+        Get the category name for a :class:`PatientCategory.cat_id`.
+
+        :param cat_id: an `int` with the id.
+        :raises: ValueError if there is no such category was defined.
+        """
+        for ctg in self.get_categories():
+            if ctg.cat_id == cat_id:
+                return ctg.name
+        raise ValueError(f'No category for {cat_id} was found')
+
+    def get_category_name(
+        self,
+        cat_id: int,
+    ) -> str:
+        """
+        Get the category name for a :class:`PatientCategory.cat_id`.
+
+        :param cat_id: an `int` with the id.
+        :raises: ValueError if there is no such category was defined.
+        """
+        return self.get_category(cat_id).name
 
     @abc.abstractmethod
     def get_question(self) -> str:
@@ -183,35 +210,3 @@ def _check_patient(patient: Patient):
         """
         if not isinstance(patient, Patient):
             raise ValueError(f"patient must be type Patient but was type {type(patient)}")
-
-
-class GenotypePolyPredicate(PolyPredicate[Categorization], metaclass=abc.ABCMeta):
-    """
-    `GenotypePolyPredicate` constrains `PolyPredicate` to investigate the genotype aspects
-    of patients.
-    """
-    pass
-
-
-class GenotypeBooleanPredicate(GenotypePolyPredicate, metaclass=abc.ABCMeta):
-    """
-    `GenotypeBooleanPredicate` tests if a :class:`~gpsea.model.Patient` belongs to a genotype group
-     and returns a boolean binning.
-    """
-    YES = Categorization(PatientCategories.YES)
-    NO = Categorization(PatientCategories.NO)
-
-    def get_categorizations(self) -> typing.Sequence[Categorization]:
-        """
-        The predicate bins a patient into :class:`BooleanPredicate.NO` or :class:`BooleanPredicate.YES` category.
-        """
-        return GenotypeBooleanPredicate.YES, GenotypeBooleanPredicate.NO
-
-
-class RecessiveGroupingPredicate(GenotypePolyPredicate, metaclass=abc.ABCMeta):
-    BOTH = Categorization(PatientCategory(0, 'Both', 'The patient belongs in both groups.'))
-    ONE = Categorization(PatientCategory(1, 'One', 'The patient belongs in one of the two groups.'))
-    NEITHER = Categorization(PatientCategory(2, 'Neither', 'The patient does not belong in either group.'))
-
-    def get_categorizations(self) -> typing.Sequence[Categorization]:
-        return RecessiveGroupingPredicate.BOTH, RecessiveGroupingPredicate.ONE, RecessiveGroupingPredicate.NEITHER
diff --git a/src/gpsea/analysis/predicate/genotype/__init__.py b/src/gpsea/analysis/predicate/genotype/__init__.py
index feff1703..a8a2ba91 100644
--- a/src/gpsea/analysis/predicate/genotype/__init__.py
+++ b/src/gpsea/analysis/predicate/genotype/__init__.py
@@ -1,9 +1,11 @@
+from ._api import GenotypePolyPredicate
 from ._api import VariantPredicate
 from ._counter import AlleleCounter
 from ._gt_predicates import boolean_predicate, groups_predicate, recessive_predicate
 from ._variant import VariantPredicates, ProteinPredicates
 
 __all__ = [
+    'GenotypePolyPredicate',
     'boolean_predicate', 'groups_predicate', 'recessive_predicate',
     'AlleleCounter', 'VariantPredicate',
     'VariantPredicates', 'ProteinPredicates',
diff --git a/src/gpsea/analysis/predicate/genotype/_api.py b/src/gpsea/analysis/predicate/genotype/_api.py
index 5e1a201f..39cf020b 100644
--- a/src/gpsea/analysis/predicate/genotype/_api.py
+++ b/src/gpsea/analysis/predicate/genotype/_api.py
@@ -2,13 +2,31 @@
 import typing
 
 from gpsea.model import Variant
+from .._api import PolyPredicate, Categorization, PatientCategory
+
+
+class GenotypePolyPredicate(PolyPredicate[Categorization], metaclass=abc.ABCMeta):
+    """
+    `GenotypePolyPredicate` is a base class for all :class:`PolyPredicate`
+    that test the genotype axis.
+    """
+    pass
+
+
+class RecessiveGroupingPredicate(GenotypePolyPredicate, metaclass=abc.ABCMeta):
+    BOTH = Categorization(PatientCategory(0, 'Both', 'The patient belongs in both groups.'))
+    ONE = Categorization(PatientCategory(1, 'One', 'The patient belongs in one of the two groups.'))
+    NEITHER = Categorization(PatientCategory(2, 'Neither', 'The patient does not belong in either group.'))
+
+    def get_categorizations(self) -> typing.Sequence[Categorization]:
+        return RecessiveGroupingPredicate.BOTH, RecessiveGroupingPredicate.ONE, RecessiveGroupingPredicate.NEITHER
 
 
 class VariantPredicate(metaclass=abc.ABCMeta):
     """
     `VariantPredicate` tests if a variant meets a certain criterion.
 
-    The subclasses are expected to implement all abstract methods of this class 
+    The subclasses are expected to implement all abstract methods of this class
     *plus* ``__eq__`` and ``__hash__``, to support building of compound predicates.
 
     We *strongly* recommend implementing ``__str__`` and ``__repr__`` as well.
@@ -43,7 +61,7 @@ def __and__(self, other):
                 return self
     
             if isinstance(self, AllVariantPredicate) and isinstance(other, AllVariantPredicate):
-                # Merging two *all* variant predicates is equivalent 
+                # Merging two *all* variant predicates is equivalent
                 # to chaining their predicates
                 return AllVariantPredicate(*self.predicates, *other.predicates)
             else:
@@ -60,7 +78,7 @@ def __or__(self, other):
                 return self
             
             if isinstance(self, AnyVariantPredicate) and isinstance(other, AnyVariantPredicate):
-                # Merging two any variant predicates is equivalent 
+                # Merging two any variant predicates is equivalent
                 # to chaining their predicates
                 return AnyVariantPredicate(*self.predicates, *other.predicates)
             else:
@@ -71,7 +89,7 @@ def __or__(self, other):
     def __invert__(self):
         """
         Create a variant predicate that passes if the underlying predicate fails.
-        """    
+        """
         if isinstance(self, InvVariantPredicate):
             # Unwrap to prevent double negation
             return self._inner
@@ -89,7 +107,6 @@ def __init__(
         if len(predicates) == 0:
             raise ValueError('Predicates must not be empty!')
         self._predicates = tuple(predicates)
-        
 
     @property
     def predicates(self) -> typing.Sequence[VariantPredicate]:
@@ -169,4 +186,3 @@ def __str__(self) -> str:
     
     def __repr__(self) -> str:
         return f'NotVariantPredicate(inner={self._inner})'
-    
\ No newline at end of file
diff --git a/src/gpsea/analysis/predicate/genotype/_gt_predicates.py b/src/gpsea/analysis/predicate/genotype/_gt_predicates.py
index be0251d2..3f54a7e0 100644
--- a/src/gpsea/analysis/predicate/genotype/_gt_predicates.py
+++ b/src/gpsea/analysis/predicate/genotype/_gt_predicates.py
@@ -2,17 +2,19 @@
 
 from gpsea.model import Patient
 
-from .._api import Categorization
-from .._api import GenotypePolyPredicate, GenotypeBooleanPredicate, RecessiveGroupingPredicate
+from .._api import Categorization, PatientCategories
+from ._api import GenotypePolyPredicate, RecessiveGroupingPredicate
 from ._api import VariantPredicate
 from ._counter import AlleleCounter
 
 
-class AlleleCountingGenotypeBooleanPredicate(GenotypeBooleanPredicate):
+class AlleleCountingGenotypeBooleanPredicate(GenotypePolyPredicate):
     # NOT PART OF THE PUBLIC API
     """
     The predicate tests presence of at least one matching allele in the patient.
     """
+    YES = Categorization(PatientCategories.YES)
+    NO = Categorization(PatientCategories.NO)
 
     @staticmethod
     def for_variant_predicate(predicate: VariantPredicate):
@@ -22,6 +24,14 @@ def for_variant_predicate(predicate: VariantPredicate):
     def __init__(self, allele_counter: AlleleCounter):
         self._allele_counter = allele_counter
 
+    def get_categorizations(self) -> typing.Sequence[Categorization]:
+        """
+        The predicate bins a patient into
+        :attr:`AlleleCountingGenotypeBooleanPredicate.NO`
+        or :class:`AlleleCountingGenotypeBooleanPredicate.YES` category.
+        """
+        return AlleleCountingGenotypeBooleanPredicate.YES, AlleleCountingGenotypeBooleanPredicate.NO
+
     def get_question(self) -> str:
         return self._allele_counter.get_question()
 
@@ -30,16 +40,17 @@ def test(self, patient: Patient) -> typing.Optional[Categorization]:
 
         allele_count = self._allele_counter.count(patient)
         if allele_count > 0:
-            return GenotypeBooleanPredicate.YES
+            return AlleleCountingGenotypeBooleanPredicate.YES
         elif allele_count == 0:
-            return GenotypeBooleanPredicate.NO
+            return AlleleCountingGenotypeBooleanPredicate.NO
         else:
             raise ValueError(
                 f"Allele counter should return a non-negative allele count: {allele_count}"
             )
 
     def __eq__(self, value: object) -> bool:
-        return isinstance(value, AlleleCountingGenotypeBooleanPredicate) and self._allele_counter == value._allele_counter
+        return isinstance(value, AlleleCountingGenotypeBooleanPredicate) \
+            and self._allele_counter == value._allele_counter
     
     def __hash__(self) -> int:
         return hash((self._allele_counter,))
@@ -52,7 +63,7 @@ def __repr__(self) -> str:
 
 
 # TODO: write AD, AR, XLR, XLD
-def boolean_predicate(variant_predicate: VariantPredicate) -> GenotypeBooleanPredicate:
+def boolean_predicate(variant_predicate: VariantPredicate) -> GenotypePolyPredicate:
     """
     Create a genotype boolean predicate from given `variant_predicate`
     to test for presence of at least one matching allele in the patient.
diff --git a/src/gpsea/analysis/predicate/genotype/_variant.py b/src/gpsea/analysis/predicate/genotype/_variant.py
index 517f5970..aeaf6e1d 100644
--- a/src/gpsea/analysis/predicate/genotype/_variant.py
+++ b/src/gpsea/analysis/predicate/genotype/_variant.py
@@ -30,12 +30,12 @@ def all(predicates: typing.Iterable[VariantPredicate]) -> VariantPredicate:
         Build a predicate to test if variant has a functional annotation to genes *SURF1* and *SURF2*:
 
         >>> from gpsea.analysis.predicate.genotype import VariantPredicates
-        
+
         >>> genes = ('SURF1', 'SURF2',)
         >>> predicate = VariantPredicates.all(VariantPredicates.gene(g) for g in genes)
         >>> predicate.get_question()
         '(impacts SURF1 AND impacts SURF2)'
-        
+
         Args:
             predicates: an iterable of predicates to test
         """
@@ -55,11 +55,12 @@ def any(predicates: typing.Iterable[VariantPredicate]) -> VariantPredicate:
 
         **Example**
 
-        Build a predicate to test if variant leads to a missense or non-sense change on a fictive transcript `NM_123456.7`:
+        Build a predicate to test if variant leads to a missense
+        or nonsense change on a fictional transcript `NM_123456.7`:
 
         >>> from gpsea.model import VariantEffect
         >>> from gpsea.analysis.predicate.genotype import VariantPredicates
-        
+
         >>> tx_id = 'NM_123456.7'
         >>> effects = (VariantEffect.MISSENSE_VARIANT, VariantEffect.STOP_GAINED,)
         >>> predicate = VariantPredicates.any(VariantPredicates.variant_effect(e, tx_id) for e in effects)
@@ -140,7 +141,8 @@ def exon(
         Prepare a :class:`VariantPredicate` that tests if the variant overlaps with an exon of a specific transcript.
 
         Args:
-            exon: a non-negative `int` with the index of the target exon (e.g. `0` for the 1st exon, `1` for the 2nd, ...)
+            exon: a non-negative `int` with the index of the target exon
+                (e.g. `0` for the 1st exon, `1` for the 2nd, ...)
             tx_id: a `str` with the accession ID of the target transcript (e.g. `NM_123.4`)
         """
         return VariantExonPredicate(exon, tx_id)
@@ -148,10 +150,12 @@ def exon(
     @staticmethod
     def region(region: Region, tx_id: str) -> VariantPredicate:
         """
-        Prepare a :class:`VariantPredicate` that tests if the variant overlaps with a region on a protein of a specific transcript.
+        Prepare a :class:`VariantPredicate` that tests if the variant
+        overlaps with a region on a protein of a specific transcript.
 
         Args:
-            region: a :class:`Region` that gives the start and end coordinate of the region of interest on a protein strand.
+            region: a :class:`Region` that gives the start and end coordinate
+                of the region of interest on a protein strand.
         """
         return ProteinRegionPredicate(region, tx_id)
 
@@ -173,15 +177,20 @@ def is_structural_variant(
         SVs are usually defined as variant affecting more than a certain number of base pairs.
         The thresholds vary in the literature, but here we use 50bp as a default.
 
-        Any variant that affects at least `threshold` base pairs is considered an SV. 
-        Large SVs with unknown breakpoint coordinates or translocations (:class:`VariantClass.BND`) 
+        Any variant that affects at least `threshold` base pairs is considered an SV.
+        Large SVs with unknown breakpoint coordinates or translocations (:class:`VariantClass.BND`)
         are always considered as an SV.
 
         Args:
             threshold: a non-negative `int` with the number of base pairs that must be affected
         """
-        assert threshold >= 0, '`threshold` must be non-negative!'
-        return VariantPredicates.change_length('<=', -threshold) | VariantPredicates.change_length('>=', threshold) | VariantPredicates.is_large_imprecise_sv() | IS_BND
+        assert threshold >= 0, "`threshold` must be non-negative!"
+        return (
+            VariantPredicates.change_length("<=", -threshold)
+            | VariantPredicates.change_length(">=", threshold)
+            | VariantPredicates.is_large_imprecise_sv()
+            | IS_BND
+        )
 
     @staticmethod
     def structural_type(
@@ -190,7 +199,8 @@ def structural_type(
         """
         Prepare a :class:`VariantPredicate` for testing if the variant has a certain structural type.
 
-        We recommend using a descendant of `structural_variant` (`SO:0001537 <https://purl.obolibrary.org/obo/SO_0001537>`_)
+        We recommend using a descendant of `structural_variant`
+        (`SO:0001537 <https://purl.obolibrary.org/obo/SO_0001537>`_)
         as the structural type.
 
         **Example**
@@ -237,51 +247,12 @@ def ref_length(
         length: int,
     ) -> VariantPredicate:
         """
-        Prepare a :class:`VariantPredicate` for testing if the reference (REF) allele 
+        Prepare a :class:`VariantPredicate` for testing if the reference (REF) allele
         of variant is above, below, or (not) equal to certain `length`.
 
-        The length of the REF allele corresponds to the length of the genomic region affected by the variant.
-        Let's show a few examples.
-
-        >>> from gpsea.model import VariantCoordinates
-        >>> from gpsea.model.genome import GRCh38
-        >>> chr1 = GRCh38.contig_by_name("chr1")
-
-        The length of the reference allele of a missense variant is 1 
-        because the variant affects a 1-bp region spanned by the ``C`` nucleotide:
-
-        >>> missense = VariantCoordinates.from_vcf_literal(chr1, 1001, 'C', 'T')
-        >>> len(missense)
-        1
-
-        The length of a "small" deletion is the same as the length of the ref allele `str`:
-        (``'CCC'`` in the example below):
-
-        >>> deletion = VariantCoordinates.from_vcf_literal(chr1, 1001, 'CCC', 'C')
-        >>> len(deletion)
-        3
-
-        This is because the literal notation spells out the alleles. 
-        However, this simple rule does not apply in symbolic notation. 
-        Here, the REF length corresponds to the length of the allele region.
-
-        For instance, for the following structural variant
-
-        .. code::
-           
-           #CHROM   POS    ID   REF  ALT     QUAL   FILTER   INFO 
-           1        1001   .    C    <DEL>   6      PASS     SVTYPE=DEL;END=1003;SVLEN=-3
-
-        the length of the REF allele is `3`:
-
-        >>> sv_deletion = VariantCoordinates.from_vcf_symbolic(
-        ...     chr1, pos=1001, end=1003, 
-        ...     ref='C', alt='<DEL>', svlen=-3,
-        ... )
-        >>> len(sv_deletion)
-        3
+        .. seealso::
 
-        because the deletion removes 3 base pairs at the coordinates :math:`[1001, 1003]`.
+            See :ref:`length-of-the-reference-allele` for more info.
 
         **Example**
 
@@ -291,13 +262,12 @@ def ref_length(
         >>> predicate = VariantPredicates.ref_length('>', 5)
         >>> predicate.get_question()
         'ref allele length > 5'
-        
+
         Args:
             operator: a `str` with the desired test. Must be one of ``{ '<', '<=', '==', '!=', '>=', '>' }``.
             length: a non-negative `int` with the length threshold.
         """
         return RefAlleleLengthPredicate(operator, length)
-        
 
     @staticmethod
     def change_length(
@@ -310,11 +280,11 @@ def change_length(
 
         .. seealso::
 
-            See :meth:`gpsea.model.VariantCoordinates.change_length` for more info on change length.
+            See :ref:`change-length-of-an-allele` for more info.
 
         **Example**
 
-        Make a predicate for testing if the change length is less than or equal to `-10`, 
+        Make a predicate for testing if the change length is less than or equal to `-10`,
         e.g. to test if a variant is a *deletion* leading to removal of at least 10 base pairs:
 
         >>> from gpsea.analysis.predicate.genotype import VariantPredicates
@@ -333,15 +303,15 @@ def is_structural_deletion(
         threshold: int = -50,
     ) -> VariantPredicate:
         """
-        Prepare a :class:`VariantPredicate` for testing if the variant 
+        Prepare a :class:`VariantPredicate` for testing if the variant
         is a `chromosomal deletion <https://purl.obolibrary.org/obo/SO_1000029>`_ or a structural variant deletion
         that leads to removal of at least *n* base pairs (50bp by default).
-        
+
         .. note::
 
             The predicate uses :meth:`~gpsea.model.VariantCoordinates.change_length`
             to determine if the length of the variant is above or below `threshold`.
-            
+
             **IMPORTANT**: the change lengths of deletions are *negative*, since the alternate allele
             is shorter than the reference allele. See the method's documentation for more info.
 
@@ -378,9 +348,7 @@ def __init__(
         self._protein_metadata_service = protein_metadata_service
 
     def protein_feature_type(
-        self,
-        feature_type: FeatureType,
-        tx_id: str
+        self, feature_type: FeatureType, tx_id: str
     ) -> VariantPredicate:
         """
         Prepare a :class:`VariantPredicate` that tests if the variant affects a protein feature type.
@@ -392,11 +360,7 @@ def protein_feature_type(
             feature_type, tx_id, self._protein_metadata_service
         )
 
-    def protein_feature(
-        self,
-        feature_id: str,
-        tx_id: str
-    ) -> VariantPredicate:
+    def protein_feature(self, feature_id: str, tx_id: str) -> VariantPredicate:
         """
         Prepare a :class:`VariantPredicate` that tests if the variant affects a protein feature type.
 
diff --git a/src/gpsea/analysis/predicate/phenotype/__init__.py b/src/gpsea/analysis/predicate/phenotype/__init__.py
index f87a9f92..2efa1e55 100644
--- a/src/gpsea/analysis/predicate/phenotype/__init__.py
+++ b/src/gpsea/analysis/predicate/phenotype/__init__.py
@@ -1,9 +1,19 @@
-from ._pheno import PhenotypePolyPredicate, PropagatingPhenotypePredicate, CountingPhenotypeScorer
+"""
+The `gpsea.analysis.predicate.phenotype` package provides the :class:`PhenotypePolyPredicate`
+for assigning :class:`~gpsea.model.Patient` into a phenotype group.
+
+An individual can be assigned based on presence/absence of a disease diagnosis (:class:`DiseasePresencePredicate`)
+or using the phenotype features encoded into HPO terms (:class:`PropagatingPhenotypePredicate`).
+"""
+
+from ._pheno import PhenotypePolyPredicate, PropagatingPhenotypePredicate
 from ._pheno import DiseasePresencePredicate
 from ._pheno import PhenotypeCategorization, P
+from ._util import prepare_predicates_for_terms_of_interest, prepare_hpo_terms_of_interest
 
 __all__ = [
-    'PhenotypePolyPredicate', 'PropagatingPhenotypePredicate', 'CountingPhenotypeScorer',
+    'PhenotypePolyPredicate', 'PropagatingPhenotypePredicate',
     'DiseasePresencePredicate',
     'PhenotypeCategorization', 'P',
+    'prepare_predicates_for_terms_of_interest', 'prepare_hpo_terms_of_interest',
 ]
diff --git a/src/gpsea/analysis/predicate/phenotype/_pheno.py b/src/gpsea/analysis/predicate/phenotype/_pheno.py
index 4b13f766..c2646f72 100644
--- a/src/gpsea/analysis/predicate/phenotype/_pheno.py
+++ b/src/gpsea/analysis/predicate/phenotype/_pheno.py
@@ -69,9 +69,9 @@ class PhenotypePolyPredicate(
 
     @property
     @abc.abstractmethod
-    def phenotypes(self) -> typing.Sequence[P]:
+    def phenotype(self) -> P:
         """
-        Get the phenotype entities of interest.
+        Get the phenotype entity of interest.
         """
         pass
 
@@ -97,7 +97,7 @@ def __init__(
         self._query = hpotk.util.validate_instance(
             query, hpotk.TermId, "phenotypic_feature"
         )
-        self._query_label = self._hpo.get_term(query)
+        self._query_label = self._hpo.get_term_name(query)
         assert self._query_label is not None, f"Query {query} is in HPO"
         self._missing_implies_phenotype_excluded = hpotk.util.validate_instance(
             missing_implies_phenotype_excluded,
@@ -117,9 +117,8 @@ def get_question(self) -> str:
         return f"Is {self._query_label} present in the patient?"
 
     @property
-    def phenotypes(self) -> typing.Sequence[hpotk.TermId]:
-        # We usually test just a single HPO term, so we return a tuple with a single member.
-        return (self._query,)
+    def phenotype(self) -> hpotk.TermId:
+        return self._query
 
     def get_categorizations(
         self,
@@ -205,9 +204,8 @@ def get_question(self) -> str:
         return f"Was {self._query} diagnosed in the patient"
 
     @property
-    def phenotypes(self) -> typing.Sequence[hpotk.TermId]:
-        # We usually test just a single Disease, so we return a tuple with a single member.
-        return (self._query,)
+    def phenotype(self) -> hpotk.TermId:
+        return self._query
 
     def get_categorizations(
         self,
@@ -240,131 +238,3 @@ def test(
 
     def __repr__(self):
         return f"DiseasePresencePredicate(query={self._query})"
-
-
-class CountingPhenotypeScorer:
-    """
-    `CountingPhenotypeScorer` assigns the patient with a phenotype score
-    that is equivalent to the count of present phenotypes that are either
-    an exact match to the `query` terms or their descendants.
-    For instance, we may want to count whether an individual has brain, liver, kidney, and skin abormalities.
-    In the case, the query would include the corresponding terms (e.g., Abnormal brain morphology HP:0012443).
-    An individual can then have between 0 and 4 phenotype group abnormalities.
-    This predicate is intended to be used with the Mann Whitney U test.
-
-    Example
-    ^^^^^^^
-    We first need to load HPO using HPO toolkit:
-    
-        >>> import hpotk
-        >>> store = hpotk.configure_ontology_store()
-        >>> hpo = store.load_minimal_hpo(release='v2023-10-09')
-        
-    Now we can create `CountingPhenotypeScorer` to test for presence of brain, liver, kidney and skin abnormalities:
-
-        >>> from gpsea.analysis.predicate.phenotype import CountingPhenotypeScorer
-        >>> phenotype_groups = (
-        ...     "HP:0012443",  # Abnormal brain morphology
-        ...     "HP:0410042",  # Abnormal liver morphology
-        ...     "HP:0012210",  # Abnormal renal morphology
-        ...     "HP:0011121",  # Abnormal skin morphology
-        ... )
-        >>> scorer = CountingPhenotypeScorer.from_query_curies(
-        ...     hpo=hpo,
-        ...     query=phenotype_groups,
-        ... )
-
-    """
-
-    @staticmethod
-    def from_query_curies(
-        hpo: hpotk.MinimalOntology,
-        query: typing.Iterable[typing.Union[str, hpotk.TermId]],
-    ):
-        """
-        Create a scorer to test for the number of phenotype terms that fall into the phenotype groups.
-
-        :param hpo: HPO as represented by :class:`~hpotk.ontology.MinimalOntology` of HPO toolkit.
-        :param query: an iterable of the top-level terms, either represented as CURIEs (`str`)
-          or as term IDs.
-        """
-        query_term_ids = set()
-        for q in query:
-            # First check if the query items are Term IDs or curies.
-            if isinstance(q, str):
-                q = hpotk.TermId.from_curie(q)
-            elif isinstance(q, hpotk.TermId):
-                pass
-            else:
-                raise ValueError(
-                    f"query argument must be iterable of hpotk TermId's or strings but we found {type(q)}"
-                )
-
-            # Now chack that the term IDs are HPO term IDs.
-            if q not in hpo:
-                raise ValueError(f"The query {q} was not found in the HPO")
-            query_term_ids.add(q)
-
-        if len(query_term_ids) == 0:
-            raise ValueError("`query` must not be empty")
-
-        # the query terms must not include a term and its ancestor
-        for q in query_term_ids:
-            for anc in hpo.graph.get_ancestors(q):
-                if anc in query_term_ids:
-                    raise ValueError(
-                        f"Both {q} and its ancestor term {anc} were found in the query, "
-                        + "but query terms must not include a term and its ancestor"
-                    )
-
-        return CountingPhenotypeScorer(
-            hpo=hpo,
-            query=query_term_ids,
-        )
-
-    def __init__(
-        self,
-        hpo: hpotk.MinimalOntology,
-        query: typing.Iterable[hpotk.TermId],
-    ):
-        self._hpo = hpo
-        self._query = set(query)
-
-    def get_question(self) -> str:
-        return "How many of the query HPO terms (or their descendants) does the individual display"
-
-    def score(
-        self,
-        patient: Patient,
-    ) -> float:
-        """
-        Get the count (number) of terms in the query set
-        that have matching terms (exact matches or descendants) in the patient.
-        Do not double count if the patient has two terms
-        (e.g., two different descendants) of one of the query terms.
-        """
-        count = 0
-        for q in self._query:
-            for pf in patient.present_phenotypes():
-                hpo_id = pf.identifier
-                if hpo_id == q or any(
-                    anc == q for anc in self._hpo.graph.get_ancestors(hpo_id)
-                ):
-                    count += 1
-                    # We break the inner loop to continue the outer.
-                    break
-
-        # A sanity check - we cannot produce more counts than there are categories!
-        assert 0 <= count <= len(self._query)
-
-        return count
-
-    def __call__(
-        self,
-        *args: typing.Any,
-        **kwds: typing.Any,
-    ) -> float:
-        # TODO: move to `PhenotypeScorer` API.
-        assert len(args) == 1 and isinstance(args[0], Patient), 'The first argument must be an instance of `Patient`'
-        assert len(kwds) == 0, 'We do not take any key-word arguments'
-        return self.score(args[0])
diff --git a/src/gpsea/analysis/predicate/phenotype/_util.py b/src/gpsea/analysis/predicate/phenotype/_util.py
new file mode 100644
index 00000000..ac73dd02
--- /dev/null
+++ b/src/gpsea/analysis/predicate/phenotype/_util.py
@@ -0,0 +1,88 @@
+import typing
+
+from collections import Counter
+
+import hpotk
+
+from ._pheno import PhenotypePolyPredicate, PropagatingPhenotypePredicate
+
+from gpsea.model import Patient
+
+
+def prepare_predicates_for_terms_of_interest(
+    cohort: typing.Iterable[Patient],
+    hpo: hpotk.graph.GraphAware,
+    missing_implies_excluded: bool = False,
+    min_n_of_patients_with_term: int = 2,
+) -> typing.Sequence[PhenotypePolyPredicate[hpotk.TermId]]:
+    """
+    A convenience method for creating a battery of :class:`PropagatingPhenotypePredicate` predicates
+    for testing all phenotypes of interest.
+
+    :param cohort: a cohort of individuals to investigate.
+    :param hpo: an entity with an HPO graph (e.g. :class:`~hpotk.MinimalOntology`).
+    :param missing_implies_excluded: `True` if absence of an annotation should be counted as its explicit exclusion.
+    :param min_n_of_patients_with_term: the minimum number of patients that must feature an HPO term
+        (either directly or indirectly) for the term to be included in the analysis.
+    """
+    return tuple(
+        PropagatingPhenotypePredicate(
+            hpo=hpo,
+            query=term,
+            missing_implies_phenotype_excluded=missing_implies_excluded,
+        ) for term in prepare_hpo_terms_of_interest(
+            cohort, hpo, min_n_of_patients_with_term,
+        )
+    )
+
+
+def prepare_hpo_terms_of_interest(
+    cohort: typing.Iterable[Patient],
+    hpo: hpotk.graph.GraphAware,
+    min_n_of_patients_with_term: int = 2,
+) -> typing.Sequence[hpotk.TermId]:
+    """
+    Prepare a collection of HPO terms to test.
+
+    This includes the direct HPO patient annotations
+    as well as the ancestors of the present terms and the descendants of the excluded terms.
+
+    :param cohort: a cohort of individuals to investigate.
+    :param hpo: an entity with an HPO graph (e.g. :class:`~hpotk.MinimalOntology`).
+    :param min_n_of_patients_with_term: the minimum number of patients that must feature an HPO term
+        (either directly or indirectly) for the term to be included in the analysis.
+    """
+    present_count = Counter()
+    excluded_count = Counter()
+
+    for patient in cohort:
+        for pf in patient.phenotypes:
+            if pf.is_present:
+                # A present phenotypic feature must be counted in.
+                present_count[pf.identifier] += 1
+                # and it also implies presence of its ancestors.
+                for anc in hpo.graph.get_ancestors(pf):
+                    present_count[anc] += 1
+            else:
+                # An excluded phenotypic feature
+                excluded_count[pf.identifier] += 1
+                for desc in hpo.graph.get_descendants(pf):
+                    # implies exclusion of its descendants.
+                    excluded_count[desc] += 1
+
+    total_count = Counter()
+    for term_id, count in present_count.items():
+        total_count[term_id] += count
+
+    for term_id, count in excluded_count.items():
+        total_count[term_id] += count
+
+    final_hpo = []
+    for term_id in present_count:
+        # Keep the term if it is mentioned at least *n* times (incl. being excluded)
+        # in the cohort
+        n_all = total_count[term_id]
+        if n_all >= min_n_of_patients_with_term:
+            final_hpo.append(term_id)
+
+    return tuple(final_hpo)
diff --git a/src/gpsea/analysis/pscore/__init__.py b/src/gpsea/analysis/pscore/__init__.py
new file mode 100644
index 00000000..d03f6fd9
--- /dev/null
+++ b/src/gpsea/analysis/pscore/__init__.py
@@ -0,0 +1,7 @@
+from ._api import PhenotypeScorer, PhenotypeScoreAnalysis, PhenotypeScoreAnalysisResult
+from ._impl import CountingPhenotypeScorer
+
+__all__ = [
+    'PhenotypeScorer', 'PhenotypeScoreAnalysis', 'PhenotypeScoreAnalysisResult',
+    'CountingPhenotypeScorer',
+]
diff --git a/src/gpsea/analysis/pscore/_api.py b/src/gpsea/analysis/pscore/_api.py
new file mode 100644
index 00000000..a2899734
--- /dev/null
+++ b/src/gpsea/analysis/pscore/_api.py
@@ -0,0 +1,162 @@
+import abc
+import typing
+
+import pandas as pd
+
+from gpsea.model import Patient
+from ..predicate.genotype import GenotypePolyPredicate
+from .stats import PhenotypeScoreStatistic
+
+
+class PhenotypeScorer(metaclass=abc.ABCMeta):
+    """
+    `PhenotypeScorer` assigns the patient with a phenotype score.
+    """
+
+    def score(self, patient: Patient) -> float:
+        """
+        Compute the score for the `patient`.
+        """
+        pass
+
+
+class PhenotypeScoreAnalysisResult:
+    """
+    `PhenotypeScoreAnalysisResult` is a container for :class:`PhenotypeScoreAnalysis` results.
+    """
+
+    def __init__(
+        self,
+        genotype_phenotype_scores: pd.DataFrame,
+        pval: float,
+    ):
+        self._genotype_phenotype_scores = genotype_phenotype_scores
+        self._pval = float(pval)
+
+    @property
+    def genotype_phenotype_scores(self) -> pd.DataFrame:
+        """
+        Get the DataFrame with the genotype group and the phenotype score for each patient.
+
+        The DataFrame has the following structure:
+
+        ==========  ========  =========
+        patient_id  genotype  phenotype
+        ==========  ========  =========
+        patient_1   0         1
+        patient_2   0         3
+        patient_3   None      2
+        patient_4   1         2
+        ...         ...       ...
+        ==========  ========  =========
+
+        The DataFrame index includes the patient IDs, and then there are 2 columns
+        with the `genotype` group id (:attr:`~gpsea.analysis.predicate.PatientCategory.cat_id`)
+        and the `phenotype` score. A `genotype` value may be missing if the patient
+        cannot be assigned into any genotype category.
+        """
+        return self._genotype_phenotype_scores
+
+    @property
+    def pval(self) -> float:
+        """
+        Get the p value of the test.
+        """
+        return self._pval
+
+    def plot_boxplots(
+        self,
+        gt_predicate: GenotypePolyPredicate,
+        ax,
+        colors=["darksalmon", "honeydew"],
+    ):
+        """
+        Draw box plots with distributions of phenotype scores for genotype groups
+        """
+        # skip the patients with unassigned genotype group
+        not_na_gts = self._genotype_phenotype_scores["genotype"].notna()
+        data = self._genotype_phenotype_scores.loc[not_na_gts]
+        actual = set(data["genotype"].unique())
+        expected = gt_predicate.get_categorizations()
+        x = [
+            data.loc[data["genotype"] == c.category.cat_id, "phenotype"].to_list()
+            for c in gt_predicate.get_categorizations()
+        ]
+        
+        gt_cat_names = [
+            c.category.name for c in gt_predicate.get_categorizations()
+        ]
+        bplot = ax.boxplot(
+            x=x,
+            patch_artist=True,
+            tick_labels=gt_cat_names,
+        )
+
+        for patch, color in zip(bplot["boxes"], colors):
+            patch.set_facecolor(color)
+
+
+class PhenotypeScoreAnalysis:
+    """
+    `PhenotypeScoreAnalysis` tests the association between two or more genotype groups
+    and a phenotype score.
+
+    The genotype groups are created by a :class:`~gpsea.analysis.predicate.genotype.GenotypePolyPredicate`
+    and the phenotype score is computed with :class:`~gpsea.analysis.pscore.PhenotypeScorer`.
+
+    The association is tested with a :class:`~gpsea.analysis.pscore.PhenotypeScoreStatistic`
+    and the results are reported as a :class:`PhenotypeScoreAnalysisResult`.
+    """
+
+    def __init__(
+        self,
+        score_statistic: PhenotypeScoreStatistic,
+    ):
+        assert isinstance(score_statistic, PhenotypeScoreStatistic)
+        self._statistic = score_statistic
+
+    def compare_genotype_vs_phenotype_score(
+        self,
+        cohort: typing.Iterable[Patient],
+        gt_predicate: GenotypePolyPredicate,
+        pheno_scorer: PhenotypeScorer,
+    ) -> PhenotypeScoreAnalysisResult:
+        """
+        Compute the association between genotype groups and phenotype score.
+
+        :param cohort: the cohort to analyze.
+        :param gt_predicate: a predicate for assigning an individual into a genotype group.
+        :param pheno_scorer: the scorer to compute phenotype score.
+        """
+        assert (
+            gt_predicate.n_categorizations() == 2
+        ), "We only support 2 genotype categories at this point"
+
+        idx = pd.Index((patient.patient_id for patient in cohort), name="patient_id")
+        data = pd.DataFrame(
+            None,
+            index=idx,
+            columns=["genotype", "phenotype"],
+        )
+
+        # Apply the predicates on the patients
+        for patient in cohort:
+            gt_cat = gt_predicate.test(patient)
+            if gt_cat is None:
+                data.loc[patient.patient_id, "genotype"] = None
+            else:
+                data.loc[patient.patient_id, "genotype"] = gt_cat.category.cat_id
+            
+            data.loc[patient.patient_id, "phenotype"] = pheno_scorer.score(patient)
+
+        # Sort by PatientCategory.cat_id and unpack.
+        # For now, we only allow to have up to 2 groups.
+        x_key, y_key = sorted(data["genotype"].dropna().unique())
+        x = data.loc[data["genotype"] == x_key, "phenotype"].to_numpy(dtype=float)
+        y = data.loc[data["genotype"] == y_key, "phenotype"].to_numpy(dtype=float)
+        pval = self._statistic.compute_pval(scores=(x, y))
+
+        return PhenotypeScoreAnalysisResult(
+            genotype_phenotype_scores=data,
+            pval=pval,
+        )
diff --git a/src/gpsea/analysis/pscore/_impl.py b/src/gpsea/analysis/pscore/_impl.py
new file mode 100644
index 00000000..b2ce8bd2
--- /dev/null
+++ b/src/gpsea/analysis/pscore/_impl.py
@@ -0,0 +1,135 @@
+import typing
+
+import hpotk
+
+from gpsea.model import Patient
+from ._api import PhenotypeScorer
+
+
+class CountingPhenotypeScorer(PhenotypeScorer):
+    """
+    `CountingPhenotypeScorer` assigns the patient with a phenotype score
+    that is equivalent to the count of present phenotypes that are either
+    an exact match to the `query` terms or their descendants.
+
+    For instance, we may want to count whether an individual has brain, liver, kidney, and skin abormalities.
+    In the case, the query would include the corresponding terms (e.g., Abnormal brain morphology HP:0012443).
+    An individual can then have between 0 and 4 phenotype group abnormalities.
+    This predicate is intended to be used with the Mann Whitney U test.
+
+    Example
+    ^^^^^^^
+    We first need to load HPO using HPO toolkit:
+
+        >>> import hpotk
+        >>> store = hpotk.configure_ontology_store()
+        >>> hpo = store.load_minimal_hpo(release='v2024-07-01')
+
+    Now we can create `CountingPhenotypeScorer` to test for presence of brain, liver, kidney and skin abnormalities:
+
+        >>> from gpsea.analysis.pscore import CountingPhenotypeScorer
+        >>> phenotype_groups = (
+        ...     "HP:0012443",  # Abnormal brain morphology
+        ...     "HP:0410042",  # Abnormal liver morphology
+        ...     "HP:0012210",  # Abnormal renal morphology
+        ...     "HP:0011121",  # Abnormal skin morphology
+        ... )
+        >>> scorer = CountingPhenotypeScorer.from_query_curies(
+        ...     hpo=hpo,
+        ...     query=phenotype_groups,
+        ... )
+
+    """
+
+    @staticmethod
+    def from_query_curies(
+        hpo: hpotk.MinimalOntology,
+        query: typing.Iterable[typing.Union[str, hpotk.TermId]],
+    ):
+        """
+        Create a scorer to test for the number of phenotype terms that fall into the phenotype groups.
+
+        :param hpo: HPO as represented by :class:`~hpotk.ontology.MinimalOntology` of HPO toolkit.
+        :param query: an iterable of the top-level terms, either represented as CURIEs (`str`)
+          or as term IDs.
+        """
+        query_term_ids = set()
+        for q in query:
+            # First check if the query items are Term IDs or curies.
+            if isinstance(q, str):
+                q = hpotk.TermId.from_curie(q)
+            elif isinstance(q, hpotk.TermId):
+                pass
+            else:
+                raise ValueError(
+                    f"query argument must be iterable of hpotk TermId's or strings but we found {type(q)}"
+                )
+
+            # Now chack that the term IDs are HPO term IDs.
+            if q not in hpo:
+                raise ValueError(f"The query {q} was not found in the HPO")
+            query_term_ids.add(q)
+
+        if len(query_term_ids) == 0:
+            raise ValueError("`query` must not be empty")
+
+        # the query terms must not include a term and its ancestor
+        for q in query_term_ids:
+            for anc in hpo.graph.get_ancestors(q):
+                if anc in query_term_ids:
+                    raise ValueError(
+                        f"Both {q} and its ancestor term {anc} were found in the query, "
+                        + "but query terms must not include a term and its ancestor"
+                    )
+
+        return CountingPhenotypeScorer(
+            hpo=hpo,
+            query=query_term_ids,
+        )
+
+    def __init__(
+        self,
+        hpo: hpotk.MinimalOntology,
+        query: typing.Iterable[hpotk.TermId],
+    ):
+        self._hpo = hpo
+        self._query = set(query)
+
+    def get_question(self) -> str:
+        return "How many of the query HPO terms (or their descendants) does the individual display"
+
+    def score(
+        self,
+        patient: Patient,
+    ) -> float:
+        """
+        Get the count (number) of terms in the query set
+        that have matching terms (exact matches or descendants) in the patient.
+        Do not double count if the patient has two terms
+        (e.g., two different descendants) of one of the query terms.
+        """
+        count = 0
+        for q in self._query:
+            for pf in patient.present_phenotypes():
+                hpo_id = pf.identifier
+                if hpo_id == q or any(
+                        anc == q for anc in self._hpo.graph.get_ancestors(hpo_id)
+                ):
+                    count += 1
+                    # We break the inner loop to continue the outer.
+                    break
+
+        # A sanity check - we cannot produce more counts than there are categories!
+        assert 0 <= count <= len(self._query)
+
+        return count
+
+    # def __call__(
+    #         self,
+    #         *args: typing.Any,
+    #         **kwds: typing.Any,
+    # ) -> float:
+    #     # TODO: move to `PhenotypeScorer` API.
+    #     assert len(args) == 1 and isinstance(args[0], Patient), 'The first argument must be an instance of `Patient`'
+    #     assert len(kwds) == 0, 'We do not take any key-word arguments'
+    #     return self.score(args[0])
diff --git a/src/gpsea/analysis/pscore/stats/__init__.py b/src/gpsea/analysis/pscore/stats/__init__.py
new file mode 100644
index 00000000..e46bfb5e
--- /dev/null
+++ b/src/gpsea/analysis/pscore/stats/__init__.py
@@ -0,0 +1,7 @@
+from ._stats import PhenotypeScoreStatistic
+from ._stats import MannWhitneyStatistic
+
+__all__ = [
+    'PhenotypeScoreStatistic',
+    'MannWhitneyStatistic',
+]
diff --git a/src/gpsea/analysis/pscore/stats/_stats.py b/src/gpsea/analysis/pscore/stats/_stats.py
new file mode 100644
index 00000000..01cfa4d9
--- /dev/null
+++ b/src/gpsea/analysis/pscore/stats/_stats.py
@@ -0,0 +1,44 @@
+import abc
+import typing
+
+from scipy.stats import mannwhitneyu
+
+
+class PhenotypeScoreStatistic(metaclass=abc.ABCMeta):
+    """
+    `PhenotypeScoreStatistic` calculates a p value
+    for 2 or more phenotype score groups
+    computed by a :class:`~gpsea.analysis.pscore.PhenotypeScorer`.
+    """
+
+    @abc.abstractmethod
+    def compute_pval(
+        self,
+        scores: typing.Collection[typing.Sequence[float]],
+    ) -> float:
+        pass
+
+
+class MannWhitneyStatistic(PhenotypeScoreStatistic):
+    """
+    `MannWhitneyStatistic` is a wrapper around SciPy's
+    :func:`~scipy.stats.mannwhitneyu` function to apply
+    Mann-Whitney U rank test on 2 phenotype scores.
+
+    See :ref:`phenotype-score-stats` for an example usage.
+    """
+    
+    def compute_pval(
+        self,
+        scores: typing.Collection[typing.Sequence[float]],
+    ) -> float:
+        assert len(scores) == 2, 'Mann-Whitney U rank test only supports 2 categories at this time'
+        
+        x, y = scores
+        _, pval = mannwhitneyu(
+            x=x,
+            y=y,
+            alternative='two-sided',
+        )
+
+        return pval
diff --git a/src/gpsea/model/_cohort.py b/src/gpsea/model/_cohort.py
index 270ebc03..3209748e 100644
--- a/src/gpsea/model/_cohort.py
+++ b/src/gpsea/model/_cohort.py
@@ -107,7 +107,7 @@ def __hash__(self) -> int:
         return hash((self._labels, self._variants, self._phenotypes, self._diseases))
 
 
-class Cohort(typing.Sized):
+class Cohort(typing.Sized, typing.Iterable[Patient]):
 
     @staticmethod
     def from_patients(
@@ -141,7 +141,7 @@ def __init__(
         members: typing.Iterable[Patient],
         excluded_member_count: int,
     ):
-        self._patient_set = frozenset(members)
+        self._members = tuple(set(members))
         self._excluded_count = excluded_member_count
 
     @property
@@ -149,25 +149,25 @@ def all_patients(self) -> typing.Collection[Patient]:
         """
         Get a collection of all patients in the cohort.
         """
-        return self._patient_set
+        return self._members
 
     def all_phenotypes(self) -> typing.Set[Phenotype]:
         """
         Get a set of all phenotypes (observed or excluded) in the cohort members.
         """
-        return set(itertools.chain(phenotype for patient in self._patient_set for phenotype in patient.phenotypes))
+        return set(itertools.chain(phenotype for patient in self._members for phenotype in patient.phenotypes))
 
     def all_diseases(self) -> typing.Set[Disease]:
         """
         Get a set of all diseases (observed or excluded) in the cohort members.
         """
-        return set(itertools.chain(disease for patient in self._patient_set for disease in patient.diseases))
+        return set(itertools.chain(disease for patient in self._members for disease in patient.diseases))
 
     def all_variants(self) -> typing.Set[Variant]:
         """
         Get a set of all variants observed in the cohort members.
         """
-        return set(itertools.chain(variant for patient in self._patient_set for variant in patient.variants))
+        return set(itertools.chain(variant for patient in self._members for variant in patient.variants))
 
     @property
     def all_transcript_ids(self) -> typing.Set[str]:
@@ -181,13 +181,13 @@ def total_patient_count(self):
         """
         Get the total number of cohort members.
         """
-        return len(self._patient_set)
+        return len(self._members)
 
     def get_patient_ids(self) -> typing.Set[str]:
         """
         Get a set of the patient IDs.
         """
-        return set(pat.patient_id for pat in self._patient_set)
+        return set(pat.patient_id for pat in self._members)
 
     def list_present_phenotypes(
         self,
@@ -205,7 +205,7 @@ def list_present_phenotypes(
                 number of patients with that phenotype)
         """
         counter = Counter()
-        for patient in self._patient_set:
+        for patient in self._members:
             counter.update(p.identifier.value for p in patient.phenotypes if p.is_present)
         return counter.most_common(top)
 
@@ -214,7 +214,7 @@ def list_all_diseases(
             top=None,
     ) -> typing.Sequence[typing.Tuple[hpotk.TermId, int]]:
         counter = Counter()
-        for patient in self._patient_set:
+        for patient in self._members:
             counter.update(d.identifier for d in patient.diseases)
         return counter.most_common(top)
 
@@ -230,7 +230,7 @@ def list_all_variants(
             list: A sequence of tuples, formatted (variant key, number of patients with that variant)
         """
         counter = Counter()
-        for patient in self._patient_set:
+        for patient in self._members:
             counter.update(variant.variant_info.variant_key for variant in patient.variants)
         return counter.most_common(top)
 
@@ -246,7 +246,7 @@ def list_all_proteins(
             list: A list of tuples, formatted (protein ID string, the count of variants that affect the protein)
         """
         counter = Counter()
-        for patient in self._patient_set:
+        for patient in self._members:
             counter.update(txa.protein_id for variant in patient.variants for txa in variant.tx_annotations)
         return counter.most_common(top)
 
@@ -287,13 +287,16 @@ def get_variant_by_key(self, variant_key) -> Variant:
             raise ValueError(f"Variant key {variant_key} not found in cohort.")
 
     def __eq__(self, other):
-        return isinstance(other, Cohort) and self._patient_set == other._patient_set
+        return isinstance(other, Cohort) and self._members == other._members
+
+    def __iter__(self) -> typing.Iterator[Patient]:
+        return iter(self._members)
 
     def __len__(self) -> int:
-        return len(self._patient_set)
+        return len(self._members)
 
     def __repr__(self):
-        return f'Cohort(members={self._patient_set}, excluded_count={self._excluded_count})'
+        return f'Cohort(members={self._members}, excluded_count={self._excluded_count})'
 
     def __str__(self):
         return repr(self)
diff --git a/src/gpsea/model/_gt.py b/src/gpsea/model/_gt.py
index 354d4b23..1940adbb 100644
--- a/src/gpsea/model/_gt.py
+++ b/src/gpsea/model/_gt.py
@@ -41,9 +41,11 @@ class Genotypes(typing.Sized, typing.Iterable):
     >>> b = SampleLabels('B')
 
     We can use one of the static methods to create an instance. Either a single genotype:
+    
     >>> gt = Genotypes.single(a, Genotype.HETEROZYGOUS)
 
-    Or genotypes of several samples:
+    or genotypes of several samples:
+
     >>> gts = Genotypes.from_mapping({a: Genotype.HETEROZYGOUS, b: Genotype.HOMOZYGOUS_ALTERNATE})
 
     There are 2 genotypes in the container:
diff --git a/src/gpsea/model/_variant.py b/src/gpsea/model/_variant.py
index e73e818e..ebc8ae07 100644
--- a/src/gpsea/model/_variant.py
+++ b/src/gpsea/model/_variant.py
@@ -439,8 +439,7 @@ def change_length(self) -> int:
         """
         Get the change of length between the `ref` and `alt` alleles due to the variant presence.
 
-        SNVs lead to change length of zero, deletions and insertions/duplications lead to negative
-        and positive change lengths, respectively.
+        See :ref:`change-length-of-an-allele` for more info.
         """
         return self._change_length
 
@@ -869,9 +868,8 @@ def __init__(
     @property
     def variant_info(self) -> VariantInfo:
         """
-        Returns:
-            VariantInfo: A representation of the variant data for sequence and symbolic variants,
-                as well as for large imprecise SVs.
+        Get the representation of the variant data for sequence and symbolic variants, 
+        as well as for large imprecise SVs.
         """
         return self._variant_info
 
diff --git a/src/gpsea/preprocessing/_config.py b/src/gpsea/preprocessing/_config.py
index 91fc7856..67c14eab 100644
--- a/src/gpsea/preprocessing/_config.py
+++ b/src/gpsea/preprocessing/_config.py
@@ -359,7 +359,7 @@ def load_phenopackets(
     """
     Map phenopacket JSON file into patient, validate the patient data, and assemble the patients into a cohort.
 
-    :param pp_directory: path to a folder with phenopacket JSON files. An error is raised if the path does not point to
+    :param phenopackets: path to a folder with phenopacket JSON files. An error is raised if the path does not point to
       a directory with at least one phenopacket.
     :param cohort_creator: cohort creator for turning a sequence of phenopacket
       into a :class:`~gpsea.model.Cohort`.
diff --git a/src/gpsea/view/_stats.py b/src/gpsea/view/_stats.py
index a7bdb1af..b232ad15 100644
--- a/src/gpsea/view/_stats.py
+++ b/src/gpsea/view/_stats.py
@@ -1,7 +1,10 @@
 import typing
 
+from collections import Counter
+
 from jinja2 import Environment, PackageLoader
-from gpsea.analysis import HpoMtcReport
+
+from gpsea.analysis.pcats import HpoTermAnalysisResult
 
 
 class MtcStatsViewer:
@@ -16,45 +19,47 @@ def __init__(self):
 
     def process(
         self,
-        hpo_mtc_report: HpoMtcReport,
+        result: HpoTermAnalysisResult,
     ) -> str:
         """
-        Create an HTML that should be shown with `display(HTML(..))` of the IPython package.
+        Create an HTML to present MTC part of the :class:`HpoTermAnalysisResult`.
+
+        Use the `display(HTML(..))` functions of the IPython package.
 
         Args:
-            hpo_mtc_report (HpoMtcReport): summary of heuristic term filtering procedure
+            result (HpoTermAnalysisResult): the result to show
 
         Returns:
             str: an HTML string with parameterized template for rendering or writing into a standalone HTML file.
         """
-        context = self._prepare_context(hpo_mtc_report)
+        assert isinstance(result, HpoTermAnalysisResult)
+        context = self._prepare_context(result)
         return self._cohort_template.render(context)
 
     @staticmethod
     def _prepare_context(
-        hpo_mtc_report: HpoMtcReport,
+        report: HpoTermAnalysisResult,
     ) -> typing.Mapping[str, typing.Any]:
-        results_map = hpo_mtc_report.skipped_terms_dict
-        if not isinstance(results_map, dict):
-            raise ValueError(
-                "hpo_mtc_report.skipped_terms_dict must be dictionary "
-                f"but was {type(hpo_mtc_report.skipped_terms_dict)}"
-            )
+        counts = Counter()
+        for result in report.mtc_filter_results:
+            if result.is_filtered_out():
+                counts[result.reason] += 1
 
         n_skipped = 0
         reason_to_count = list()
-        for reason, count in sorted(results_map.items(), key=lambda item: item[1], reverse=True):
+        for reason, count in sorted(counts.items(), key=lambda item: item[1], reverse=True):
             reason_to_count.append({"reason": reason, "count": count})
             n_skipped += count
 
-        n_tested = hpo_mtc_report.n_terms_before_filtering - n_skipped
+        n_all = len(report.phenotypes)
+        n_tested = n_all - n_skipped
 
         # The following dictionary is used by the Jinja2 HTML template
         return {
-            "mtc_name": hpo_mtc_report.mtc_method,
-            "hpo_mtc_filter_name": hpo_mtc_report.filter_method,
+            "mtc_name": report.mtc_name,
+            "hpo_mtc_filter_name": report.mtc_filter_name,
             "skipped_hpo_count": n_skipped,
             "tested_hpo_count": n_tested,
-            "total_hpo_count": hpo_mtc_report.n_terms_before_filtering,
+            "total_hpo_count": n_all,
             "reason_to_count": reason_to_count,
         }
diff --git a/tests/analysis/pcats/__init__.py b/tests/analysis/pcats/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/analysis/pcats/test_disease.py b/tests/analysis/pcats/test_disease.py
new file mode 100644
index 00000000..48acece3
--- /dev/null
+++ b/tests/analysis/pcats/test_disease.py
@@ -0,0 +1,50 @@
+import hpotk
+import pytest
+
+from gpsea.model import Cohort
+
+from gpsea.analysis.pcats import DiseaseAnalysis
+from gpsea.analysis.pcats.stats import CountStatistic, ScipyFisherExact
+from gpsea.analysis.predicate.genotype import GenotypePolyPredicate
+from gpsea.analysis.predicate.phenotype import DiseasePresencePredicate
+
+
+class TestDiseaseAnalysis:
+
+    @pytest.fixture(scope='class')
+    def count_statistic(self) -> CountStatistic:
+        return ScipyFisherExact()
+
+    @pytest.fixture(scope='class')
+    def suox_disease(self) -> DiseasePresencePredicate:
+        sulfite_oxidase_deficiency = hpotk.TermId.from_curie('OMIM:272300')
+        return DiseasePresencePredicate(
+            disease_id_query=sulfite_oxidase_deficiency,
+        )
+
+    @pytest.fixture
+    def analysis(
+        self,
+        count_statistic: CountStatistic,
+    ) -> DiseaseAnalysis:
+        return DiseaseAnalysis(
+            count_statistic=count_statistic,
+            mtc_correction='fdr_bh',
+            mtc_alpha=.05,
+        )
+
+    def test_compare_genotype_vs_phenotypes(
+        self,
+        analysis: DiseaseAnalysis,
+        suox_cohort: Cohort,
+        suox_gt_predicate: GenotypePolyPredicate,
+        suox_disease: DiseasePresencePredicate,
+    ):
+        results = analysis.compare_genotype_vs_phenotypes(
+            cohort=suox_cohort.all_patients,
+            gt_predicate=suox_gt_predicate,
+            pheno_predicates=(suox_disease,),
+        )
+
+        assert results is not None
+        # TODO: improve testing
diff --git a/tests/analysis/pcats/test_hpo_term_analysis.py b/tests/analysis/pcats/test_hpo_term_analysis.py
new file mode 100644
index 00000000..516bf9fc
--- /dev/null
+++ b/tests/analysis/pcats/test_hpo_term_analysis.py
@@ -0,0 +1,58 @@
+import typing
+
+import hpotk
+import pytest
+
+from gpsea.model import Cohort
+
+from gpsea.analysis.mtc_filter import PhenotypeMtcFilter, HpoMtcFilter
+from gpsea.analysis.pcats import HpoTermAnalysis
+from gpsea.analysis.pcats.stats import CountStatistic, ScipyFisherExact
+from gpsea.analysis.predicate.genotype import GenotypePolyPredicate
+from gpsea.analysis.predicate.phenotype import PhenotypePolyPredicate
+
+
+class TestHpoTermAnalysis:
+
+    @pytest.fixture(scope='class')
+    def count_statistic(self) -> CountStatistic:
+        return ScipyFisherExact()
+
+    @pytest.fixture(scope='class')
+    def phenotype_mtc_filter(
+        self,
+        hpo: hpotk.MinimalOntology,
+    ) -> PhenotypeMtcFilter:
+        return HpoMtcFilter.default_filter(
+            hpo=hpo,
+            term_frequency_threshold=.2,
+        )
+
+    @pytest.fixture
+    def analysis(
+        self,
+        count_statistic: CountStatistic,
+        phenotype_mtc_filter: PhenotypeMtcFilter,
+    ) -> HpoTermAnalysis:
+        return HpoTermAnalysis(
+            count_statistic=count_statistic,
+            mtc_filter=phenotype_mtc_filter,
+            mtc_correction='fdr_bh',
+            mtc_alpha=.05,
+        )
+
+    def test_compare_genotype_vs_phenotypes(
+        self,
+        analysis: HpoTermAnalysis,
+        suox_cohort: Cohort,
+        suox_gt_predicate: GenotypePolyPredicate,
+        suox_pheno_predicates: typing.Sequence[PhenotypePolyPredicate[hpotk.TermId]]
+    ):
+        results = analysis.compare_genotype_vs_phenotypes(
+            cohort=suox_cohort.all_patients,
+            gt_predicate=suox_gt_predicate,
+            pheno_predicates=suox_pheno_predicates,
+        )
+
+        assert results is not None
+        # TODO: improve testing
diff --git a/tests/analysis/predicate/genotype/test_gt_predicates.py b/tests/analysis/predicate/genotype/test_gt_predicates.py
index eb8a0ca6..3e71283b 100644
--- a/tests/analysis/predicate/genotype/test_gt_predicates.py
+++ b/tests/analysis/predicate/genotype/test_gt_predicates.py
@@ -1,8 +1,8 @@
 import pytest
 
 from gpsea.model import *
-from gpsea.analysis.predicate import GenotypePolyPredicate
 from gpsea.analysis.predicate.genotype import (
+    GenotypePolyPredicate,
     groups_predicate,
     VariantPredicates,
 )
diff --git a/tests/analysis/test_analysis.py b/tests/analysis/test_analysis.py
index a6888300..5db24621 100644
--- a/tests/analysis/test_analysis.py
+++ b/tests/analysis/test_analysis.py
@@ -14,9 +14,9 @@
 
 from gpsea.model import *
 from gpsea.model.genome import *
-from gpsea.analysis.predicate import GenotypePolyPredicate, PatientCategories
+from gpsea.analysis.predicate import PatientCategories
+from gpsea.analysis.predicate.genotype import GenotypePolyPredicate, VariantPredicate
 from gpsea.analysis.predicate.phenotype import PhenotypePolyPredicate
-from gpsea.analysis.predicate.genotype import VariantPredicate
 
 
 def test_apply_predicates_on_patients(
diff --git a/tests/analysis/test_mtc_filter.py b/tests/analysis/test_mtc_filter.py
index 1eb4d8e8..0b060516 100644
--- a/tests/analysis/test_mtc_filter.py
+++ b/tests/analysis/test_mtc_filter.py
@@ -5,8 +5,10 @@
 import pandas as pd
 import pytest
 
-from gpsea.analysis import HpoMtcFilter, SpecifiedTermsMtcFilter, apply_predicates_on_patients
-from gpsea.analysis.predicate import PatientCategories, GenotypePolyPredicate
+from gpsea.analysis import apply_predicates_on_patients
+from gpsea.analysis.mtc_filter import HpoMtcFilter, SpecifiedTermsMtcFilter
+from gpsea.analysis.predicate import PatientCategories
+from gpsea.analysis.predicate.genotype import GenotypePolyPredicate
 from gpsea.analysis.predicate.phenotype import PhenotypePolyPredicate
 from gpsea.model import Cohort
 
@@ -154,7 +156,7 @@ def test_filter_terms_to_test(
         filtered_n_usable, filtered_all_counts, reason_for_filtering_out = mtc_report
 
         assert reason_for_filtering_out['Skipping term because all genotypes have same HPO observed proportions'] == 1
-        assert reason_for_filtering_out['Skipping general term'] == 14
+        assert reason_for_filtering_out['Skipping general term'] == 16
         assert reason_for_filtering_out['Skipping non-target term'] == 5
         assert reason_for_filtering_out['Skipping top level term'] == 0
 
diff --git a/tests/analysis/predicate/phenotype/test_predicate.py b/tests/analysis/test_pscore.py
similarity index 97%
rename from tests/analysis/predicate/phenotype/test_predicate.py
rename to tests/analysis/test_pscore.py
index 345fa016..9b632902 100644
--- a/tests/analysis/predicate/phenotype/test_predicate.py
+++ b/tests/analysis/test_pscore.py
@@ -4,7 +4,7 @@
 import pytest
 
 from gpsea.model import Patient, Phenotype, SampleLabels
-from gpsea.analysis.predicate.phenotype import CountingPhenotypeScorer
+from gpsea.analysis.pscore import CountingPhenotypeScorer
 
 
 class TestCountingPhenotypeScorer:
diff --git a/tests/conftest.py b/tests/conftest.py
index c3d6387b..54f012ec 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -5,8 +5,7 @@
 import hpotk
 import pytest
 
-from gpsea.analysis.predicate import GenotypePolyPredicate
-from gpsea.analysis.predicate.genotype import VariantPredicates, VariantPredicate, boolean_predicate
+from gpsea.analysis.predicate.genotype import GenotypePolyPredicate, VariantPredicates, boolean_predicate
 from gpsea.analysis.predicate.phenotype import PhenotypePolyPredicate, PropagatingPhenotypePredicate
 from gpsea.io import GpseaJSONEncoder, GpseaJSONDecoder
 from gpsea.model import *
diff --git a/tests/test_tutorial.py b/tests/test_tutorial.py
new file mode 100644
index 00000000..52924eff
--- /dev/null
+++ b/tests/test_tutorial.py
@@ -0,0 +1,90 @@
+import pytest
+import ppktstore
+import hpotk
+
+from gpsea.model import Cohort, VariantEffect
+from gpsea.preprocessing import configure_caching_cohort_creator, CohortCreator, load_phenopackets
+from gpsea.analysis.pcats import HpoTermAnalysis
+from gpsea.analysis.mtc_filter import HpoMtcFilter
+from gpsea.analysis.pcats.stats import ScipyFisherExact
+from gpsea.analysis.predicate.genotype import VariantPredicates, groups_predicate
+from gpsea.analysis.predicate.phenotype import prepare_predicates_for_terms_of_interest
+
+
+class TestTutorial:
+
+    @pytest.fixture(scope='class')
+    def cohort_creator(
+        self,
+        hpo: hpotk.MinimalOntology,
+    ) -> CohortCreator:
+        return configure_caching_cohort_creator(
+            hpo=hpo,
+        )
+
+    @pytest.fixture(scope='class')
+    def cohort(
+        self,
+        cohort_creator: CohortCreator,
+    ) -> Cohort:
+        registry = ppktstore.registry.configure_phenopacket_registry()
+        with registry.open_phenopacket_store('0.1.18') as ps:
+            phenopackets = tuple(ps.iter_cohort_phenopackets('TBX5'))
+
+        cohort, _ = load_phenopackets(
+            phenopackets=phenopackets,
+            cohort_creator=cohort_creator,
+        )
+        return cohort
+
+    @pytest.fixture
+    def mtc_filter(
+        self,
+        hpo: hpotk.MinimalOntology,
+    ) -> HpoMtcFilter:
+        return HpoMtcFilter.default_filter(
+            hpo=hpo,
+            term_frequency_threshold=0.2,
+        )
+
+    @pytest.fixture
+    def hpo_term_analysis(
+        self,
+        mtc_filter,
+    ) -> HpoTermAnalysis:
+        return HpoTermAnalysis(
+            count_statistic=ScipyFisherExact(),
+            mtc_filter=mtc_filter,
+            mtc_correction='fdr_bh',
+            mtc_alpha=0.05,
+        )
+
+    @pytest.mark.skip('Just for manual debugging')
+    def test_compare_genotype_vs_phenotype(
+        self,
+        hpo: hpotk.MinimalOntology,
+        cohort: Cohort,
+        hpo_term_analysis: HpoTermAnalysis,
+    ):
+        tx_id = 'NM_181486.4'
+
+        gt_predicate = groups_predicate(
+            predicates=(
+                VariantPredicates.variant_effect(VariantEffect.MISSENSE_VARIANT, tx_id),
+                VariantPredicates.variant_effect(VariantEffect.FRAMESHIFT_VARIANT, tx_id),
+            ),
+            group_names=('Missense', 'Frameshift',),
+        )
+        pheno_predicates = prepare_predicates_for_terms_of_interest(
+            cohort=cohort,
+            hpo=hpo,
+            min_n_of_patients_with_term=2,
+        )
+
+        result = hpo_term_analysis.compare_genotype_vs_phenotypes(
+            cohort=cohort,
+            gt_predicate=gt_predicate,
+            pheno_predicates=pheno_predicates,
+        )
+
+        assert result is not None
diff --git a/tests/view/test_stats.py b/tests/view/test_stats.py
index 0cdacc3a..691f4f6a 100644
--- a/tests/view/test_stats.py
+++ b/tests/view/test_stats.py
@@ -1,11 +1,52 @@
+import hpotk
+import math
 import pytest
 
-from gpsea.analysis import HpoMtcReport
+import pandas as pd
+
+from gpsea.analysis.pcats import HpoTermAnalysisResult
+from gpsea.analysis.predicate import PatientCategories
+from gpsea.analysis.predicate.genotype import GenotypePolyPredicate
+from gpsea.analysis.mtc_filter import PhenotypeMtcResult
 from gpsea.view import MtcStatsViewer
 
 
 class TestStatsViewable:
 
+    @pytest.fixture(scope='class')
+    def hpo_term_analysis_result(
+        self,
+        suox_gt_predicate: GenotypePolyPredicate,
+    ) -> HpoTermAnalysisResult:
+        return HpoTermAnalysisResult(
+            phenotypes=(
+                hpotk.TermId.from_curie('HP:0001166'),  # Arachnodactyly
+                hpotk.TermId.from_curie('HP:0001250'),  # Seizure
+            ),
+            n_usable=(40, 20),
+            all_counts=(
+                pd.DataFrame(
+                    data=[[10, 5], [10, 15]],
+                    index=pd.Index((PatientCategories.YES, PatientCategories.NO,)),
+                    columns=pd.Index(suox_gt_predicate.get_categories())
+                ),
+                pd.DataFrame(
+                    data=[[5, 0], [5, 10]],
+                    index=pd.Index((PatientCategories.YES, PatientCategories.NO,)),
+                    columns=pd.Index(suox_gt_predicate.get_categories())
+                ),
+            ),
+            pvals=(math.nan, .005,),
+            corrected_pvals=(math.nan, .01),
+            gt_predicate=suox_gt_predicate,
+            mtc_filter_name='Random MTC filter',
+            mtc_filter_results=(
+                PhenotypeMtcResult.fail("Not too interesting"),
+                PhenotypeMtcResult.ok(),
+            ),
+            mtc_name='fdr_bh',
+        )
+
     @pytest.fixture
     def stats_viewer(self) -> MtcStatsViewer:
         return MtcStatsViewer()
@@ -14,20 +55,8 @@ def stats_viewer(self) -> MtcStatsViewer:
     def test_process(
         self,
         stats_viewer: MtcStatsViewer,
+        hpo_term_analysis_result: HpoTermAnalysisResult
     ):
-        mtc_report = HpoMtcReport(
-            filter_name='identity filter',
-            mtc_name='bonferroni',
-            filter_results_map={
-                # The reason for skipping a phenotype -> the number of phenotypes skipped for the reason
-                'I slept bad tonight': 0,
-                'The sun is too bright today': 5,
-                'Life is a conspiracy': 80,
-                'I need coffee': 7,
-            },
-            n_terms_before_filtering=100,  # The filtered out (80 + 7 + 5) + the unfiltered
-        )
-
-        report = stats_viewer.process(hpo_mtc_report=mtc_report)
+        report = stats_viewer.process(result=hpo_term_analysis_result)
         with open('mtc_stats.html', 'w') as fh:
             fh.write(report)