diff --git a/data/simulate_AlphaSimR.R b/data/simulate_AlphaSimR.R
deleted file mode 100644
index 74f3a21..0000000
--- a/data/simulate_AlphaSimR.R
+++ /dev/null
@@ -1,109 +0,0 @@
-if (!require("AlphaSimR")) install.packages("AlphaSimR", repos='http://cran.us.r-project.org')
-library(AlphaSimR)
-if (!require("reticulate")) install.packages("reticulate", repos='http://cran.us.r-project.org')
-library(reticulate)
-
-# This code is used from verification.py to simulate quantitative traits
-# by using AlphaSimR.
-#
-# The basic simulation step is the following:
-# 1. Use the tskit Python package through the R package tskit and load the tree
-#   sequence data as a founder population in AlphaSimR. The codes of this step are
-#   largely adapted from
-#   https://github.com/ ynorr/AlphaSimR_Examples/blob/master/misc/msprime.R
-# 2. Simulate quantitative traits of the founder population in AlphaSimR
-
-# The commandline input has 11 elements
-# [num_causal, temporary_directory_name, tree_filename, phenotype_filename,
-# trait_filename, corA, num_trait, h2, h2_2, num_rep, random_seed]
-
-myArgs <- commandArgs(trailingOnly = TRUE)
-# Convert to numerics
-num_causal <- as.numeric(myArgs[1])
-directory_name <- myArgs[2]
-tree_filename <- myArgs[3]
-phenotype_filename <- myArgs[4]
-trait_filename <- myArgs[5]
-corA <- as.numeric(myArgs[6])
-num_trait <- as.numeric(myArgs[7])
-h2 <- as.numeric(myArgs[8])
-h2_2 <- as.numeric(myArgs[9])
-num_rep <- as.numeric(myArgs[10])
-random_seed <- as.numeric(myArgs[11])
-
-set.seed(random_seed)
-
-tskit <- import("tskit")
-
-tree_filename <- paste0(directory_name,"/", tree_filename,".tree")
-ts <- tskit$load(tree_filename)
-
-sites <- ts$tables$sites$asdict()
-pos <- sites$position * 1e-8 # Convert to Morgans
-pos <- pos - pos[1] # Set first position to zero
-
-# Extract haplotypes
-haplo <- t(ts$genotype_matrix())
-
-# Create an AlphaSimR founder population
-founderPop <- newMapPop(genMap=list(pos), haplotypes=list(haplo))
-
-num_ind <- nrow(haplo) / 2
-
-if (num_trait == 1){
-  mean <- 0
-  var <- 1
-  corA <- NULL
-  H2 <- h2
-} else if (num_trait == 2){
-  mean <- c(0,0)
-  var <- c(1,1)
-  corA <- matrix(c(1,corA,corA,1),nrow=2,ncol=2)
-  H2 <- c(h2,h2_2)
-}
-
-phenotype_result <- c()
-trait_result <- c()
-
-for (i in 1:num_rep) {
-  SP <- SimParam$
-    new(founderPop)$
-    addTraitA(
-      nQtlPerChr=num_causal,
-      mean=mean,
-      var=var,
-      corA=corA
-    )$
-    setVarE(H2=H2)
-
-  individuals <- newPop(founderPop)
-
-  trait_df <- c()
-  phenotype_df <- c()
-
-  for (trait_id in 1:num_trait){
-    qtl_site <- SP$traits[[trait_id]]@lociLoc - 1
-    effect_size <- SP$traits[[trait_id]]@addEff
-    trait_id_df <- data.frame(
-      effect_size = effect_size,
-      site_id = qtl_site,
-      trait_id = rep(trait_id-1, length(effect_size))
-    )
-    trait_df <- rbind(trait_df, trait_id_df)
-    phenotype <- individuals@pheno[,trait_id]
-    phenotype_id_df <- data.frame(
-      phenotype=phenotype,
-      individual_id = 0:(num_ind-1),
-      trait_id = rep(trait_id-1, num_ind)
-    )
-    phenotype_df <- rbind(phenotype_df, phenotype_id_df)
-  }
-  phenotype_result <- rbind(phenotype_result, phenotype_df)
-  trait_result <- rbind(trait_result, trait_df)
-}
-
-phenotype_filename <- paste0(directory_name,"/",phenotype_filename,".csv")
-write.csv(phenotype_result, phenotype_filename, row.names=FALSE)
-
-trait_filename <- paste0(directory_name,"/",trait_filename,".csv")
-write.csv(trait_result, trait_filename, row.names=FALSE)
diff --git a/data/simulate_simplePHENOTYPES.R b/data/simulate_simplePHENOTYPES.R
deleted file mode 100644
index fd94c78..0000000
--- a/data/simulate_simplePHENOTYPES.R
+++ /dev/null
@@ -1,50 +0,0 @@
-if (!require("simplePHENOTYPES")) install.packages("simplePHENOTYPES", repos='http://cran.us.r-project.org')
-library(simplePHENOTYPES)
-if (!require("reticulate")) install.packages("reticulate", repos='http://cran.us.r-project.org')
-library(reticulate)
-
-# This code is used from verification.py to simulate quantitative traits
-# by using simplePHENOTYPES.
-
-# This code loads the vcf file and simulates quantitative traits
-
-# The commandline input has 7 elements
-# [num_causal, num_trait, add_effect, add_effect_2, directory_name,
-# vcf_filename, random_seed]
-myArgs <- commandArgs(trailingOnly = TRUE)
-
-num_causal <- as.numeric(myArgs[1])
-num_trait <- as.numeric(myArgs[2])
-add_effect <- as.numeric(myArgs[3])
-add_effect_2 <- as.numeric(myArgs[4])
-directory_name <- myArgs[5]
-vcf_filename <- myArgs[6]
-random_seed <- as.numeric(myArgs[7])
-
-if (num_trait == 1){
-  effect <- add_effect
-  mean <- 0
-  h2 <- 1
-} else if (num_trait == 2){
-  effect <- c(add_effect, add_effect_2)
-  mean <- c(0,0)
-  h2 <- c(1,1)
-}
-
-suppressMessages(create_phenotypes(
-  geno_file = paste0(directory_name, "/", vcf_filename, ".vcf"),
-  add_QTN_num = num_causal,
-  add_effect = effect,
-  rep = 1,
-  h2 = h2,
-  model = "A",
-  seed = random_seed,
-  vary_QTN = FALSE,
-  to_r = FALSE,
-  sim_method = "geometric",
-  quiet = TRUE,
-  home_dir = directory_name,
-  verbose = FALSE,
-  mean = mean,
-  ntraits = num_trait
-))
\ No newline at end of file
diff --git a/data/simulate_simplePHENOTYPES_multiple.R b/data/simulate_simplePHENOTYPES_multiple.R
deleted file mode 100644
index 287628f..0000000
--- a/data/simulate_simplePHENOTYPES_multiple.R
+++ /dev/null
@@ -1,75 +0,0 @@
-if (!require("simplePHENOTYPES")) install.packages("simplePHENOTYPES", repos='http://cran.us.r-project.org')
-library(simplePHENOTYPES)
-if (!require("reticulate")) install.packages("reticulate", repos='http://cran.us.r-project.org')
-library(reticulate)
-
-# This code is used from verification.py to simulate quantitative traits
-# by using simplePHENOTYPES.
-
-# This code loads the vcf file that is generated in `verification.py`
-# and uses the effect size from a normal distribution to simulate
-# additive traits.
-
-# The commandline input has 9 elements
-# [num_causal, h2, directory_name,
-# vcf_filename, num_rep, output_filename, 
-# mean, var, random_seed]
-myArgs <- commandArgs(trailingOnly = TRUE)
-
-num_causal <- as.numeric(myArgs[1])
-h2 <- as.numeric(myArgs[2])
-directory_name <- myArgs[3]
-vcf_filename <- myArgs[4]
-num_rep <- myArgs[5]
-output_filename <- myArgs[6]
-mean <- as.numeric(myArgs[7])
-var <- as.numeric(myArgs[8])
-random_seed <- as.numeric(myArgs[9])
-
-set.seed(random_seed)
-
-sd <- sqrt(var)
-
-# Function to simulate phenotypes from simplePHENOTYPES
-# The effect sizes are simulated from a normal distribution,
-# as the geometric series is the only effect size distribution
-# supported in simplePHENOTYPES.
-simulate_simplePHENOTYPE <- function(
-    num_causal, vcf_filename, random_seed
-    ) {
-  effect_size <- list(rnorm(n=num_causal, mean=mean, sd=sd))
-  phenotypes <- suppressMessages(create_phenotypes(
-    geno_file = paste0(directory_name, "/", vcf_filename, ".vcf"),
-    add_QTN_num = num_causal,
-    add_effect = effect_size,
-    rep = 1,
-    h2 = h2,
-    model = "A",
-    seed = random_seed,
-    vary_QTN = FALSE,
-    to_r = TRUE,
-    sim_method = "custom",
-    quiet = TRUE,
-    home_dir = directory_name,
-    verbose = FALSE,
-    mean = 0
-  ))
-  # The mean is centered at 0 from simplePHENOTYPES simulation
-  # so we will divide it by the standard deviation to normalise
-  # the data
-  phenotypes[,2] <- phenotypes[,2] / sd(phenotypes[,2])
-  names(phenotypes)[1:2] <- c("individual_id", "phenotype")
-  return(phenotypes)
-}
-
-phenotype_result <- c()
-
-for (i in 1:num_rep) {
-  simulated_result <- simulate_simplePHENOTYPE(
-    num_causal=num_causal, vcf_filename=vcf_filename, random_seed=random_seed+i
-  )
-  phenotype_result <- rbind(phenotype_result, simulated_result)
-}
-
-filename = paste0(directory_name, "/", output_filename,".csv")
-write.csv(phenotype_result, filename, row.names=FALSE)
diff --git a/data/simulation_codes.md b/data/simulation_codes.md
deleted file mode 100644
index 51f5bc0..0000000
--- a/data/simulation_codes.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Simulation Codes
-
-This 
-This directory is used to store the R simulation codes.
-
-R simulation codes:
-
-- simulate_AlphaSimR.R: This R code uses AlphaSimR to simulate quantitative traits based on a simulated tree sequence data.
-- simulate_simplePHENOTYPES.R: This R code uses simplePHENOTYPES to simulate quantitative traits based on a VCF file.
\ No newline at end of file
diff --git a/verification.py b/verification.py
index 8756e34..d191970 100644
--- a/verification.py
+++ b/verification.py
@@ -140,45 +140,72 @@ class SimulationResult:
     trait: pd.DataFrame
 
 
-class PhenotypeSimulator:
+def _simulate_simplePHENOTYPES_qqplot(
+    ts, num_causal, h2, random_seed, num_rep, mean=0, var=1
+):
     """
-    Class to simulate traits based on 3 models, ALphaSimR, simplePHENOTYPES,
-    and the simulation framework described in the ARG-Needle paper.
+    The input of this function is a tree sequence and the output is a phenotype
+    dataframe. This function is used for comparing the simulation output in
+    a QQ-plot.
+    The output of this function will be a phenotype dataframe.
     """
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        with open(f"{tmpdirname}/tree_seq.vcf", "w") as vcf_file:
+            ts.write_vcf(vcf_file)
+        cmd = ["Rscript", "scripts/simulate_simplePHENOTYPES_qqplot.R"]
+        args = [
+            str(num_causal),
+            str(h2),
+            tmpdirname,
+            str(num_rep),
+            str(mean),
+            str(var),
+            str(random_seed),
+        ]
+        input_cmd = cmd + args
+        subprocess.check_output(input_cmd)
 
-    def __init__(self, model):
-        self.model = model
+        phenotype_df = pd.read_csv(f"{tmpdirname}/simplePHENOTYPES.csv")
 
-    def _simulate_simplePHENOTYPES_single(
-        self, ts, num_causal, random_seed, add_effect=1, num_trait=1, add_effect_2=1
-    ):
-        """
-        The function to simulate quantitative traits by using simplePHENOTYPES.
-        We will specify the number of causal sites and the parameter for the
-        geometric series where the effect sizes are determined.
-        """
+        num_ind = ts.num_individuals
+        # Change the individual ID in simplePHENOTYPES output to be consistent with the
+        # tstrait output
+        for i in range(num_ind):
+            phenotype_df = phenotype_df.replace(f"tsk_{i}", i)
+
+    return phenotype_df
 
-        directory = tempfile.TemporaryDirectory()
 
-        vcf_filename = "vcf_comparison_simplePHENOTYPES"
-        with open(f"{directory.name}/{vcf_filename}.vcf", "w") as vcf_file:
+def _simulate_simplePHENOTYPES_exact(
+    ts, num_causal, random_seed, add_effect=1, num_trait=1, add_effect_2=1
+):
+    """
+    The function to simulate quantitative traits by using simplePHENOTYPES.
+    We will specify the number of causal sites and the parameter for the
+    geometric series where the effect sizes are determined.
+    The output of this function will be a SimulationResult object.
+    """
+
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        with open(f"{tmpdirname}/tree_seq.vcf", "w") as vcf_file:
             ts.write_vcf(vcf_file)
-        cmd = ["Rscript", "data/simulate_simplePHENOTYPES.R"]
+        cmd = ["Rscript", "data/simulate_simplePHENOTYPES_exact.R"]
         args = [
             str(num_causal),
             str(num_trait),
             str(add_effect),
             str(add_effect_2),
-            directory.name,
-            vcf_filename,
+            tmpdirname,
             str(random_seed),
         ]
         input_cmd = cmd + args
         subprocess.check_output(input_cmd)
 
+        qtn_df = pd.read_csv(f"{tmpdirname}/Additive_Selected_QTNs.txt", sep="\t")
+        
         if num_trait == 1:
             phenotype_df = pd.read_csv(
-                f"{directory.name}/Simulated_Data_1_Reps_Herit_1.txt", sep="\t"
+                f"{tmpdirname}/Simulated_Data_1_Reps_Herit_1.txt", sep="\t"
             )
             del phenotype_df["reps"]
             phenotype_df = phenotype_df.rename(
@@ -186,7 +213,7 @@ def _simulate_simplePHENOTYPES_single(
             )
         else:
             phenotype_df = pd.read_csv(
-                f"{directory.name}/Simulated_Data_1_Reps_Herit_1_1.txt", sep="\t"
+                f"{tmpdirname}/Simulated_Data_1_Reps_Herit_1_1.txt", sep="\t"
             )
             del phenotype_df["Rep"]
             phenotype_df = pd.melt(
@@ -203,154 +230,56 @@ def _simulate_simplePHENOTYPES_single(
             )
             phenotype_df = phenotype_df.replace({"Trait_1_H2_1": 0, "Trait_2_H2_1": 1})
 
-        num_ind = ts.num_individuals
-        # Change the individual ID in simplePHENOTYPES output to be consistent with the
-        # tstrait output
-        for i in range(num_ind):
-            phenotype_df = phenotype_df.replace(f"tsk_{i}", i)
-
-        qtn_df = pd.read_csv(f"{directory.name}/Additive_Selected_QTNs.txt", sep="\t")
-
-        # Obtain the list of causal allele
-        causal_allele = []
-        effect_size = []
-        effect_size_2 = []
-        for i, site_id in enumerate(qtn_df["snp"].values, start=1):
-            # simplePHENOTYPES uses ancestral state as a causal allele
-            allele = ts.site(site_id).ancestral_state
-            causal_allele.append(allele)
-            effect_size.append(add_effect**i)
-            effect_size_2.append(add_effect_2**i)
-
-        if num_trait == 2:
-            effect_size = np.append(effect_size, effect_size_2)
-
-        trait_df = pd.DataFrame(
-            {
-                "site_id": np.tile(qtn_df["snp"].values, num_trait),
-                "causal_allele": np.tile(causal_allele, num_trait),
-                "effect_size": effect_size,
-                "trait_id": np.repeat(np.arange(num_trait), len(causal_allele)),
-            }
-        )
-
-        simulation_result = SimulationResult(phenotype=phenotype_df, trait=trait_df)
-
-        directory.cleanup()
-
-        return simulation_result
-
-    def _simulate_simplePHENOTYPES_multiple(
-        self, ts, num_causal, h2, random_seed, num_rep, mean=0, var=1
-    ):
-        directory = tempfile.TemporaryDirectory()
-
-        output_filename = "simplePHENOTYPES_multiple"
-
-        vcf_filename = "vcf_comparison_simplePHENOTYPES"
-        with open(f"{directory.name}/{vcf_filename}.vcf", "w") as vcf_file:
-            ts.write_vcf(vcf_file)
-        cmd = ["Rscript", "data/simulate_simplePHENOTYPES_multiple.R"]
-        args = [
-            str(num_causal),
-            str(h2),
-            directory.name,
-            vcf_filename,
-            str(num_rep),
-            output_filename,
-            str(mean),
-            str(var),
-            str(random_seed),
-        ]
-        input_cmd = cmd + args
-        subprocess.check_output(input_cmd)
-
-        phenotype_df = pd.read_csv(f"{directory.name}/{output_filename}.csv")
-
-        num_ind = ts.num_individuals
-        # Change the individual ID in simplePHENOTYPES output to be consistent with the
-        # tstrait output
-        for i in range(num_ind):
-            phenotype_df = phenotype_df.replace(f"tsk_{i}", i)
-
-        directory.cleanup()
-
-        return phenotype_df
-
-    def _simulate_simplePHENOTYPES(
-        self, ts, num_causal, random_seed, h2=1, num_rep=1, normalize=False, **kwargs
-    ):
-        """
-        For single simulation (num_rep=1), we will be simulating effect sizes from
-        a geometric series, which is the only distribution supported in simplePHENOTYPES.
-        For multiple simulations (num_rep>=1), we will be simulating effect sizes from
-        a standard normal distribution.
-        """
-
-        phenotype_result = pd.DataFrame(
-            {"individual_id": [], "trait_id": [], "phenotype": []}
-        )
-        trait_result = pd.DataFrame(
-            {"site_id": [], "causal_allele": [], "effect_size": [], "trait_id": []}
-        )
-
-        if num_rep == 1:
-            simulation_output = self._simulate_simplePHENOTYPES_single(
-                ts=ts, num_causal=num_causal, random_seed=random_seed, **kwargs
-            )
-            if normalize:
-                simulation_output.phenotype = tstrait.normalise_phenotypes(
-                    simulation_output.phenotype
-                )
-            phenotype_result = pd.concat(
-                [phenotype_result, simulation_output.phenotype]
-            )
-            trait_result = pd.concat([trait_result, simulation_output.trait])
-            trait_result = trait_result.astype({"site_id": int})
-
-        else:
-            # We are not examining the trait information in multiple simulation, and
-            # accurately extracting trait information in a few lines of Python codes
-            # is challenging, so I wrote a new R code to do the simulation multiple
-            # times.
-            # Phenotypes are normalized in multiple simulation
-            phenotype_result = self._simulate_simplePHENOTYPES_multiple(
-                ts=ts,
-                num_causal=num_causal,
-                random_seed=random_seed,
-                num_rep=num_rep,
-                h2=h2,
-                mean=kwargs["mean"],
-                var=kwargs["var"],
-            )
-        simulation_result = SimulationResult(
-            phenotype=phenotype_result, trait=trait_result
-        )
+    num_ind = ts.num_individuals
+    # Change the individual ID in simplePHENOTYPES output to be consistent with the
+    # tstrait output
+    for i in range(num_ind):
+        phenotype_df = phenotype_df.replace(f"tsk_{i}", i)
+
+    # Obtain the list of causal allele
+    causal_allele = []
+    effect_size = []
+    effect_size_2 = []
+    for i, site_id in enumerate(qtn_df["snp"].values, start=1):
+        # simplePHENOTYPES uses ancestral state as a causal allele
+        allele = ts.site(site_id).ancestral_state
+        causal_allele.append(allele)
+        effect_size.append(add_effect**i)
+        effect_size_2.append(add_effect_2**i)
+
+    if num_trait == 2:
+        effect_size = np.append(effect_size, effect_size_2)
+
+    trait_df = pd.DataFrame(
+        {
+            "site_id": np.tile(qtn_df["snp"].values, num_trait),
+            "causal_allele": np.tile(causal_allele, num_trait),
+            "effect_size": effect_size,
+            "trait_id": np.repeat(np.arange(num_trait), len(causal_allele)),
+        }
+    )
 
-        return simulation_result
+    simulation_result = SimulationResult(phenotype=phenotype_df, trait=trait_df)
 
-    def _simulate_AlphaSimR(
-        self, ts, num_causal, h2, random_seed, num_rep=1, h2_2=1, corA=1, num_trait=1
-    ):
-        """
-        The function to simulate quantitative traits by using AlphaSimR. We will
-        specify the number of causal sites, such that the AlphaSimR simulation
-        will be conducted randomly. corA is used to specify the correlation
-        coefficient of pleiotropic traits.
-        """
+    return simulation_result
 
-        directory = tempfile.TemporaryDirectory()
-        tree_filename = "tree_comparison_AlphaSimR"
-        ts.dump(f"{directory.name}/{tree_filename}.tree")
-        phenotype_filename = "phenotype_comparison_AlphaSimR"
-        trait_filename = "trait_comparison_AlphaSimR"
-        cmd = ["Rscript", "data/simulate_AlphaSimR.R"]
+def _simulate_AlphaSimR(
+    ts, num_causal, h2, random_seed, num_rep=1, h2_2=1, corA=1, num_trait=1
+):
+    """
+    The function to simulate quantitative traits by using AlphaSimR. We will
+    specify the number of causal sites, such that the AlphaSimR simulation
+    will be conducted randomly. corA is used to specify the correlation
+    coefficient of pleiotropic traits. The output of this function is a
+    PhenotypeResult object.
+    """
+    
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        ts.dump(f"{tmpdirname}/tree_seq.tree")
+        cmd = ["Rscript", "scripts/simulate_AlphaSimR.R"]
         args = [
             str(num_causal),
-            directory.name,
-            tree_filename,
-            phenotype_filename,
-            trait_filename,
+            tmpdirname,
             str(corA),
             str(num_trait),
             str(h2),
@@ -361,121 +290,103 @@ def _simulate_AlphaSimR(
         input_cmd = cmd + args
         subprocess.check_output(input_cmd)
 
-        phenotype_df = pd.read_csv(f"{directory.name}/{phenotype_filename}.csv")
-        trait_df = pd.read_csv(f"{directory.name}/{trait_filename}.csv")
+        phenotype_df = pd.read_csv(f"{tmpdirname}/phenotype_alphasimr.csv")
+        trait_df = pd.read_csv(f"{tmpdirname}/trait_alphasimr.csv")
 
-        # Obtain the list of causal allele
-        causal_allele = []
-        for site_id in trait_df["site_id"]:
-            allele = ts.mutation(site_id).derived_state
-            causal_allele.append(allele)
-
-        trait_df["causal_allele"] = causal_allele
+    # Obtain the list of causal allele
+    causal_allele = []
+    for site_id in trait_df["site_id"]:
+        allele = ts.mutation(site_id).derived_state
+        causal_allele.append(allele)
 
-        simulation_result = SimulationResult(phenotype=phenotype_df, trait=trait_df)
+    trait_df["causal_allele"] = causal_allele
 
-        directory.cleanup()
+    simulation_result = SimulationResult(phenotype=phenotype_df, trait=trait_df)
 
-        return simulation_result
+    return simulation_result
 
-    def _simulate_arg_needle(self, ts, alpha, h2, num_rep, random_seed):
-        """
-        Simulate phenotypes by using the quantitative trait simulation framework that is
-        adapted from the ARG-Needle paper.
-        https://zenodo.org/records/7745746
-        """
 
-        rng = np.random.default_rng(random_seed)
-        num_ind = ts.num_individuals
+def _simulate_arg_needle(ts, alpha, h2, random_seed, num_rep=1):
+    """
+    Simulate phenotypes by using the quantitative trait simulation framework that is
+    adapted from the ARG-Needle paper.
+    https://zenodo.org/records/7745746
+    """
 
-        phenotype_result = pd.DataFrame({"individual_id": [], "phenotype": []})
-        trait_result = pd.DataFrame(
-            {"site_id": [], "causal_allele": [], "effect_size": [], "trait_id": []}
-        )
+    rng = np.random.default_rng(random_seed)
+    num_ind = ts.num_individuals
 
-        for _ in range(num_rep):
-            phenotypes = np.zeros(num_ind)
-            beta_list = []
-            causal_allele = []
-
-            for variant in ts.variants():
-                row = variant.genotypes.astype("float64")
-                row = row.reshape((num_ind, 2)).sum(axis=-1)
-                std = np.std(row, ddof=1)
-                beta = rng.normal()
-                beta_list.append(beta)
-                causal_allele.append(variant.alleles[1])
-                phenotypes += row * (beta * std**alpha)
-
-            # normalize pheno to have mean 0 and var = h2
-            phenotypes -= np.mean(phenotypes)
-            phenotypes /= np.std(phenotypes, ddof=1)
-            phenotypes *= np.sqrt(h2)
-
-            # sample environmental component with variance 1-h2 and add it to phenotype
-            phenotypes += rng.normal(size=num_ind) * np.sqrt(1 - h2)
-
-            # normalize it all
-            phenotypes -= np.mean(phenotypes)
-            phenotypes /= np.std(phenotypes, ddof=1)
-
-            phenotype_df = pd.DataFrame(
-                {"individual_id": np.arange(len(phenotypes)), "phenotype": phenotypes}
-            )
-            trait_df = pd.DataFrame(
-                {
-                    "site_id": np.arange(len(causal_allele)),
-                    "causal_allele": causal_allele,
-                    "effect_size": beta_list,
-                    "trait_id": np.zeros(len(causal_allele)),
-                }
-            )
+    phenotype_result = pd.DataFrame({"individual_id": [], "phenotype": []})
+    trait_result = pd.DataFrame(
+        {"site_id": [], "causal_allele": [], "effect_size": [], "trait_id": []}
+    )
 
-            phenotype_result = pd.concat(
-                [phenotype_result, phenotype_df], ignore_index=True
-            )
-            trait_result = pd.concat([trait_result, trait_df], ignore_index=True)
+    for _ in range(num_rep):
+        phenotypes = np.zeros(num_ind)
+        beta_list = []
+        causal_allele = []
 
-        trait_result = trait_result.astype({"site_id": int})
-        simulation_result = SimulationResult(
-            phenotype=phenotype_result, trait=trait_result
+        for variant in ts.variants():
+            row = variant.genotypes.astype("float64")
+            row = row.reshape((num_ind, 2)).sum(axis=-1)
+            std = np.std(row, ddof=1)
+            beta = rng.normal()
+            beta_list.append(beta)
+            causal_allele.append(variant.alleles[1])
+            phenotypes += row * (beta * std**alpha)
+
+        # normalize pheno to have mean 0 and var = h2
+        phenotypes -= np.mean(phenotypes)
+        phenotypes /= np.std(phenotypes, ddof=1)
+        phenotypes *= np.sqrt(h2)
+
+        # sample environmental component with variance 1-h2 and add it to phenotype
+        phenotypes += rng.normal(size=num_ind) * np.sqrt(1 - h2)
+
+        # normalize it all
+        phenotypes -= np.mean(phenotypes)
+        phenotypes /= np.std(phenotypes, ddof=1)
+
+        phenotype_df = pd.DataFrame(
+            {"individual_id": np.arange(len(phenotypes)), "phenotype": phenotypes}
+        )
+        trait_df = pd.DataFrame(
+            {
+                "site_id": np.arange(len(causal_allele)),
+                "causal_allele": causal_allele,
+                "effect_size": beta_list,
+                "trait_id": np.zeros(len(causal_allele)),
+            }
         )
 
-        return simulation_result
-
-    def _run(self, ts, h2, random_seed, num_rep=1, **kwargs):
-        if self.model == "simplePHENOTYPES":
-            simulation_output = self._simulate_simplePHENOTYPES(
-                ts=ts, h2=h2, random_seed=random_seed, num_rep=num_rep, **kwargs
-            )
-
-        elif self.model == "AlphaSimR":
-            simulation_output = self._simulate_AlphaSimR(
-                ts=ts, h2=h2, random_seed=random_seed, num_rep=num_rep, **kwargs
-            )
-
-        elif self.model == "ARG-Needle":
-            simulation_output = self._simulate_arg_needle(
-                ts=ts, h2=h2, random_seed=random_seed, num_rep=num_rep, **kwargs
-            )
+        phenotype_result = pd.concat(
+            [phenotype_result, phenotype_df], ignore_index=True
+        )
+        trait_result = pd.concat([trait_result, trait_df], ignore_index=True)
 
-        return simulation_output
+    trait_result = trait_result.astype({"site_id": int})
+    simulation_result = SimulationResult(
+        phenotype=phenotype_result, trait=trait_result
+    )
 
+    return simulation_result
 
 class ExactTest(Test):
     """
-    Compare tstrait against simplePHENOTYPES, AlphaSimR and the simulation framework
-    in ARG-Needle paper. For all benchmarking, we simulate a genetic data of 100
-    individuals with 100 kb sequence length and mutations are simulated from an
-    infinite-sites model.
-
-    We assume that the following in all quantitative trait simulation:
-    - Narrow-sense heritability is 1 to compare the simulated genetic values.
-    - Genetic effect sizes are simulated from an external simulator and they are used to
-    simulate quantitative traits in tstrait.
+    Test class that conducts exact test. Phenotype and trait information are
+    simulated by using an external simulator. Afterwards, the trait information is used
+    to compute the genetic values by using tstrait, and their values are compared
+    to examine if the tstrait package can compute the accurate genetic values.
+    
+    The numericalization input is used to modify the numericalization of genotypes.
+    The standard numericalization of tstrait is (AA=2, Aa=1, aa=0), where A is the
+    causal allele, but when numericalization is True, it will be (AA=1, Aa=0, aa=-1).
     """
-
-    def _run(self, model, random_seed, **kwargs):
+    
+    def _simulate_tree_seq(self, random_seed):
+        """
+        Simulates a tree sequence from an infinite-sites model.
+        """
         ts = msprime.sim_ancestry(
             samples=100,
             recombination_rate=1e-8,
@@ -486,90 +397,299 @@ def _run(self, model, random_seed, **kwargs):
         ts = msprime.sim_mutations(
             ts, rate=1e-8, random_seed=random_seed, discrete_genome=False
         )
-
-        simulator = PhenotypeSimulator(model=model)
-        simulation_output = simulator._run(ts, h2=1, random_seed=random_seed, **kwargs)
-
-        trait_df = simulation_output.trait.sort_values(by=["site_id"])
+        
+        return ts
+        
+    def _compute_tstrait(self, ts, trait_df, numericalization=False):
+        """
+        This method computes genetic values from the trait dataframe.
+        There is an option to change the numericalization of genetic values.
+        No other post-processing is conducted to the computed genetic values.
+        """
+        trait_df=trait_df.sort_values(by=["site_id"])
         genetic_df = tstrait.genetic_value(ts, trait_df)
+        if numericalization:
+            trait_id_array = trait_df["trait_id"].unique()
+            for trait_id in trait_id_array:
+                effect_size_sum = trait_df[trait_df["trait_id"]==trait_id]["effect_size"].sum()
+                genetic_df.loc[genetic_df["trait_id"]==trait_id, ["genetic_value"]] += effect_size_sum
         phenotype_df = tstrait.sim_env(genetic_df, h2=1, random_seed=1)
-
-        if model == "simplePHENOTYPES":
-            """
-            simplePHENOTYPES just makes the mean to be 0 and there is no scaling
-            to the simulation output based on its variance
-            """
-            grouped = phenotype_df.groupby("trait_id")[["phenotype"]]
-            phenotype_df = grouped.transform(lambda x: (x - x.mean()))
-        elif model == "AlphaSimR":
-            """
-            AlphaSimR normalizes the simulation output with ddof=0
-            """
-            phenotype_df = tstrait.normalise_phenotypes(
-                phenotype_df, mean=0, var=1, ddof=0
-            )
-        elif model == "ARG-Needle":
-            """
-            Simulation framework in the ARG-Needle paper normalizes the simulation
-            output with ddof=0
-            """
-            phenotype_df = tstrait.normalise_phenotypes(phenotype_df, mean=0, var=1)
-
-        tstrait_phenotype = phenotype_df["phenotype"]
-        simulated_phenotype = simulation_output.phenotype["phenotype"].values
-
-        np.testing.assert_array_almost_equal(tstrait_phenotype, simulated_phenotype)
-
-    def test_exact_simplePHENOTYPES_single(self):
-        self._run(
-            model="simplePHENOTYPES",
+        
+        return phenotype_df
+        
+class ExactTestSimplePHENOTYPES(ExactTest):
+    """
+    Exact test between simplePHENOTYPES and tstrait.
+    """
+            
+    def _compare_simplePHENOTYPES(self, num_causal, random_seed, add_effect=1, num_trait=1, add_effect_2=1, numericalization=False):
+        ts = self._simulate_tree_seq(random_seed)
+        simulation_output=_simulate_simplePHENOTYPES_exact(
+            ts=ts, num_causal=num_causal, random_seed=random_seed, add_effect=add_effect,
+            add_effect_2=add_effect_2, num_trait=num_trait
+        )
+        phenotype_df = self._compute_tstrait(ts=ts, trait_df=simulation_output.trait, numericalization=numericalization)
+        grouped = phenotype_df.groupby("trait_id")[["phenotype"]]
+        phenotype_df = grouped.transform(lambda x: (x - x.mean()))
+        
+        np.testing.assert_array_almost_equal(phenotype_df["phenotype"].values,
+                                             simulation_output.phenotype["phenotype"].values)
+        
+    def test_exact_simplePHENOTYPES_single_causal_30_add_09(self):
+        self._compare_simplePHENOTYPES(
             num_causal=30,
             add_effect=0.9,
             random_seed=100,
             num_trait=1,
-            normalize=False,
+            numericalization=False,
         )
-
-    def test_exact_simplePHENOTYPES_pleiotropic(self):
-        self._run(
-            model="simplePHENOTYPES",
-            num_causal=30,
+        
+    def test_exact_simplePHENOTYPES_single_causal_50_add_09(self):
+        self._compare_simplePHENOTYPES(
+            num_causal=50,
             add_effect=0.9,
             random_seed=101,
+            num_trait=1,
+            numericalization=False,
+        )
+        
+    def test_exact_simplePHENOTYPES_single_causal_30_add_11(self):
+        self._compare_simplePHENOTYPES(
+            num_causal=30,
+            add_effect=1.1,
+            random_seed=102,
+            num_trait=1,
+            numericalization=False,
+        )
+        
+    def test_exact_simplePHENOTYPES_single_causal_50_add_11(self):
+        self._compare_simplePHENOTYPES(
+            num_causal=50,
+            add_effect=1.1,
+            random_seed=103,
+            num_trait=1,
+            numericalization=False,
+        )
+        
+    def test_exact_simplePHENOTYPES_single_causal_30_add_09_numer(self):
+        self._compare_simplePHENOTYPES(
+            num_causal=30,
+            add_effect=0.9,
+            random_seed=104,
+            num_trait=1,
+            numericalization=True,
+        )
+        
+    def test_exact_simplePHENOTYPES_single_causal_30_add_11_numer(self):
+        self._compare_simplePHENOTYPES(
+            num_causal=30,
+            add_effect=1.1,
+            random_seed=106,
+            num_trait=1,
+            numericalization=True,
+        )
+        
+    def test_exact_simplePHENOTYPES_single_causal_50_add_11_numer(self):
+        self._compare_simplePHENOTYPES(
+            num_causal=50,
+            add_effect=1.1,
+            random_seed=107,
+            num_trait=1,
+            numericalization=True,
+        )
+        
+    def test_exact_simplePHENOTYPES_pleio_causal_30_add_09_08(self):
+        self._compare_simplePHENOTYPES(
+            num_causal=30,
+            add_effect=0.9,
+            random_seed=108,
+            num_trait=2,
+            add_effect_2=0.8,
+            numericalization=False,
+        )
+        
+    def test_exact_simplePHENOTYPES_pleio_causal_50_add_11_08(self):
+        self._compare_simplePHENOTYPES(
+            num_causal=50,
+            add_effect=1.1,
+            random_seed=110,
+            num_trait=2,
+            add_effect_2=0.8,
+            numericalization=False,
+        )
+        
+    def test_exact_simplePHENOTYPES_pleio_causal_30_add_08_08(self):
+        self._compare_simplePHENOTYPES(
+            num_causal=30,
+            add_effect=0.8,
+            random_seed=111,
+            num_trait=2,
+            add_effect_2=0.8,
+            numericalization=False,
+        )
+        
+    def test_exact_simplePHENOTYPES_pleio_causal_50_add_09_08_numer(self):
+        self._compare_simplePHENOTYPES(
+            num_causal=50,
+            add_effect=0.9,
+            random_seed=112,
+            num_trait=2,
+            add_effect_2=0.8,
+            numericalization=True,
+        )
+        
+    def test_exact_simplePHENOTYPES_pleio_causal_30_add_08_08_numer(self):
+        self._compare_simplePHENOTYPES(
+            num_causal=30,
+            add_effect=0.8,
+            random_seed=113,
             num_trait=2,
             add_effect_2=0.8,
-            normalize=False,
+            numericalization=True,
         )
 
-    def test_exact_AlphaSimR_single(self):
-        self._run(model="AlphaSimR", num_causal=100, random_seed=200)
 
-    def test_exact_AlphaSimR_pleiotropic(self):
-        self._run(
-            model="AlphaSimR", num_causal=100, random_seed=201, corA=0.8, num_trait=2
+class ExactTestAlphaSimR(ExactTest):
+    """
+    Exact test between AlphaSimR and tstrait.
+    """
+
+    def _compare_AlphaSimR(self, num_causal, random_seed, corA=1, numericalization=False):
+        ts = self._simulate_tree_seq(random_seed)
+        simulation_output=_simulate_AlphaSimR(
+            ts=ts, num_causal=num_causal, h2=1, random_seed=random_seed, corA=corA
+        )
+        phenotype_df = self._compute_tstrait(ts=ts, trait_df=simulation_output.trait, numericalization=numericalization)
+        phenotype_df = tstrait.normalise_phenotypes(
+            phenotype_df, mean=0, var=1, ddof=0
+        )
+        
+        np.testing.assert_array_almost_equal(phenotype_df["phenotype"].values,
+                                             simulation_output.phenotype["phenotype"].values)
+
+    def test_exact_AlphaSimR_single_causal_30(self):
+        self._compare_AlphaSimR(num_causal=30, random_seed=200, numericalization=False)
+        
+    def test_exact_AlphaSimR_single_causal_100(self):
+        self._compare_AlphaSimR(num_causal=100, random_seed=201, numericalization=False)
+
+    def test_exact_AlphaSimR_single_causal_30_numer(self):
+        self._compare_AlphaSimR(num_causal=30, random_seed=202, numericalization=True)
+
+    def test_exact_AlphaSimR_single_causal_100_numer(self):
+        self._compare_AlphaSimR(num_causal=100, random_seed=203, numericalization=True)
+
+    def test_exact_AlphaSimR_pleiotropic_causal_30_cor08(self):
+        self._compare_AlphaSimR(
+            model="AlphaSimR", num_causal=30, random_seed=204, corA=0.8, num_trait=2,
+            numericalization=False
         )
 
-    def test_exact_ARG_Needle(self):
-        self._run(model="ARG-Needle", alpha=0, random_seed=300)
+    def test_exact_AlphaSimR_pleiotropic_causal_30_cor01(self):
+        self._compare_AlphaSimR(
+            model="AlphaSimR", num_causal=30, random_seed=205, corA=0.1, num_trait=2,
+            numericalization=False
+        )
+        
+    def test_exact_AlphaSimR_pleiotropic_causal_100_cor01(self):
+        self._compare_AlphaSimR(
+            model="AlphaSimR", num_causal=100, random_seed=206, corA=0.1, num_trait=2,
+            numericalization=False
+        )
+        
+    def test_exact_AlphaSimR_pleiotropic_causal_100_cor08(self):
+        self._compare_AlphaSimR(
+            model="AlphaSimR", num_causal=100, random_seed=207, corA=0.8, num_trait=2, 
+            numericalization=False
+        )
+    
+    def test_exact_AlphaSimR_pleiotropic_causal_30_cor08_numer(self):
+        self._compare_AlphaSimR(
+            model="AlphaSimR", num_causal=30, random_seed=208, corA=0.8, num_trait=2,
+            numericalization=True
+        )
+
+    def test_exact_AlphaSimR_pleiotropic_causal_30_cor08_numer(self):
+        self._compare_AlphaSimR(
+            model="AlphaSimR", num_causal=30, random_seed=209, corA=0.8, num_trait=2,
+            numericalization=True
+        )
+        
+    def test_exact_AlphaSimR_pleiotropic_causal_30_cor01_numer(self):
+        self._compare_AlphaSimR(
+            model="AlphaSimR", num_causal=30, random_seed=205, corA=0.1, num_trait=2,
+            numericalization=True
+        )
+
+
+class ExactTestARGNeedle(ExactTest):
+    """
+    Exact test between the simulation framework described in the
+    ARG-Needle paper and tstrait.
+    """
+
+    def _compare_arg_needle(self, alpha, random_seed, numericalization=False):
+        ts = self._simulate_tree_seq(random_seed)
+        simulation_output=_simulate_arg_needle(
+            ts=ts, alpha=alpha, h2=1, random_seed=random_seed
+        )
+        phenotype_df = self._compute_tstrait(ts=ts, trait_df=simulation_output.trait, numericalization=numericalization)
+        phenotype_df = tstrait.normalise_phenotypes(phenotype_df, mean=0, var=1)
+        
+        np.testing.assert_array_almost_equal(phenotype_df["phenotype"].values,
+                                             simulation_output.phenotype["phenotype"].values)
+
+    def test_exact_ARG_Needle_alpha_0(self):
+        self._compare_arg_needle(alpha=0, random_seed=300, numericalization=False)
+        
+    def test_exact_ARG_Needle_alpha_negative_1(self):
+        self._compare_arg_needle(alpha=-1, random_seed=301, numericalization=False)
+        
+    def test_exact_ARG_Needle_alpha_1(self):
+        self._compare_arg_needle(alpha=1, random_seed=302, numericalization=False)
+        
+    def test_exact_ARG_Needle_alpha_0_numer(self):
+        self._compare_arg_needle(alpha=0, random_seed=303, numericalization=True)
+        
+    def test_exact_ARG_Needle_alpha_negative_1_numer(self):
+        self._compare_arg_needle(alpha=-1, random_seed=304, numericalization=True)
+        
+    def test_exact_ARG_Needle_alpha_1_numer(self):
+        self._compare_arg_needle(alpha=1, random_seed=305, numericalization=True)    
 
 
 class ComparisonTest(Test):
     """
-    Compare tstrait against simplePHENOTYPES, AlphaSimR and the simulation framework
-    in ARG-Needle paper. We assume an infinite sites model and we also assume that
-    all sites are causal.
+    Compare tstrait against currently available simulators and create a QQ-plot. We
+    assume an infinite sites model and we also assume that all sites are causal.
     """
 
-    def _simulate_tstrait(self, ts, num_rep, h2, random_seed=None, **kwargs):
+    def _compare_phenotype(self, data1, data1_name, data2, data2_name, ind_id):
+        """
+        The input of this function should be a pandas dataframe with individual_id and
+        phenotype columns. The values inside the individual_id column must be a number
+        and match with the 2 dataframes that are provided inside this function.
+        `ind_id` is a list of individual IDs that are of interest to compare.
+        """
+        
+        for i in ind_id:
+            data1_phenotype = data1.loc[data1["individual_id"] == i]["phenotype"].values
+            data2_phenotype = data2.loc[data2["individual_id"] == i]["phenotype"].values
+            self._plot_qq_compare(
+                data1=data1_phenotype,
+                data1_name=f"{data1_name}_ind_{i}",
+                data2=data2_phenotype,
+                data2_name=f"{data2_name}_ind_{i}",
+            )
+
+
+    def _simulate_tstrait(self, ts, num_rep, h2, random_seed, mean=0, var=1, alpha=0):
         """
         Simulates trait and phenotype information based on a simulated tree
         sequence and return the simulated phenotypes that are normalised.
         We assume that all sites are causal.
         """
 
-        mean = kwargs.get("mean", 0)
-        var = kwargs.get("var", 1)
-        alpha = kwargs.get("alpha", 0)
         model = tstrait.trait_model(distribution="normal", mean=mean, var=var)
         phenotype_df_list = []
         for i in range(num_rep):
@@ -589,6 +709,11 @@ def _simulate_tstrait(self, ts, num_rep, h2, random_seed=None, **kwargs):
         return phenotype_result
 
     def _simulate_out_of_africa(self, random_seed):
+        """
+        Simulate a tree sequence data from the Out of Africa model and select individuals
+        from different populations.
+        """
+        rng = np.random.default_rng(random_seed)
         species = stdpopsim.get_species("HomSap")
         model = species.get_demographic_model("OutOfAfrica_3G09")
         contig = species.get_contig("chr22", mutation_rate=0, length_multiplier=1e-3)
@@ -598,90 +723,141 @@ def _simulate_out_of_africa(self, random_seed):
         ts = msprime.sim_mutations(
             ts, rate=model.mutation_rate, random_seed=random_seed, discrete_genome=False
         )
-        return ts
-
-    def _compare_phenotype(self, data1, data1_name, data2, data2_name, random_seed):
-        """
-        The input of this function should be a pandas dataframe with individual_id and
-        phenotype columns. The values inside the individual_id column must be a number
-        and match with the 2 dataframes that are provided inside this function.
-        """
-
-        rng = np.random.default_rng(random_seed)
-
-        yri = rng.integers(low=0, high=300)
-        chb = rng.integers(low=301, high=600)
-        ceu = rng.integers(low=601, high=900)
-
-        for i in [yri, chb, ceu]:
-            data1_phenotype = data1.loc[data1["individual_id"] == i]["phenotype"].values
-            data2_phenotype = data2.loc[data2["individual_id"] == i]["phenotype"].values
-            self._plot_qq_compare(
-                data1=data1_phenotype,
-                data1_name=f"{data1_name}_ind_{i}",
-                data2=data2_phenotype,
-                data2_name=f"{data2_name}_ind_{i}",
+        node0 = rng.choice(ts.samples(population=0))
+        node1 = rng.choice(ts.samples(population=1))
+        node2 = rng.choice(ts.samples(population=2))
+        ind_id = np.array([
+            ts.node(node0).individual,
+            ts.node(node1).individual,
+            ts.node(node2).individual,
+        ])
+        return ts, ind_id
+    
+class ComparisonTestSimplePHENOTYPES(ComparisonTest):
+    
+    def _qqplot_simplePHENOTYPES(self, num_rep, h2, random_seed, mean=0, var=1, alpha=0):
+        ts, ind_id = self._simulate_out_of_africa(random_seed=random_seed)
+        tstrait_phenotype = self._simulate_tstrait(
+            ts=ts, num_rep=num_rep, h2=h2, random_seed=random_seed,
+            mean=mean, var=var, alpha=alpha
             )
-
-    def _run(self, model, h2, num_rep=200, random_seed=123, **kwargs):
-        ts = self._simulate_out_of_africa(random_seed)
-        tstrait_phenotype_result = self._simulate_tstrait(
-            ts=ts, num_rep=num_rep, h2=h2, random_seed=random_seed, **kwargs
-        )
-
-        if model in ["simplePHENOTYPES", "AlphaSimR"]:
-            kwargs["num_causal"] = ts.num_sites
-
-        # simplePHENOTYPES uses ancestral state as a causal allele
-        if model == "simplePHENOTYPES":
-            kwargs["mean"] *= -1
-
-        simulator = PhenotypeSimulator(model=model)
-        simulation_output = simulator._run(
-            ts=ts, h2=h2, random_seed=random_seed, num_rep=num_rep, **kwargs
+        # - mean because simplePHENOTYPES uses ancestral state as a causal allele
+        simplePHENOTYPES_phenotype = _simulate_simplePHENOTYPES_qqplot(
+            ts=ts, num_causal=ts.num_sites, h2=h2, random_seed=random_seed,
+            num_rep=num_rep, mean=-mean, var=var
         )
-
         self._compare_phenotype(
-            data1=tstrait_phenotype_result,
+            data1=tstrait_phenotype,
             data1_name="tstrait",
-            data2=simulation_output.phenotype,
-            data2_name=model,
-            random_seed=random_seed,
+            data2=simplePHENOTYPES_phenotype,
+            data2_name="simplePHENOTYPES",
+            ind_id=ind_id,
         )
 
     def test_compare_simplePHENOTYPES_h2_08_mean_0_var_1(self):
-        self._run(
-            model="simplePHENOTYPES",
+        self._qqplot_simplePHENOTYPES(
             num_rep=500,
-            distribution="normal",
-            random_seed=123456,
             h2=0.8,
+            random_seed=1000,
             mean=0,
             var=1,
         )
 
     def test_compare_simplePHENOTYPES_h2_09_mean_1_var_4(self):
-        self._run(
-            model="simplePHENOTYPES",
+        self._qqplot_simplePHENOTYPES(
             num_rep=500,
-            distribution="normal",
-            random_seed=123,
             h2=0.9,
+            random_seed=1001,
             mean=1,
             var=4,
         )
-
+    
+    def test_compare_simplePHENOTYPES_h2_02_mean_negative_1_var_4(self):
+        self._qqplot_simplePHENOTYPES(
+            num_rep=500,
+            h2=0.2,
+            random_seed=1002,
+            mean=-1,
+            var=4,
+        )
+        
+    def test_compare_simplePHENOTYPES_h2_01_mean_0_var_4(self):
+        self._qqplot_simplePHENOTYPES(
+            num_rep=500,
+            h2=0.1,
+            random_seed=1001,
+            mean=0,
+            var=4,
+        )
+        
+        
+class ComparisonTestAlphaSimR(ComparisonTest):
+    
+    def _qqplot_AlphaSimR(self, num_rep, h2, random_seed):
+        ts, ind_id = self._simulate_out_of_africa(random_seed=random_seed)
+        tstrait_phenotype = self._simulate_tstrait(
+            ts=ts, num_rep=num_rep, h2=h2, random_seed=random_seed
+            )
+        alphasimr_phenotype = _simulate_AlphaSimR(
+            ts=ts, num_causal=ts.num_sites, h2=h2, random_seed=random_seed,
+            num_rep=num_rep
+        )
+        self._compare_phenotype(
+            data1=tstrait_phenotype,
+            data1_name="tstrait",
+            data2=alphasimr_phenotype.phenotype,
+            data2_name="AlphaSimR",
+            ind_id=ind_id,
+        )
+        
+    def test_compare_AlphaSimR_h2_08(self):
+        self._qqplot_AlphaSimR(num_rep=500, random_seed=2000, h2=0.8)
+        
+    def test_compare_AlphaSimR_h2_02(self):
+        self._qqplot_AlphaSimR(num_rep=500, random_seed=2001, h2=0.2)
+        
+    def test_compare_AlphaSimR_h2_05(self):
+        self._qqplot_AlphaSimR(num_rep=500, random_seed=2000, h2=0.5)
+        
+
+class ComparisonTestArgNeedle(ComparisonTest):
+    
+    def _qqplot_arg_needle(self, num_rep, h2, alpha, random_seed):
+        ts, ind_id = self._simulate_out_of_africa(random_seed=random_seed)
+        tstrait_phenotype = self._simulate_tstrait(
+            ts=ts, num_rep=num_rep, h2=h2, random_seed=random_seed
+            )
+        argneedle_phenotype = _simulate_arg_needle(
+            ts=ts, alpha=alpha, h2=h2, random_seed=random_seed, num_rep=num_rep
+        )
+        self._compare_phenotype(
+            data1=tstrait_phenotype,
+            data1_name="tstrait",
+            data2=argneedle_phenotype.phenotype,
+            data2_name="AlphaSimR",
+            ind_id=ind_id,
+        )
+        
     def test_compare_arg_needle_alpha_0_h2_08(self):
-        self._run(model="ARG-Needle", num_rep=500, h2=0.8, random_seed=12345, alpha=0)
+        self._qqplot_arg_needle(num_rep=500, h2=0.8, random_seed=3000, alpha=0)
 
     def test_compare_arg_needle_alpha_negative_1_h2_08(self):
-        self._run(model="ARG-Needle", num_rep=500, h2=0.8, random_seed=3333, alpha=-1)
+        self._qqplot_arg_needle(num_rep=500, h2=0.8, random_seed=3001, alpha=-1)
 
     def test_compare_arg_needle_alpha_positive_1_h2_1(self):
-        self._run(model="ARG-Needle", num_rep=500, h2=1, random_seed=3333, alpha=1)
-
-    def test_compare_AlphaSimR_h2_08(self):
-        self._run(model="AlphaSimR", num_rep=500, random_seed=222, h2=0.8)
+        self._qqplot_arg_needle(num_rep=500, h2=1, random_seed=3002, alpha=1)
+        
+    def test_compare_arg_needle_alpha_positive_2_h2_01(self):
+        self._qqplot_arg_needle(num_rep=500, h2=0.1, random_seed=3003, alpha=2)
+        
+    def test_compare_arg_needle_alpha_negative_2_h2_01(self):
+        self._qqplot_arg_needle(num_rep=500, h2=0.1, random_seed=3004, alpha=-2)
+        
+    def test_compare_arg_needle_alpha_0_h2_02(self):
+        self._qqplot_arg_needle(num_rep=500, h2=0.2, random_seed=3005, alpha=0)
+        
+    def test_compare_arg_needle_alpha_negative_1_h2_05(self):
+        self._qqplot_arg_needle(num_rep=500, h2=0.5, random_seed=3006, alpha=-1)
 
 
 def model_list(loc, scale):