From 74e724d7fbf22bd6bb6d2138a063dfafded1906f Mon Sep 17 00:00:00 2001
From: acferris <29984203+acferris@users.noreply.github.com>
Date: Fri, 11 Oct 2024 17:59:48 -0700
Subject: [PATCH] remove evaluateCutHeight

The Rand score matrix produced by evaluateCutHeight is not informative for selecting an appropriate cut height value.
---
 base.py | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/base.py b/base.py
index d9ff969..972b033 100644
--- a/base.py
+++ b/base.py
@@ -111,29 +111,6 @@ def evaluateEpsilon(embedding, filePrefix):
     rand.randScoreMatrix(embedding, ks, 'DBSCAN')
     plt.savefig(filePrefix+' DBSCAN rand matrix.png', dpi = 300)    
 
-def evaluateCutHeight(snpProportion, sampleMeta, db_communities, admixedCutoff, minRepTogether = 0.0, maxVarietyTogether = 4):
-    '''
-    Evaluate different cut height values for processing the dendrogram
-    
-    Args:
-        snpProportion: processed SNP proportion data
-        sampleMeta: metadata paired with genotyping data
-        db_communities: DBSCAN cluster number for each sample
-        admixedCutoff: clades without a reference and a minimum divergence value above this will be labeled as admixed
-        minRepTogether: the minimum proportion of reference technical replicates that are in the same clade
-        maxVarietyTogether: the maximum average number of varieties in the same clade (for clusters with at least one reference)
-    '''
-
-    #evaluate using the largest cluster 
-    db_cluster, db_counts = np.unique(db_communities, return_counts=True)
-    mainCluster = db_cluster[np.where(db_counts == max(db_counts))[0][0]]
-    clusterSubsetLarge = snpProportion[snpProportion.columns[np.where(db_communities == mainCluster)]]
-    Y_clusterLarge = sch.linkage(clusterSubsetLarge.values.T, metric='correlation') #sort samples
-    rep, avg, totalRef, cuts = rand.cutoffQuality(clusterSubsetLarge, sampleMeta, Y_clusterLarge)
-
-    ks = np.around(np.intersect1d(cuts[np.where(rep > minRepTogether*totalRef)], cuts[np.where(avg < maxVarietyTogether)]),3) 
-    rand.randScoreMatrix(snpProportion, ks, 'HC', sampleMeta = sampleMeta, admixedCutoff = admixedCutoff)
-
 def labelSamples(snpProportion,sampleMeta,db_communities,embedding, cutHeight, admixedCutoff, filePrefix):
     '''
     Evaluate different cut height values for processing the dendrogram