Skip to content

Commit

Permalink
remove evaluateCutHeight
Browse files Browse the repository at this point in the history
This rand matrix is just not informative for selecting an appropriate cut height value
  • Loading branch information
acferris committed Oct 12, 2024
1 parent 8887b5b commit 74e724d
Showing 1 changed file with 0 additions and 23 deletions.
23 changes: 0 additions & 23 deletions base.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,29 +111,6 @@ def evaluateEpsilon(embedding, filePrefix):
rand.randScoreMatrix(embedding, ks, 'DBSCAN')
plt.savefig(filePrefix+' DBSCAN rand matrix.png', dpi = 300)

def evaluateCutHeight(snpProportion, sampleMeta, db_communities, admixedCutoff, minRepTogether = 0.0, maxVarietyTogether = 4):
    '''
    Screen candidate dendrogram cut heights and plot a rand score matrix for the ones that pass

    Candidate cut heights are assessed on the most populous DBSCAN cluster only;
    the surviving values are then handed to rand.randScoreMatrix together with
    the full snpProportion table. Nothing is returned.

    Args:
        snpProportion: processed SNP proportion data (indexed by column to select samples)
        sampleMeta: metadata paired with genotyping data
        db_communities: DBSCAN cluster number for each sample
        admixedCutoff: clades without a reference and a minimum divergence value above this will be labeled as admixed
        minRepTogether: the minimum proportion of reference technical replicates that are in the same clade
        maxVarietyTogether: the maximum average number of varieties in the same clade (for clusters with at least one reference)
    '''

    # pick the cluster label with the most members; argmax returns the first
    # maximum, so ties resolve to the earliest entry of the sorted unique labels
    labels, sizes = np.unique(db_communities, return_counts=True)
    largestLabel = labels[np.argmax(sizes)]

    # restrict the SNP table to the samples (columns) belonging to that cluster
    memberIdx = np.where(db_communities == largestLabel)
    memberColumns = snpProportion.columns[memberIdx]
    largestSubset = snpProportion[memberColumns]

    # hierarchical clustering of the samples using correlation distance
    sampleLinkage = sch.linkage(largestSubset.values.T, metric='correlation')

    # NOTE(review): return values are presumably per-cut arrays (rep, avg, cuts)
    # plus a scalar reference count (totalRef) - confirm against rand.cutoffQuality
    rep, avg, totalRef, cuts = rand.cutoffQuality(largestSubset, sampleMeta, sampleLinkage)

    # keep only cut heights that satisfy both quality criteria
    enoughReplicates = cuts[np.where(rep > minRepTogether*totalRef)]
    fewEnoughVarieties = cuts[np.where(avg < maxVarietyTogether)]
    passingCuts = np.intersect1d(enoughReplicates, fewEnoughVarieties)
    ks = np.around(passingCuts, 3)

    rand.randScoreMatrix(snpProportion, ks, 'HC', sampleMeta = sampleMeta, admixedCutoff = admixedCutoff)

def labelSamples(snpProportion,sampleMeta,db_communities,embedding, cutHeight, admixedCutoff, filePrefix):
'''
Evaluate different cut height values for processing the dendrogram
Expand Down

0 comments on commit 74e724d

Please sign in to comment.