-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnhood_enrichment_binning.py
144 lines (106 loc) · 4.9 KB
/
nhood_enrichment_binning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import numpy as np
from scipy.sparse import csr_matrix
from scipy.cluster.hierarchy import linkage, leaves_list
from scipy.spatial.distance import pdist
def sort_labels(array):
# Perform hierarchical clustering
# You can choose different methods and metrics if needed
Y = pdist(array)
Z = linkage(Y, method='average')
# Get the order of the leaves of the hierarchical tree
sorted_indices = leaves_list(Z)
return sorted_indices
def label_matrix(labels, dtype=bool, return_uniques=False):
unique_labels, ind = np.unique(labels, return_inverse=True)
n,m = len(labels), len(unique_labels)
values = np.ones(n, dtype=dtype)
y = csr_matrix((values,(np.arange(n), ind)), shape=(n,m))
if return_uniques:
return y, unique_labels
else:
return y
def spatial_binning_matrix(
xy: np.ndarray, bin_width: float
) -> csr_matrix:
# Compute shifted coordinates
mi, ma = xy.min(axis=0, keepdims=True), xy.max(axis=0, keepdims=True)
xys = xy - mi
# Compute grid size
grid = ma - mi
grid = grid.flatten()
# Compute bin index
bin_ids = xys // bin_width
bin_ids = bin_ids.astype("int")
bin_ids = tuple(x for x in bin_ids.T)
# Compute grid size in indices
size = grid // bin_width + 1
size = tuple(x for x in size.astype("int"))
# Convert bin_ids to integers
linear_ind = np.ravel_multi_index(bin_ids, size)
# Create a matrix indicating which markers fall in what bin
bin_matrix = label_matrix([str(i) for i in linear_ind], dtype=bool)
bin_matrix = bin_matrix.T
return bin_matrix
def nhood_enrichment(xy: np.ndarray, labels:np.ndarray, bin_width:float) -> np.ndarray:
"""
Compute the neighborhood enrichment z-score.
This function calculates the z-score for neighborhood enrichment,
a metric to assess the interaction between different types of points (e.g., cells)
in spatial omics data based on their spatial adjacency and labels.
Parameters:
adj (sp.spmatrix): Interaction matrix representing the spatial interaction between points.
Each entry adj[i, j] is non-zero if point i is adjacent to point j.
y (sp.spmatrix): Binary matrix representing the labels for each point.
Each row corresponds to a one-hot-encoded label of each point.
bin_width (float): Width of each bin used for creating the interaction graph.
Returns:
np.ndarray: A tuple containing:
- zscore: A matrix of z-scores, representing the strength of enrichment
between each pair of labels.
- total_neighbors_count: A matrix representing the total count of neighbors
for each cell type.
- unique_labels : List of unique labels matching the rows and columns of the zscore
matrix.
The function computes the z-score by comparing the actual count of neighbors with
specific labels to the expected count under a random distribution of labels.
"""
n = len(labels)
# Create matrix with one-hot-encoded labels
y, unique_labels = label_matrix(labels, dtype='uint32', return_uniques=True)
# Create adjacency matrix using binning
adj = spatial_binning_matrix(xy, bin_width)
adj = adj.T @ adj
adj.setdiag(False)
# Calculate the number of specific neighbors for each cell
neighbors_count = adj @ y
# Total count of specific neighbors for each cell type
total_neighbors_count = y.T @ neighbors_count
total_neighbors_count = total_neighbors_count.A # Convert to dense array
# Number of points (nodes)
n = adj.shape[0]
# Compute mean and variance
mean_interaction = neighbors_count.sum(axis=0).A.flatten() / n
# Squaring the data for variance calculation
neighbors_count.data **= 2
var_interaction = neighbors_count.sum(axis=0).A.flatten() / n - mean_interaction**2
# Total count of points with specific label
total_label_count = y.sum(axis=0).A.flatten()
# Compute z-score for neighborhood enrichment
zscore = (total_neighbors_count - np.outer(total_label_count, mean_interaction)) / np.sqrt(np.outer(total_label_count, var_interaction))
# Transpose to match Squidpy
zscore = zscore.T
total_label_count = total_label_count.T
ind = sort_labels(zscore)
# Sort labels
zscore = zscore[ind,:]
zscore = zscore[:,ind]
total_neighbors_count = total_neighbors_count[ind,:]
total_neighbors_count = total_neighbors_count[:,ind]
unique_labels = unique_labels[ind]
return zscore, total_neighbors_count, unique_labels
if __name__ == '__main__':
import pandas as pd
data = pd.read_csv(r'https://tissuumaps.dckube.scilifelab.se/private/Points2Regions/toy_data.csv')
xy = data[['X','Y']].to_numpy()
labels = data['Genes'].to_numpy()
zscore, count, unique_labels = nhood_enrichment(xy, labels, 100)