Skip to content

Commit

Permalink
Add comment for creating additional synthetic dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
dom-lee committed Dec 12, 2022
1 parent 05f4809 commit 06541f4
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 6 deletions.
20 changes: 19 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,23 @@
# Beyond Spectral Clustering
# EECS-553-Machine Learning (Reproducibility Project)
This repository is for the reproducibility project of the EECS 553 (Machine
Learning) course. We verified the paper "A Tighter Analysis of Spectral
Clustering, and Beyond", published in ICML 2022.

## Additional tests that we executed
1. **Less-separated Synthetic Dataset**: run
`python experiments.py complete`
-

2. **Test on the BSDS dataset with different standard deviations**: run
`python experiments.py bsds`
-

3. **Test on the MNIST dataset with different numbers of eigenvectors for the embedding**:
run



# Beyond Spectral Clustering
This directory contains the code to reproduce the results in the paper "A Tighter Analysis of Spectral Clustering, and Beyond", published in
ICML 2022.

Expand Down
14 changes: 12 additions & 2 deletions experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,12 @@ def __init__(self, k, n, prob_p, prob_q, queue, num_runs=1, use_grid=False,
self.queue = queue
self.num_runs = num_runs
self.use_grid = use_grid
self.use_complete = use_complete

# [Dongmyeong Lee] Additional conditions for generating synthetic datasets
# with less-separated clusters
self.use_complete = use_complete
self.unequal_cluster = unequal_cluster

self.d = self.k
if use_grid:
self.k = self.d * self.d
Expand Down Expand Up @@ -429,14 +433,20 @@ def main():

if args.experiment == 'cycle':
run_sbm_experiment(1000, 5, 0.01)
# [Dongmyeong Lee] Generate Unbalanced Clusters that form Cyclic Structure
elif args.experiment == 'cycle_unequal':
run_sbm_experiment([100, 120, 140, 160, 180, 200], 6, 0.01, unequal_cluster=True)
elif args.experiment == 'grid':
run_sbm_experiment(1000, 5, 0.01, use_grid=True)
# [Dongmyeong Lee] Generate Unbalanced Clusters that form Grid Structure
elif args.experiment == 'grid_unequal':
run_sbm_experiment([100 + 20 * i for i in range(4*4)], 4, 0.01, use_grid=True, unequal_cluster=True)
# [Dongmyeong Lee] Generate less-separated clusters that are similar to the cyclic structure
# p: probability of an edge between nodes in the same cluster
# q: probability of an edge between nodes in adjacent clusters
# r: probability of an edge between nodes in non-adjacent clusters (set inside the SbmCompleteDataset class)
elif args.experiment == 'complete':
run_sbm_experiment(100, 5, 0.2, use_complete=True)
run_sbm_experiment(1000, 5, 0.01, use_complete=True)
elif args.experiment == 'mnist':
run_mnist_experiment()
elif args.experiment == 'usps':
Expand Down
12 changes: 9 additions & 3 deletions pysc/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,8 @@ def __str__(self):
def __repr__(self):
return self.__str__()


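# [Dongmyeong Lee] New dataset class for generating less-separated clusters.
# r was changed manually between runs over {0.1, 0.3, 0.5, 0.7, 0.9}
# (presumably as a multiple of q, cf. `r = self.q * 0.9` in load_graph -- TODO confirm)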
class SbmCompleteDataset(Dataset):

def __init__(self, *args, k=6, n=50, p=0.3, q=0.05, **kwargs):
Expand All @@ -375,8 +376,10 @@ def load_data(self, data_file):
def load_graph(self, graph_file=None, graph_type="knn10"):
# Generate the graph from the sbm
logger.info(f"Generating {self} graph from sbm...")
r = self.q * 0.9
prob_mat = self.p * sp.sparse.eye(self.k) + \
self.q * sgtl.graph.complete_graph(self.k).adjacency_matrix()
(self.q - r) * sgtl.graph.cycle_graph(self.k).adjacency_matrix() + \
r * sgtl.graph.complete_graph(self.k).adjacency_matrix()
self.graph = sgtl.random.sbm_equal_clusters(self.n * self.k, self.k,
prob_mat.toarray())

Expand All @@ -396,7 +399,8 @@ def __str__(self):
def __repr__(self):
return self.__str__()


# [Dongmyeong Lee] New synthetic dataset to generate unbalanced clusters that
# form a cyclic structure
class SbmUnequalCycleDataset(Dataset):

def __init__(self, *args, k=6, n=[10, 20, 30, 40, 50, 60], p=0.3, q=0.05, **kwargs):
Expand Down Expand Up @@ -431,6 +435,8 @@ def __repr__(self):
return self.__str__()


# [Dongmyeong Lee] New synthetic dataset to generate unbalanced clusters that
# form a grid structure
class SbmUnequalGridDataset(Dataset):

def __init__(self, *args, d=3, n=[10, 20, 30, 40, 50, 60, 70, 80, 90], p=0.3, q=0.05, **kwargs):
Expand Down
1 change: 1 addition & 0 deletions results/sbm/complete_results.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
k, n, p, q, poverq, eigenvectors, conductance, rand

0 comments on commit 06541f4

Please sign in to comment.