From 138e5580978485242ffaef9da65f29a1c7690778 Mon Sep 17 00:00:00 2001 From: "Myers, Audun D" Date: Wed, 9 Oct 2024 14:30:26 -0400 Subject: [PATCH] adding_HG_matching code, tests, rst, and notebook --- .../source/algorithms/matching_algorithms.rst | 61 ++ hypernetx/algorithms/matching_algorithms.py | 596 ++++++++++++++++++ tests/algorithms/test_matching.py | 180 ++++++ .../Advanced 7 - Matching algorithms.ipynb | 261 ++++++++ 4 files changed, 1098 insertions(+) create mode 100644 docs/source/algorithms/matching_algorithms.rst create mode 100644 hypernetx/algorithms/matching_algorithms.py create mode 100644 tests/algorithms/test_matching.py create mode 100644 tutorials/advanced/Advanced 7 - Matching algorithms.ipynb diff --git a/docs/source/algorithms/matching_algorithms.rst b/docs/source/algorithms/matching_algorithms.rst new file mode 100644 index 00000000..1a85b58f --- /dev/null +++ b/docs/source/algorithms/matching_algorithms.rst @@ -0,0 +1,61 @@ +Matching Algorithms for Hypergraphs +=================================== + +Introduction +------------ +This module implements various algorithms for finding matchings in hypergraphs. These algorithms are based on the methods described in the paper: + +*Distributed Algorithms for Matching in Hypergraphs* by Oussama Hanguir and Clifford Stein. + +The paper addresses the problem of finding matchings in d-uniform hypergraphs, where each hyperedge contains exactly d vertices. The matching problem is NP-complete for d ≥ 3, making it one of the classic challenges in computational theory. The algorithms described here are designed for the Massively Parallel Computation (MPC) model, which is suitable for processing large-scale hypergraphs. + +Mathematical Foundation +------------------------ +The algorithms in this module provide different trade-offs between approximation ratios, memory usage, and computation rounds: + +1. 
**O(d²)-approximation algorithm**: + - This algorithm partitions the hypergraph into random subgraphs and computes a matching for each subgraph. The results are combined to obtain a matching for the original hypergraph. + - Approximation ratio: O(d²) + - Rounds: 3 + - Memory: O(√nm) + +2. **d-approximation algorithm**: + - Uses sampling and post-processing to iteratively build a maximal matching. + - Approximation ratio: d + - Rounds: O(log n) + - Memory: O(dn) + +3. **d(d−1 + 1/d)²-approximation algorithm**: + - Utilizes the concept of HyperEdge Degree Constrained Subgraphs (HEDCS) to find an approximate matching. + - Approximation ratio: d(d−1 + 1/d)² + - Rounds: 3 + - Memory: O(√nm) for linear hypergraphs, O(n√nm) for general cases. + +These algorithms are crucial for applications that require scalable parallel processing, such as combinatorial auctions, scheduling, and multi-agent systems. + +Usage Example +------------- +Below is an example of how to use the matching algorithms module. + +.. code-block:: python + +    from hypernetx.algorithms import matching_algorithms as ma + +    # Example hypergraph data +    hypergraph = ... 
# Assume this is a d-uniform hypergraph + +    # Compute a matching using the O(d²)-approximation algorithm +    matching = ma.greedy_matching(hypergraph, k=2) + +    # Compute a matching using the d-approximation algorithm +    matching_d = ma.iterated_sampling(hypergraph, s=10) + +    # Compute a matching using the d(d−1 + 1/d)²-approximation algorithm +    matching_d_squared = ma.HEDCS_matching(hypergraph, s=10) + +    print(matching, matching_d, matching_d_squared) + +References +------------- + +- Oussama Hanguir, Clifford Stein, Distributed Algorithms for Matching in Hypergraphs, https://arxiv.org/pdf/2009.09605 diff --git a/hypernetx/algorithms/matching_algorithms.py b/hypernetx/algorithms/matching_algorithms.py new file mode 100644 index 00000000..bf80b978 --- /dev/null +++ b/hypernetx/algorithms/matching_algorithms.py @@ -0,0 +1,596 @@ +""" +An implementation of the algorithms in: +"Distributed Algorithms for Matching in Hypergraphs", + by Oussama Hanguir and Clifford Stein (2020), https://arxiv.org/abs/2009.09605v1 +Programmer: Shira Rot, Niv +Date: 22.5.2024 +""" + +from functools import lru_cache +import hypernetx as hnx +from hypernetx.classes.hypergraph import Hypergraph +import math +import random +from concurrent.futures import ThreadPoolExecutor + +def approximation_matching_checking(optimal: list, approx: list) -> bool: + """ + Checks that every edge of the optimal matching shares at least one vertex with some edge of the approximate matching. + + Parameters + ---------- + optimal : list of lists + A list of lists representing the optimal solutions. + approx : list of lists + A list of lists representing the approximate solutions. + + Returns + ------- + bool + True if every edge in the optimal list intersects at least one edge in the approximate list, False otherwise. 
+ """ + for e in optimal: + count = 0 + e_checks = set(e) + for e_m in approx: + e_m_checks = set(e_m) + common_elements = e_checks.intersection(e_m_checks) + checking = bool(common_elements) + if checking: + count += 1 + if count < 1: + return False + return True + + +def greedy_matching(hypergraph: Hypergraph, k: int) -> list: + """ + Greedy algorithm for hypergraph matching. + + This algorithm constructs a random k-partitioning of G and finds a maximal matching. + + Parameters + ---------- + hypergraph : hnx.Hypergraph + A Hypergraph object. + k : int + The number of partitions. + + Returns + ------- + list + The edges of the graph for the greedy matching. + + Raises + ------ + NonUniformHypergraphError + If the hypergraph is not uniform (i.e., if the edges have different sizes). + + Examples + ------- + >>> import numpy as np + >>> np.random.seed(42) + >>> random.seed(42) + >>> edges = {'e1': [1, 2, 3], 'e2': [2, 3, 4], 'e3': [1, 4, 5]} + >>> hypergraph = Hypergraph(edges) + >>> k = 2 + >>> matching = greedy_matching(hypergraph, k) + >>> matching + [(2, 3, 4)] + + >>> np.random.seed(42) + >>> random.seed(42) + >>> edges_large = {f'e{i}': list(range(i, i + 3)) for i in range(1, 50)} + >>> hypergraph_large = Hypergraph(edges_large) + >>> k = 5 + >>> matching_large = greedy_matching(hypergraph_large, k) + >>> len(matching_large) + 12 + + >>> edges_non_uniform = {'e1': [1, 2, 3], 'e2': [4, 5], 'e3': [6, 7, 8, 9]} + >>> hypergraph_non_uniform = Hypergraph(edges_non_uniform) + >>> try: + ... greedy_matching(hypergraph_non_uniform, k) + ... except NonUniformHypergraphError: + ... 
print("NonUniformHypergraphError raised") + NonUniformHypergraphError raised + """ + + # Check if the hypergraph is empty + if not hypergraph.incidence_dict: + return [] + + # Check if the hypergraph is d-uniform + edge_sizes = {len(edge) for edge in hypergraph.incidence_dict.values()} + if len(edge_sizes) > 1: + raise NonUniformHypergraphError("The hypergraph is not d-uniform.") + + # Partition the hypergraph into k subgraphs + partitions = partition_hypergraph(hypergraph, k) + + # Find maximum matching for each partition in parallel + with ThreadPoolExecutor() as executor: + MM_list = list(executor.map(maximal_matching, partitions)) + + # Initialize the matching set + M = set() + + # Process each partition's matching + for MM_Gi in MM_list: + # Add edges to M if they do not violate the matching property + for edge in MM_Gi: + if not any(set(edge) & set(matching_edge) for matching_edge in M): + M.add(tuple(edge)) + + return list(M) + + +class MemoryLimitExceededError(Exception): + """Custom exception to indicate memory limit exceeded during hypergraph matching.""" + + pass + + +class NonUniformHypergraphError(Exception): + """Custom exception to indicate non d-uniform hypergraph during matching.""" + + pass + + +# necessary because Python's lru_cache decorator +# requires hashable inputs to cache function results. +def edge_tuple(hypergraph): + """ + Converts hypergraph edges to a hashable tuple. + + Parameters + ---------- + hypergraph : hnx.Hypergraph + A Hypergraph object. + + Returns + ------- + tuple + A tuple representing the hypergraph edges, where each element is a tuple containing the edge name and its sorted vertices. + """ + return tuple( + (edge, tuple(sorted(hypergraph.edges[edge]))) + for edge in sorted(hypergraph.edges) + ) + + +@lru_cache(maxsize=None) # to cache the results of this function +def cached_maximal_matching(edges): + """ + Cached version of maximal matching calculation. 
+ + Parameters + ---------- + edges : tuple + A tuple representing the hypergraph edges, where each element is a tuple containing the edge name and its sorted vertices. + + Returns + ------- + list + A list of matching edges. + """ + hypergraph = hnx.Hypergraph( + dict(edges) + ) # Converts the tuple of edges back into a hypergraph. + matching = [] + matched_vertices = set() # vertices that have already been matched. + + for edge in hypergraph.incidence_dict.values(): + if not any( + vertex in matched_vertices for vertex in edge + ): # Checks that no vertex of the current edge is already matched. + matching.append(sorted(edge)) # Adds the current edge to the matching. + matched_vertices.update(edge) + return matching # Returns the list of matching edges. + + +def maximal_matching(hypergraph: Hypergraph) -> list: + """ + Finds a maximal matching in the hypergraph. + + Parameters + ---------- + hypergraph : hnx.Hypergraph + A Hypergraph object. + + Returns + ------- + list + A fresh list of matching edges (copied from the cached result). + """ + edges = edge_tuple(hypergraph) +    return [list(edge) for edge in cached_maximal_matching(edges)]  # copy: the lru_cache'd lists must not be mutated by callers + + +def sample_edges(hypergraph: Hypergraph, p: float) -> Hypergraph: + """ + Samples edges from the hypergraph with probability p. + + Parameters + ---------- + hypergraph : hnx.Hypergraph + The input hypergraph. + p : float + The probability of sampling each edge. + + Returns + ------- + hnx.Hypergraph + A new hypergraph containing the sampled edges. + """ + sampled_edges = [ + edge for edge in hypergraph.incidence_dict.values() if random.random() < p + ] + return hnx.Hypergraph( + {f"e{i}": tuple(edge) for i, edge in enumerate(sampled_edges)} + ) + + +def sampling_round(S: Hypergraph, p: float, s: int) -> tuple: + """ + Performs a single sampling round on the hypergraph. + + Parameters + ---------- + S : hnx.Hypergraph + The input hypergraph. + p : float + The probability of sampling each edge. + s : int + The maximum number of sampled edges allowed (the memory limit). 
+ + Returns + ------- + tuple + A tuple containing the maximal matching and the sampled hypergraph. If the sampled hypergraph has more than s edges, None and the sampled hypergraph are returned. + """ + E_prime = sample_edges(S, p) + if len(E_prime.incidence_dict.values()) > s: + return None, E_prime + matching = maximal_matching(E_prime) + return matching, E_prime + + +def iterated_sampling( + hypergraph: Hypergraph, s: int, max_iterations: int = 100 +) -> list: + """ + Iterated Sampling for Hypergraph Matching. + + Uses iterated sampling to find a maximal matching in a d-uniform hypergraph. + + Parameters + ---------- + hypergraph : hnx.Hypergraph + A Hypergraph object. + s : int + The amount of memory available for the computer. + max_iterations : int, optional + The maximum number of iterations to perform. Defaults to 100. + + Returns + ------- + list + The edges of the graph for the approximate matching. + + Raises + ------ + MemoryLimitExceededError + If the memory limit is exceeded during the matching process. + + Examples + ------- + >>> import numpy as np + >>> np.random.seed(42) + >>> random.seed(42) + >>> hypergraph = Hypergraph({0: (1, 2, 3), 1: (2, 3, 4), 2: (3, 4, 5)}) + >>> result = iterated_sampling(hypergraph, 1) + >>> result + [[2, 3, 4]] + + >>> np.random.seed(42) + >>> random.seed(42) + >>> hypergraph = Hypergraph({0: (1, 2, 3, 4), 1: (2, 3, 4, 5), 2: (3, 4, 5, 6)}) + >>> result = iterated_sampling(hypergraph, 2) + >>> result + [[2, 3, 4, 5]] + + >>> np.random.seed(42) + >>> random.seed(42) + >>> hypergraph = Hypergraph({0: (1, 2, 3), 1: (4, 5, 6)}) + >>> result = None + >>> try: + ... result = iterated_sampling(hypergraph, 0) # Insufficient memory, expect failure + ... except MemoryLimitExceededError: + ... 
pass + >>> result is None + True + + >>> np.random.seed(42) + >>> random.seed(42) + >>> hypergraph = Hypergraph({0: (1, 2, 3), 1: (4, 5, 6)}) + >>> result = iterated_sampling(hypergraph, 10) # Large enough memory, expect a result + >>> result + [[4, 5, 6], [1, 2, 3]] + + >>> np.random.seed(42) + >>> random.seed(42) + >>> hypergraph = Hypergraph({0: (1, 2, 3), 1: (2, 3, 4), 2: (3, 4, 5), 3: (5, 6, 7), 4: (6, 7, 8), 5: (7, 8, 9)}) + >>> result = iterated_sampling(hypergraph, 3) + >>> result + [[2, 3, 4], [5, 6, 7]] + + >>> np.random.seed(42) + >>> random.seed(42) + >>> s = 10 + >>> edges_d4 = {'e1': [1, 2, 3, 4], 'e2': [2, 3, 4, 5], 'e3': [3, 4, 5, 6], 'e4': [4, 5, 6, 7]} + >>> hypergraph_d4 = Hypergraph(edges_d4) + >>> approximate_matching_d4 = iterated_sampling(hypergraph_d4, s) + >>> approximate_matching_d4 + [[2, 3, 4, 5]] + + >>> edges_d5 = {'e1': [1, 2, 3, 4, 5], 'e2': [2, 3, 4, 5, 6], 'e3': [3, 4, 5, 6, 7]} + >>> hypergraph_d5 = Hypergraph(edges_d5) + >>> approximate_matching_d5 = iterated_sampling(hypergraph_d5, s) + >>> approximate_matching_d5 + [[1, 2, 3, 4, 5]] + + >>> edges_d6 = {'e1': [1, 2, 3, 4, 5, 6], 'e2': [2, 3, 4, 5, 6, 7], 'e3': [3, 4, 5, 6, 7, 8]} + >>> hypergraph_d6 = Hypergraph(edges_d6) + >>> approximate_matching_d6 = iterated_sampling(hypergraph_d6, s) + >>> approximate_matching_d6 + [[1, 2, 3, 4, 5, 6]] + + >>> edges_large = {f'e{i}': [i, i + 1, i + 2] for i in range(1, 101)} + >>> hypergraph_large = Hypergraph(edges_large) + >>> approximate_matching_large = iterated_sampling(hypergraph_large, s) + >>> len(approximate_matching_large) + 26 + """ + + d = max((len(edge) for edge in hypergraph.incidence_dict.values()), default=0) + M = [] + S = hypergraph + p = s / (5 * len(S.edges) * d) if len(S.edges) > 0 else 0 + iterations = 0 + + while iterations < max_iterations: + iterations += 1 + M_prime, E_prime = sampling_round(S, p, s) + if M_prime is None: + raise MemoryLimitExceededError( + "Memory limit exceeded during hypergraph matching" + ) + + 
M.extend(M_prime) + unmatched_vertices = set(S.nodes) - set(v for edge in M_prime for v in edge) + induced_edges = [ + edge + for edge in S.incidence_dict.values() + if all(v in unmatched_vertices for v in edge) + ] + if len(induced_edges) <= s: + M.extend( + maximal_matching( + hnx.Hypergraph( + {f"e{i}": tuple(edge) for i, edge in enumerate(induced_edges)} + ) + ) + ) + break + S = hnx.Hypergraph( + {f"e{i}": tuple(edge) for i, edge in enumerate(induced_edges)} + ) + p = s / (5 * len(S.edges) * d) if len(S.edges) > 0 else 0 + +    else:  # while-else: reached only when the loop ran out of iterations without `break` + raise MemoryLimitExceededError( + "Max iterations reached without finding a solution" + ) + + return M + + +def check_beta_condition(beta, beta_minus, d): + """ + Checks if the beta condition is satisfied. + + Parameters + ---------- + beta : float + The upper degree bound. + beta_minus : float + The lower degree bound. + d : int + The edge size of the d-uniform hypergraph. + + Returns + ------- + bool + True if beta - beta_minus >= d - 1, False otherwise. + """ + return (beta - beta_minus) >= (d - 1) + + +def build_HEDCS(hypergraph, beta, beta_minus): + """ + Constructs a Hyper-Edge Degree Constrained Subgraph (HEDCS) from the given hypergraph. + + Parameters + ---------- + hypergraph : hnx.Hypergraph + The input hypergraph. + beta : float + Upper bound: a subgraph edge whose vertex-degree sum exceeds beta is removed. + beta_minus : float + Lower bound: a missing edge whose vertex-degree sum is below beta_minus is added. + + Returns + ------- + hnx.Hypergraph + The constructed HEDCS. 
+ """ + H = hnx.Hypergraph(hypergraph.incidence_dict) # Initialize H to be equal to G + degrees = {node: 0 for node in hypergraph.nodes} # Initialize vertex degrees + + for edge in H.edges: + for node in H.edges[edge]: + degrees[node] += 1 + + while True: + violating_edge = None + for edge in list(H.edges): + edge_degree_sum = sum(degrees[node] for node in H.edges[edge]) + if edge_degree_sum > beta: + violating_edge = edge +                for node in H.edges[violating_edge]:  # update degrees BEFORE removal, while the edge's members are still readable +                    degrees[node] -= 1 +                H.remove_edge(violating_edge) + break + + for edge in list(hypergraph.edges): + if edge not in H.edges: + edge_degree_sum = sum(degrees[node] for node in hypergraph.edges[edge]) + if edge_degree_sum < beta_minus: + violating_edge = edge + H.add_edge(violating_edge, hypergraph.edges[violating_edge]) +                    for node in hypergraph.edges[violating_edge]:  # members taken from the source hypergraph + degrees[node] += 1 + break + + if violating_edge is None: + break + return H + + +def partition_hypergraph(hypergraph, k): + """ + Partitions a hypergraph into k approximately equal-sized subgraphs. + + Parameters + ---------- + hypergraph : hnx.Hypergraph + The input hypergraph. + k : int + The number of partitions. + + Returns + ------- + list[hnx.Hypergraph] + A list of k partitioned hypergraphs. + """ + edges = list(hypergraph.incidence_dict.items()) + random.shuffle(edges) + partitions = [edges[i::k] for i in range(k)] + return [hnx.Hypergraph(dict(part)) for part in partitions] + + +def HEDCS_matching(hypergraph: Hypergraph, s: int) -> list: + """ + HEDCS-Matching for Approximate Hypergraph Matching. + + This algorithm constructs Hyper-Edge Degree Constrained Subgraphs (HEDCS) + to find an approximate maximal matching in a d-uniform hypergraph. It leverages + parallelization to efficiently handle larger hypergraphs. + + Parameters + ---------- + hypergraph : Hypergraph + The input hypergraph. + s : int + The amount of memory available per machine. + + Returns + ------- + list + The edges of the graph for the approximate matching. 
+ + Raises + ------- + NonUniformHypergraphError + If the hypergraph is not d-uniform (not all edges have the same size). + ValueError + If the calculated beta and beta_minus values do not satisfy the beta condition. + + Examples + ------- + >>> import numpy as np + >>> np.random.seed(42) + >>> random.seed(42) + >>> hypergraph = Hypergraph({0: (1, 2)}) + >>> result = HEDCS_matching(hypergraph, 10) + >>> result + [[1, 2]] + + >>> np.random.seed(42) + >>> random.seed(42) + >>> hypergraph = Hypergraph({0: (1, 2), 1: (3, 4)}) + >>> result = HEDCS_matching(hypergraph, 10) + >>> result + [[1, 2], [3, 4]] + + >>> np.random.seed(42) + >>> random.seed(42) + >>> edges = {'e1': [1, 2, 3], 'e2': [2, 3, 4], 'e3': [1, 4, 5]} + >>> hypergraph = Hypergraph(edges) + >>> s = 10 + >>> approximate_matching = HEDCS_matching(hypergraph, s) + >>> approximate_matching + [[1, 2, 3]] + + >>> np.random.seed(42) + >>> random.seed(42) + >>> edges_large = {f'e{i}': [i, i + 1, i + 2] for i in range(1, 101)} + >>> hypergraph_large = Hypergraph(edges_large) + >>> approximate_matching_large = HEDCS_matching(hypergraph_large, s) + >>> len(approximate_matching_large) + 34 + """ + + edge_sizes = {len(edge) for edge in hypergraph.incidence_dict.values()} + if len(edge_sizes) > 1: + raise NonUniformHypergraphError("The hypergraph is not d-uniform.") + + d = next(iter(edge_sizes)) + n = len(hypergraph.nodes) + m = len(hypergraph.edges) + +    beta = 500 * d**3 * n**2 * math.log(n) ** 3  # beta = 500 * d^3 * n^2 * log^3(n): powers, not products + gamma = 1 / (2 * n * math.log(n)) + k = math.ceil(m / (s * math.log(n))) + beta_minus = (1 - gamma) * beta + + if not check_beta_condition(beta, beta_minus, d): + raise ValueError(f"beta - beta_minus must be >= {d - 1}") + + # Partition the hypergraph + partitions = partition_hypergraph(hypergraph, k) + + # Build HEDCS for each partition in parallel + with ThreadPoolExecutor() as executor: + HEDCS_list = list( + executor.map(lambda part: build_HEDCS(part, beta, beta_minus), partitions) + ) + + # Combine all the edges 
from the HEDCS subgraphs + combined_edges = {} + for H in HEDCS_list: + combined_edges.update(H.incidence_dict) + + combined_hypergraph = hnx.Hypergraph(combined_edges) + + # Find the maximum matching in the combined hypergraph + max_matching = maximal_matching(combined_hypergraph) + + return max_matching + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/tests/algorithms/test_matching.py b/tests/algorithms/test_matching.py new file mode 100644 index 00000000..4b38fc7f --- /dev/null +++ b/tests/algorithms/test_matching.py @@ -0,0 +1,180 @@ +""" +An implementation of the algorithms in: +"Distributed Algorithms for Matching in Hypergraphs", by Oussama Hanguir and Clifford Stein (2020), https://arxiv.org/abs/2009.09605v1 +Programmer: Shira Rot, Niv +Date: 22.5.2024 +""" + +import pytest +from hypernetx.classes.hypergraph import Hypergraph +from hypernetx.algorithms.matching_algorithms import ( + greedy_matching, + HEDCS_matching, + MemoryLimitExceededError, + approximation_matching_checking, +) +from hypernetx.algorithms.matching_algorithms import iterated_sampling + + +def test_greedy_d_approximation_empty_input(): + """ + Test for an empty input hypergraph. + """ + k = 2 + empty_hypergraph = Hypergraph({}) + assert greedy_matching(empty_hypergraph, k) == [] + + +def test_greedy_d_approximation_small_inputs(): + """ + Test for small input hypergraphs. + """ + k = 2 + hypergraph_1 = Hypergraph({"e1": {1, 2, 3}, "e2": {4, 5, 6}}) + assert greedy_matching(hypergraph_1, k) == [(1, 2, 3), (4, 5, 6)] + + hypergraph_2 = Hypergraph( + { + "e1": {1, 2, 3}, + "e2": {4, 5, 6}, + "e3": {7, 8, 9}, + "e4": {1, 4, 7}, + "e5": {2, 5, 8}, + "e6": {3, 6, 9}, + } + ) + result = greedy_matching(hypergraph_2, k) + assert len(result) == 3 + assert all(edge in [(1, 2, 3), (4, 5, 6), (7, 8, 9)] for edge in result) + + +def test_greedy_d_approximation_large_input(): + """ + Test for a large input hypergraph. 
+ """ + k = 2 + large_hypergraph = Hypergraph( + {f"e{i}": {i, i + 1, i + 2} for i in range(1, 100, 3)} + ) + result = greedy_matching(large_hypergraph, k) + assert len(result) == len(large_hypergraph.edges) + assert all(edge in [(i, i + 1, i + 2) for i in range(1, 100, 3)] for edge in result) + + +def test_iterated_sampling_single_edge(): + """ + Test for a hypergraph with a single edge. + It checks if the result is not None and if all edges in the result have at least 2 vertices. + """ + hypergraph = Hypergraph({0: (1, 2, 3)}) + result = iterated_sampling(hypergraph, 10) + assert result is not None and all(len(edge) >= 2 for edge in result) + + +def test_iterated_sampling_two_disjoint_edges(): + """ + Test for a hypergraph with two disjoint edges. + It checks if the result is not None and if all edges in the result have at least 2 vertices. + """ + hypergraph = Hypergraph({0: (1, 2), 1: (3, 4)}) + result = iterated_sampling(hypergraph, 10) + assert result is not None and all(len(edge) >= 2 for edge in result) + + +def test_iterated_sampling_insufficient_memory(): + """ + Test for a hypergraph with insufficient memory. + It checks if the function raises a MemoryLimitExceededError when memory is set to 0. + """ + hypergraph = Hypergraph({0: (1, 2, 3)}) + with pytest.raises(MemoryLimitExceededError): + iterated_sampling(hypergraph, 0) + + +def test_iterated_sampling_large_memory(): + """ + Test for a hypergraph with sufficient memory. + It checks if the result is not None when memory is set to 10. + """ + hypergraph = Hypergraph({0: (1, 2, 3), 1: (4, 5, 6)}) + result = iterated_sampling(hypergraph, 10) + assert result is not None + + +def test_iterated_sampling_max_iterations(): + """ + Test for a hypergraph reaching maximum iterations. 
+ """ + hypergraph = Hypergraph( + { + 0: (1, 2, 3), + 1: (2, 3, 4), + 2: (3, 4, 5), + 3: (5, 6, 7), + 4: (6, 7, 8), + 5: (7, 8, 9), + } + ) + result = iterated_sampling(hypergraph, 3) + assert result is None or all(len(edge) >= 2 for edge in result) + + +def test_iterated_sampling_large_hypergraph(): + """ + Test for a large hypergraph. + """ + edges_large = {f"e{i}": [i, i + 1, i + 2] for i in range(1, 101)} + hypergraph_large = Hypergraph(edges_large) + optimal_matching_large = [edges_large[f"e{i}"] for i in range(1, 101, 3)] + result = iterated_sampling(hypergraph_large, 10) + assert result is not None and approximation_matching_checking( + optimal_matching_large, result + ) + + +def test_HEDCS_matching_single_edge(): + """ + Test for a hypergraph with a single edge. + """ + hypergraph = Hypergraph({0: (1, 2)}) + result = HEDCS_matching(hypergraph, 10) + assert result is not None and all(len(edge) >= 2 for edge in result) + + +def test_HEDCS_matching_two_edges(): + """ + Test for a hypergraph with two disjoint edges. + """ + hypergraph = Hypergraph({0: (1, 2), 1: (3, 4)}) + result = HEDCS_matching(hypergraph, 10) + assert result is not None and all(len(edge) >= 2 for edge in result) + + +def test_HEDCS_matching_with_optimal_matching(): + """ + Test with a hypergraph where the optimal matching is known. + """ + edges = {"e1": [1, 2, 3], "e2": [2, 3, 4], "e3": [1, 4, 5]} + hypergraph = Hypergraph(edges) + s = 10 + optimal_matching = [[1, 2, 3]] # Assuming we know the optimal matching + approximate_matching = HEDCS_matching(hypergraph, s) + assert approximation_matching_checking(optimal_matching, approximate_matching) + + +def test_HEDCS_matching_large_hypergraph(): + """ + Test with a larger hypergraph. 
+ """ + edges_large = {f"e{i}": [i, i + 1, i + 2] for i in range(1, 101)} + hypergraph_large = Hypergraph(edges_large) + s = 10 + optimal_matching_large = [edges_large[f"e{i}"] for i in range(1, 101, 3)] + approximate_matching_large = HEDCS_matching(hypergraph_large, s) + assert approximation_matching_checking( + optimal_matching_large, approximate_matching_large + ) + + +if __name__ == "__main__": + pytest.main() diff --git a/tutorials/advanced/Advanced 7 - Matching algorithms.ipynb b/tutorials/advanced/Advanced 7 - Matching algorithms.ipynb new file mode 100644 index 00000000..9146655d --- /dev/null +++ b/tutorials/advanced/Advanced 7 - Matching algorithms.ipynb @@ -0,0 +1,261 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Hypergraph Matching Algorithms Tutorial\n", + "\n", + "This tutorial highlights the implementation and usage of several hypergraph matching algorithms as presented in our publication: [Distributed Algorithms for Matching in Hypergraphs](https://arxiv.org/abs/2009.09605v1).\n", + "\n", + "## Algorithms Covered\n", + "- Greedy Matching\n", + "- Iterated Sampling\n", + "- HEDCS Matching\n", + "\n", + "We will demonstrate how to use these algorithms with example hypergraphs and compare their performance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import hypernetx as hnx\n", + "from hypernetx.classes.hypergraph import Hypergraph\n", + "from hypernetx.algorithms.matching_algorithms import greedy_matching, iterated_sampling, HEDCS_matching\n", + "import random\n", + "import logging\n", + "import time\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example Hypergraph" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Example hypergraph data\n", + "hypergraph_data = {\n", + " 0: (1, 2, 3),\n", + " 1: (4, 5, 6),\n", + " 2: (7, 8, 9),\n", + " 3: (1, 4, 7),\n", + " 4: (2, 5, 8),\n", + " 5: (3, 6, 9)\n", + "}\n", + "\n", + "# Creating a Hypergraph\n", + "hypergraph = Hypergraph(hypergraph_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Greedy Matching Algorithm\n", + "The Greedy Matching algorithm constructs a random k-partitioning of the hypergraph and finds a maximal matching. \n", + "\n", + "### Parameters:\n", + "- `hypergraph`: The input hypergraph.\n", + "- `k`: The number of partitions to divide the hypergraph into.\n", + "\n", + "### Example Usage:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Greedy Matching Result: [(7, 8, 9), (1, 2, 3), (4, 5, 6)]\n" + ] + } + ], + "source": [ + "k = 3\n", + "greedy_result = greedy_matching(hypergraph, k)\n", + "print(\"Greedy Matching Result:\", greedy_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Iterated Sampling Algorithm\n", + "The Iterated Sampling algorithm uses sampling to find a maximal matching in a d-uniform hypergraph. 
\n", + "\n", + "### Parameters:\n", + "- `hypergraph`: The input hypergraph.\n", + "- `s`: The amount of memory available per machine; a sampled subgraph with more than `s` edges is rejected.\n", + "\n", + "### Example Usage:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iterated Sampling Result: [[7, 8, 9], [1, 2, 3], [4, 5, 6]]\n" + ] + } + ], + "source": [ + "s = 10\n", + "iterated_result = iterated_sampling(hypergraph, s)\n", + "print(\"Iterated Sampling Result:\", iterated_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## HEDCS Matching Algorithm\n", + "The HEDCS Matching algorithm constructs a Hyper-Edge Degree Constrained Subgraph (HEDCS) to find a maximal matching. \n", + "\n", + "### Parameters:\n", + "- `hypergraph`: The input hypergraph.\n", + "- `s`: The amount of memory available per machine; it determines how many partitions the hypergraph is split into.\n", + "\n", + "### Example Usage:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HEDCS Matching Result: [[1, 2, 3], [4, 5, 6], [7, 8, 9]]\n" + ] + } + ], + "source": [ + "hedcs_result = HEDCS_matching(hypergraph, s)\n", + "print(\"HEDCS Matching Result:\", hedcs_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Performance Comparison\n", + "We will compare the performance of the algorithms on large random hypergraphs." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def generate_random_hypergraph(n, d, m):\n", + " edges = {f'e{i}': random.sample(range(1, n+1), d) for i in range(m)}\n", + " return Hypergraph(edges)\n", + "\n", + "# Generate random hypergraphs of increasing size\n", + "sizes = [100, 200, 300, 400, 500]\n", + "greedy_times = []\n", + "iterated_times = []\n", + "hedcs_times = []\n", + "\n", + "for size in sizes:\n", + " hypergraph = generate_random_hypergraph(size, 3, size)\n", + " \n", + " start_time = time.time()\n", + " greedy_matching(hypergraph, k)\n", + " greedy_times.append(time.time() - start_time)\n", + " \n", + " start_time = time.time()\n", + " iterated_sampling(hypergraph, s, max_iterations = 500)\n", + " iterated_times.append(time.time() - start_time)\n", + " \n", + " start_time = time.time()\n", + " HEDCS_matching(hypergraph, s)\n", + " hedcs_times.append(time.time() - start_time)\n", + "\n", + "# Plot the results\n", + "plt.figure(figsize=(10, 6))\n", + "plt.plot(sizes, greedy_times, label='Greedy Matching')\n", + "plt.plot(sizes, iterated_times, label='Iterated Sampling')\n", + "plt.plot(sizes, hedcs_times, label='HEDCS Matching')\n", + "plt.xlabel('Hypergraph Size')\n", + "plt.ylabel('Time (seconds)')\n", + "plt.title('Performance Comparison of Hypergraph Matching Algorithms')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "In this tutorial, we demonstrated the implementation and usage of several hypergraph matching algorithms. We also compared their performance on random hypergraphs of increasing size.\n", + "\n", + "For more details, please refer to our publication: [Distributed Algorithms for Matching in Hypergraphs](https://arxiv.org/abs/2009.09605v1)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}