codezonediitj · pravalikavis · May 1, 2020 · May 1, 2020 · May 1, 2020 · May 1, 2020
diff --git a/pydatastructs/linear_data_structures/__init__.py b/pydatastructs/linear_data_structures/__init__.py
@@ -26,6 +26,7 @@
     brick_sort,
     brick_sort_parallel,
     heapsort,
-    matrix_multiply_parallel
+    matrix_multiply_parallel,
+    optimal_grouping
 )
 __all__.extend(algorithms.__all__)
diff --git a/pydatastructs/linear_data_structures/algorithms.py b/pydatastructs/linear_data_structures/algorithms.py
@@ -3,13 +3,15 @@
 from pydatastructs.utils.misc_util import _check_type, _comp
 from concurrent.futures import ThreadPoolExecutor
 from math import log, floor
+from typing import List
 
 __all__ = [
     'merge_sort_parallel',
     'brick_sort',
     'brick_sort_parallel',
     'heapsort',
-    'matrix_multiply_parallel'
+    'matrix_multiply_parallel',
+    'optimal_grouping'
 ]
 
 def _merge(array, sl, el, sr, er, end, comp):
@@ -346,7 +348,7 @@ def matrix_multiply_parallel(matrix_1, matrix_2, num_threads):
     row_matrix_2, col_matrix_2 = len(matrix_2), len(matrix_2[0])
 
     if col_matrix_1 != row_matrix_2:
-        raise ValueError("Matrix size mismatch: %s * %s"%(
+        raise ValueError("Matrix size mismatch: %s * %s" % (
         (row_matrix_1, col_matrix_1), (row_matrix_2, col_matrix_2)))
 
     C = [[None for i in range(col_matrix_1)] for j in range(row_matrix_2)]
@@ -360,3 +362,185 @@ def matrix_multiply_parallel(matrix_1, matrix_2, num_threads):
                                           i, j).result()
 
     return C
+
+def _compare_opt_group(maximize, value, compareWith=None):
+    """
+    Tell whether ``value`` is strictly better than ``compareWith``.
+
+    "Better" means greater when ``maximize`` is truthy and smaller otherwise.
+    When ``compareWith`` is None, ``value`` is compared against the worst
+    possible score: -Infinity when maximizing, +Infinity when minimizing.
+    """
+    if maximize:
+        baseline = float('-inf') if compareWith is None else compareWith
+        return value > baseline
+    # minimizing: smaller values win
+    baseline = float('inf') if compareWith is None else compareWith
+    return value < baseline
+
+def _initialize_arrays_opt_group(maximize, rows, columns):
+    """
+    Build a fresh 2-d list whose cells all hold the worst possible score.
+
+    parameters:
+        [maximize]
+            if truthy the cells hold -Infinity, otherwise +Infinity
+        [rows], [columns]
+            integers; the result has rows+1 inner lists of columns+1 cells
+            each (one more than requested in both directions)
+    """
+    fill = float('-inf') if maximize else float('inf')
+    # every row is a distinct list, so writing one cell never aliases another
+    table = [[fill] * (columns + 1) for _ in range(rows + 1)]
+    return table
+
+def _optimal_grouping_rec(object_arr, cost_storage, solution_matrix, maximize_prob, min_compare_len, lookup_index, get_lookup_fn, cost_fn):
+    """
+    Memoized recursive helper for optimal_grouping.
+
+    Stores in cost_storage[start][end] the best cost found for the range
+    lookup_index = [start, end] and in solution_matrix[start][end] the split
+    index that produced it, then returns that cost. Cells are only written
+    when a candidate beats the value they currently hold.
+    """
+    start_index, last_index = lookup_index[0], lookup_index[1]
+
+    # a cell already better than +/-Infinity has been solved before
+    present_value = cost_storage[start_index][last_index]
+    if _compare_opt_group(maximize_prob, present_value):
+        return present_value
+
+    # split candidates are start_index..end_index-1; min_compare_len trims the top
+    end_index = last_index + 1 - (min_compare_len - 1)
+
+    # Base case: the range is too short to split. Compare by value (>=), not
+    # identity: `is` on ints only happens to work for CPython's cached small
+    # integers, so equal but larger indices could skip this branch.
+    if start_index >= end_index:
+        cost = cost_fn(object_arr, lookup_index, start_index)
+        if _compare_opt_group(maximize_prob, cost, present_value):
+            cost_storage[start_index][last_index] = cost
+            solution_matrix[start_index][last_index] = start_index
+            present_value = cost
+        return present_value
+
+    for i in range(start_index, end_index):
+        # cost of the sub-range before the split plus the sub-range after it
+        left_rec_indices = get_lookup_fn('before', lookup_index, i)
+        cost = _optimal_grouping_rec(object_arr, cost_storage, solution_matrix, maximize_prob,
+                                     min_compare_len, left_rec_indices, get_lookup_fn, cost_fn)
+        right_rec_indices = get_lookup_fn('after', lookup_index, i)
+        cost = cost+_optimal_grouping_rec(object_arr, cost_storage, solution_matrix, maximize_prob,
+                                          min_compare_len, right_rec_indices, get_lookup_fn, cost_fn)
+        # plus cost_fn's own cost for splitting at i
+        cost = cost+cost_fn(object_arr, lookup_index, i)
+        # keep this split if it is the best one seen so far
+        if _compare_opt_group(maximize_prob, cost, present_value):
+            cost_storage[start_index][last_index] = cost
+            solution_matrix[start_index][last_index] = i
+            present_value = cost
+    return present_value
+
+def optimal_grouping(process_objects, maximize_prob, min_compare_len, lookup_index, get_lookup_fn, cost_fn):
+    """
+    Description
+    ===========
+    Finds, for every sub-range reached from an index range, the split point
+    that optimizes a user supplied cost function (e.g. matrix chain
+    multiplication or optimal binary search trees).
+
+    Parameters
+    ==========
+     process_objects
+        the objects the algorithm works on; they are only passed to cost_fn
+     maximize_prob
+        pass True to maximize the total cost, False to minimize it
+     min_compare_len
+        a positive integer; only ranges holding at least min_compare_len
+        indices are split, shorter ones are costed directly by cost_fn
+     lookup_index
+        [start_index, end_index], both inclusive, of the range to solve
+     get_lookup_fn
+        get_lookup_fn(position, range_indices, current_index) must return
+        the [start, end] sub-range lying 'before' or 'after' (position)
+        the split of range_indices at current_index
+     cost_fn
+        cost_fn(process_objects, range_indices, current_index) must return
+        the cost of splitting range_indices at current_index; it is added
+        to the costs of both sub-ranges
+
+    Returns
+    =======
+     A 2-d list whose entry [start][end] is the best split index found for
+     that range; entries that were never computed keep their filler value
+     (Infinity when minimizing, -Infinity when maximizing).
+
+    Raises
+    ======
+     ValueError
+        if min_compare_len < 1, or lookup_index has fewer than 2 items or
+        its start is greater than its end
+
+    Note
+    ====
+     The tables are sized from the range length but indexed with absolute
+     indices, so ranges starting well above 1 can raise IndexError.
+
+    Usage examples
+    ==============
+
+      1.OPTIMAL BINARY SEARCH TREE
+
+        from binarytree import Node
+        n = 5
+        p = [None, Node(0.15), Node(0.10), Node(0.05), Node(0.10), Node(0.20)]
+        q = [Node(0.05), Node(0.10), Node(0.05), Node(0.05), Node(0.05), Node(0.10)]
+
+        def lookup(position, endIndex, middle):
+            if position == 'before':
+                return [endIndex[0], middle-1]
+            else:
+                return [middle+1, endIndex[1]]
+
+        def cost(obj, endIndex, middle):
+            if endIndex[1] < endIndex[0]:
+                return obj['q'][endIndex[1]].value
+            total = 0
+            for i in range(endIndex[0], endIndex[1]+1):
+                total += obj['p'][i].value
+            for i in range(endIndex[0]-1, endIndex[1]+1):
+                total += obj['q'][i].value
+            return total
+
+        print(optimal_grouping({'p': p, 'q': q},  False, 1, [1, n], lookup, cost))
+
+      2.MATRIX CHAIN MULTIPLICATION
+
+        def cost(matrix, endIndex, middle):
+            if endIndex[0] == endIndex[1]:
+                return 0
+            return matrix[endIndex[0]-1]*matrix[middle]*matrix[endIndex[1]]
+
+        def lookup(position, endIndex, middle):
+            if position == 'before':
+                return [endIndex[0], middle]
+            else:
+                return [middle+1, endIndex[1]]
+
+        print(optimal_grouping([30, 35, 15, 5, 10, 20, 25], False, 2, [1, 6], lookup, cost))
+    """
+    if min_compare_len < 1:
+        raise ValueError(
+            'min_compare_len should be a positive integer')
+
+    if len(lookup_index) < 2 or lookup_index[0] > lookup_index[1]:
+        raise ValueError(
+            'lookup index should at least have 2 integer items, first specifying the start and second specifying the last indices')
+
+    length = lookup_index[1]-lookup_index[0]+1
+    # best cost per range (memo table) and the split index achieving it
+    cost_storage = _initialize_arrays_opt_group(maximize_prob, length+1, length+1)
+    solution_matrix = _initialize_arrays_opt_group(maximize_prob, length+1, length+1)
+    _optimal_grouping_rec(process_objects, cost_storage, solution_matrix, maximize_prob,
+                          min_compare_len, lookup_index, get_lookup_fn, cost_fn)
+    return solution_matrix
diff --git a/pydatastructs/linear_data_structures/tests/test_algorithms.py b/pydatastructs/linear_data_structures/tests/test_algorithms.py
@@ -1,7 +1,7 @@
 from pydatastructs import (
     merge_sort_parallel, DynamicOneDimensionalArray,
     OneDimensionalArray, brick_sort, brick_sort_parallel,
-    heapsort, matrix_multiply_parallel)
+    heapsort, matrix_multiply_parallel, optimal_grouping)
 from pydatastructs.utils.raises_util import raises
 import random
 
@@ -76,3 +76,30 @@ def test_matrix_multiply_parallel():
     J = [[2, 1, 2], [1, 2, 1], [2, 2, 2]]
     output = matrix_multiply_parallel(I, J, num_threads=1)
     assert expected_result == output
+
+def test_optimal_grouping():
+    # test case1: matrix chain multiplication over six matrices
+    def cost(matrix, endIndex, middle):
+        if endIndex[0] == endIndex[1]:
+            return 0
+        return matrix[endIndex[0]-1]*matrix[middle]*matrix[endIndex[1]]
+
+    def lookup(position, endIndex, middle):
+        # compare by value: `is` on a str literal relies on interning
+        if position == 'before':
+            return [endIndex[0], middle]
+        return [middle+1, endIndex[1]]
+    inf = float('inf')
+    expected_result = [[inf, inf, inf, inf, inf, inf, inf, inf],
+                       [inf, 1, 1, 1, 3, 3, 3, inf],
+                       [inf, inf, 2, 2, 3, 3, 3, inf],
+                       [inf, inf, inf, 3, 3, 3, 3, inf],
+                       [inf, inf, inf, inf, 4, 4, 5, inf],
+                       [inf, inf, inf, inf, inf, 5, 5, inf],
+                       [inf, inf, inf, inf, inf, inf, 6, inf],
+                       [inf, inf, inf, inf, inf, inf, inf, inf]]
+    assert expected_result == optimal_grouping([30, 35, 15, 5, 10, 20, 25], False, 2, [1, 6], lookup, cost)
+
+    # test case2: a single-index range records its own start as the split
+    expected_result = [[0, inf, inf], [inf, inf, inf], [inf, inf, inf]]
+    assert expected_result == optimal_grouping([], False, 2, [0,0], lookup, cost)