Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generic implementation of optimal grouping of objects using dynamic programming #272

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
3 changes: 2 additions & 1 deletion pydatastructs/linear_data_structures/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
brick_sort,
brick_sort_parallel,
heapsort,
matrix_multiply_parallel
matrix_multiply_parallel,
optimal_grouping
)
__all__.extend(algorithms.__all__)
188 changes: 186 additions & 2 deletions pydatastructs/linear_data_structures/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
from pydatastructs.utils.misc_util import _check_type, _comp
from concurrent.futures import ThreadPoolExecutor
from math import log, floor
from typing import List

__all__ = [
'merge_sort_parallel',
'brick_sort',
'brick_sort_parallel',
'heapsort',
'matrix_multiply_parallel'
'matrix_multiply_parallel',
'optimal_grouping'
]

def _merge(array, sl, el, sr, er, end, comp):
Expand Down Expand Up @@ -346,7 +348,7 @@ def matrix_multiply_parallel(matrix_1, matrix_2, num_threads):
row_matrix_2, col_matrix_2 = len(matrix_2), len(matrix_2[0])

if col_matrix_1 != row_matrix_2:
raise ValueError("Matrix size mismatch: %s * %s"%(
raise ValueError("Matrix size mismatch: %s * %s" % (
(row_matrix_1, col_matrix_1), (row_matrix_2, col_matrix_2)))

C = [[None for i in range(col_matrix_1)] for j in range(row_matrix_2)]
Expand All @@ -360,3 +362,185 @@ def matrix_multiply_parallel(matrix_1, matrix_2, num_threads):
i, j).result()

return C

def _compare_opt_group(maximize, value, compareWith=None):
"""
compares a value with another. if compareWith is None then value is compared with Infinity or -Infinity
parameters
[maximize] if True then the function returns true if value is greater than compareWith and vice versa
"""
if compareWith is None:
if maximize:
compareWith = float('-inf')
else:
compareWith = float('inf')
if maximize:
return value > compareWith
return value < compareWith

def _initialize_arrays_opt_group(maximize, rows, columns):
"""
returns a 2-d array of rows*columns size filled with either Infinity or -Infinity
parameters:
[maximize]
if 'True' fills with -Infinity and vice versa
[rows]
expects a number
[columns]
expects a number
"""
value = float('inf')
if maximize:
value = float('-inf')
return [[value for a in range(0, columns+1)] for a in range(0, rows+1)]

def _optimal_grouping_rec(object_arr, cost_storage, solution_matrix, maximize_prob, min_compare_len, lookup_index, get_lookup_fn, cost_fn):
    """
    Helper function for optimal_grouping function.

    Recursively computes the best cost for the range described by
    ``lookup_index`` and memoizes it in ``cost_storage``; the split index
    that produced it is recorded in ``solution_matrix``.

    Parameters
    ==========

    object_arr
        The objects being grouped; passed through untouched to ``cost_fn``.
    cost_storage
        Table of already computed costs, indexed as
        ``cost_storage[start][end]``. Cells still holding the sentinel
        (Infinity when minimizing, -Infinity when maximizing) are treated
        as not computed yet.
    solution_matrix
        Table of the same layout receiving the chosen split index.
    maximize_prob
        True to maximize the cost, False to minimize it.
    min_compare_len
        Minimum length a range must have to be split further.
    lookup_index
        ``[start_index, end_index]`` of the range to solve.
    get_lookup_fn
        Called as ``get_lookup_fn('before' | 'after', lookup_index, i)``
        to obtain the sub-range on either side of split index ``i``.
    cost_fn
        Called as ``cost_fn(object_arr, lookup_index, i)`` to obtain the
        cost contributed by splitting (or not splitting) at ``i``.

    Returns
    =======

    The best cost found for ``lookup_index``. This stays at the sentinel
    if no candidate ever beats it.
    """
    row, col = lookup_index[0], lookup_index[1]

    # Anything better than the sentinel means this cell was already solved.
    present_value = cost_storage[row][col]
    if _compare_opt_group(maximize_prob, present_value):
        return present_value

    # Candidate split points are start_index .. end_index - 1; the upper
    # bound shrinks as min_compare_len grows.
    start_index = row
    end_index = col + 1 - (min_compare_len - 1)

    # Base case: the range is too short to be split. The indices are
    # compared by value; an identity check (``is``) is only reliable for
    # the small integers CPython happens to cache.
    if start_index >= end_index:
        cost = cost_fn(object_arr, lookup_index, start_index)
        if _compare_opt_group(maximize_prob, cost, present_value):
            cost_storage[row][col] = cost
            solution_matrix[row][col] = start_index
            present_value = cost
        return present_value

    for i in range(start_index, end_index):
        # Best cost of the part before the split.
        left_rec_indices = get_lookup_fn('before', lookup_index, i)
        cost = _optimal_grouping_rec(object_arr, cost_storage, solution_matrix, maximize_prob,
                                     min_compare_len, left_rec_indices, get_lookup_fn, cost_fn)

        # Best cost of the part after the split.
        right_rec_indices = get_lookup_fn('after', lookup_index, i)
        cost = cost + _optimal_grouping_rec(object_arr, cost_storage, solution_matrix, maximize_prob,
                                            min_compare_len, right_rec_indices, get_lookup_fn, cost_fn)

        # Cost of combining both parts at this split.
        cost = cost + cost_fn(object_arr, lookup_index, i)

        # Keep this split if it is the best one seen so far.
        if _compare_opt_group(maximize_prob, cost, present_value):
            cost_storage[row][col] = cost
            solution_matrix[row][col] = i
            present_value = cost

    return present_value

def optimal_grouping(process_objects, maximize_prob, min_compare_len, lookup_index, get_lookup_fn, cost_fn):
    """
    Groups a sequence of objects optimally with respect to a user supplied
    cost function, using top-down dynamic programming.

    Parameters
    ==========

    process_objects
        The objects on which the algorithm is supposed to run. They are
        handed to ``cost_fn`` unchanged.
    maximize_prob
        Pass True if the algorithm should find the maximum value of the
        cost function, otherwise pass False.
    min_compare_len
        A positive integer giving the minimum length a range must have
        to be split further. For example, if the minimum length is 2
        then a range is only split when it spans at least two indices.
    lookup_index
        ``[start_index, end_index]``, the range of indices the algorithm
        runs on. ``start_index`` must not exceed ``end_index``.
    get_lookup_fn
        Should return the next range of indices.
        Called as ``get_lookup_fn(position, range_indices, current_index)``
        where ``position`` is either ``'before'`` or ``'after'`` and
        ``range_indices`` is the present range ``[start_index, end_index]``.
    cost_fn
        Should return the cost.
        Called as ``cost_fn(process_objects, range_indices, current_index)``.

    Returns
    =======

    list
        A two dimensional list in which entry ``[i][j]`` holds the split
        index chosen for the range ``[i, j]``. Entries that were never
        visited keep their initial value (Infinity when minimizing,
        -Infinity when maximizing).

    Raises
    ======

    ValueError
        If ``min_compare_len`` is less than 1, or if ``lookup_index`` has
        fewer than two items or its start exceeds its end.

    Examples
    ========

    1. Optimal binary search tree

        from binarytree import Node
        n = 5
        p = [None, Node(0.15), Node(0.10), Node(0.05), Node(0.10), Node(0.20)]
        q = [Node(0.05), Node(0.10), Node(0.05), Node(0.05), Node(0.05), Node(0.10)]

        def lookup(position, endIndex, middle):
            if position == 'before':
                return [endIndex[0], middle-1]
            else:
                return [middle+1, endIndex[1]]

        def cost(obj, endIndex, middle):
            if endIndex[1] < endIndex[0]:
                return obj['q'][endIndex[1]].value

            total = 0
            for i in range(endIndex[0], endIndex[1]+1):
                total += obj['p'][i].value
            for i in range(endIndex[0]-1, endIndex[1]+1):
                total += obj['q'][i].value
            return total

        print(optimal_grouping({'p': p, 'q': q}, False, 1, [1, n], lookup, cost))

    2. Matrix chain multiplication

        def cost(matrix, endIndex, middle):
            if endIndex[0] == endIndex[1]:
                return 0
            return matrix[endIndex[0]-1]*matrix[middle]*matrix[endIndex[1]]

        def lookup(position, endIndex, middle):
            if position == 'before':
                return [endIndex[0], middle]
            else:
                return [middle+1, endIndex[1]]

        print(optimal_grouping([30, 35, 15, 5, 10, 20, 25], False, 2, [1, 6], lookup, cost))
    """
    if min_compare_len < 1:
        raise ValueError(
            'min_compare_len should be a positive integer')

    if len(lookup_index) < 2 or lookup_index[0] > lookup_index[1]:
        raise ValueError(
            'lookup index should at least have 2 integer items, first specifying the start and second specifying the last indices')
    # end of edge cases

    length = lookup_index[1] - lookup_index[0] + 1

    # The tables are indexed with the absolute indices of the range, so
    # they have to reach lookup_index[1]. Sizing them from the length
    # alone is only enough while the range starts close to zero; grow
    # them (leaving one spare row/column) when it does not, instead of
    # failing with an IndexError on the first access.
    dimension = length + 1
    if dimension < lookup_index[1]:
        dimension = lookup_index[1] + 1

    # for storing the computed values (helper array)
    cost_storage = _initialize_arrays_opt_group(
        maximize_prob, dimension, dimension)
    # for storing the solutions
    solution_matrix = _initialize_arrays_opt_group(
        maximize_prob, dimension, dimension)

    _optimal_grouping_rec(process_objects, cost_storage, solution_matrix, maximize_prob,
                          min_compare_len, lookup_index, get_lookup_fn, cost_fn)
    return solution_matrix
29 changes: 28 additions & 1 deletion pydatastructs/linear_data_structures/tests/test_algorithms.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pydatastructs import (
merge_sort_parallel, DynamicOneDimensionalArray,
OneDimensionalArray, brick_sort, brick_sort_parallel,
heapsort, matrix_multiply_parallel)
heapsort, matrix_multiply_parallel, optimal_grouping)
from pydatastructs.utils.raises_util import raises
import random

Expand Down Expand Up @@ -76,3 +76,30 @@ def test_matrix_multiply_parallel():
J = [[2, 1, 2], [1, 2, 1], [2, 2, 2]]
output = matrix_multiply_parallel(I, J, num_threads=1)
assert expected_result == output

def test_optimal_grouping():
    """
    Checks optimal_grouping on matrix chain multiplication inputs and on
    invalid arguments.
    """
    inf = float('inf')

    def cost(matrix, endIndex, middle):
        # A single matrix needs no multiplication.
        if endIndex[0] == endIndex[1]:
            return 0
        return matrix[endIndex[0]-1]*matrix[middle]*matrix[endIndex[1]]

    def lookup(position, endIndex, middle):
        # Strings are compared by value; identity (``is``) is not reliable.
        if position == 'before':
            return [endIndex[0], middle]
        else:
            return [middle+1, endIndex[1]]

    # test case 1: chain of six matrices
    expected_result = [[inf, inf, inf, inf, inf, inf, inf, inf],
                       [inf, 1, 1, 1, 3, 3, 3, inf],
                       [inf, inf, 2, 2, 3, 3, 3, inf],
                       [inf, inf, inf, 3, 3, 3, 3, inf],
                       [inf, inf, inf, inf, 4, 4, 5, inf],
                       [inf, inf, inf, inf, inf, 5, 5, inf],
                       [inf, inf, inf, inf, inf, inf, 6, inf],
                       [inf, inf, inf, inf, inf, inf, inf, inf]]
    assert expected_result == optimal_grouping([30, 35, 15, 5, 10, 20, 25], False, 2, [1, 6], lookup, cost)

    # test case 2: degenerate range with a single index
    expected_result = [[0, inf, inf], [inf, inf, inf], [inf, inf, inf]]
    assert expected_result == optimal_grouping([], False, 2, [0, 0], lookup, cost)

    # test case 3: invalid arguments are rejected
    assert raises(ValueError, lambda: optimal_grouping([], False, 0, [0, 0], lookup, cost))
    assert raises(ValueError, lambda: optimal_grouping([], False, 2, [3, 1], lookup, cost))
    assert raises(ValueError, lambda: optimal_grouping([], False, 2, [0], lookup, cost))