Skip to content

Commit

Permalink
MAINT: stats: mode: mode is a reduction operation; should consume an axis (scipy#15423)
Browse files Browse the repository at this point in the history

* MAINT: stats: mode: mode is a reduction operation; should consume a dimension
  • Loading branch information
mdhaber authored Feb 3, 2022
1 parent a574519 commit 725a4eb
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 27 deletions.
15 changes: 7 additions & 8 deletions scipy/stats/_stats_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,12 +440,12 @@ def mode(a, axis=0, nan_policy='propagate'):
... [4, 7, 5, 9]])
>>> from scipy import stats
>>> stats.mode(a)
ModeResult(mode=array([[3, 1, 0, 0]]), count=array([[1, 1, 1, 1]]))
ModeResult(mode=array([3, 1, 0, 0]), count=array([1, 1, 1, 1]))
To get mode of whole array, specify ``axis=None``:
>>> stats.mode(a, axis=None)
ModeResult(mode=array([3]), count=array([3]))
ModeResult(mode=3, count=3)
"""
a, axis = _chk_asarray(a, axis)
Expand All @@ -462,18 +462,18 @@ def mode(a, axis=0, nan_policy='propagate'):
# Fall back to a slower method since np.unique does not work with NaN
scores = set(np.ravel(a)) # get ALL unique values
testshape = list(a.shape)
testshape[axis] = 1
testshape.pop(axis)
oldmostfreq = np.zeros(testshape, dtype=a.dtype)
oldcounts = np.zeros(testshape, dtype=int)

for score in scores:
template = (a == score)
counts = np.sum(template, axis, keepdims=True)
counts = np.sum(template, axis)
mostfrequent = np.where(counts > oldcounts, score, oldmostfreq)
oldcounts = np.maximum(counts, oldcounts)
oldmostfreq = mostfrequent

return ModeResult(mostfrequent, oldcounts)
return ModeResult(mostfrequent[()], oldcounts[()])

def _mode1D(a):
vals, cnts = np.unique(a, return_counts=True)
Expand All @@ -490,9 +490,8 @@ def _mode1D(a):
counts = np.empty(a_view.shape[:-1], dtype=np.int_)
for ind in inds:
modes[ind], counts[ind] = _mode1D(a_view[ind])
newshape = list(a.shape)
newshape[axis] = 1
return ModeResult(modes.reshape(newshape), counts.reshape(newshape))

return ModeResult(modes[()], counts[()])


def _mask_to_limits(a, limits, inclusive):
Expand Down
49 changes: 30 additions & 19 deletions scipy/stats/tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2144,8 +2144,8 @@ def test_scalar(self):
def test_basic(self):
data1 = [3, 5, 1, 10, 23, 3, 2, 6, 8, 6, 10, 6]
vals = stats.mode(data1)
assert_equal(vals[0][0], 6)
assert_equal(vals[1][0], 3)
assert_equal(vals[0], 6)
assert_equal(vals[1], 3)

def test_axes(self):
data1 = [10, 10, 30, 40]
Expand All @@ -2156,16 +2156,16 @@ def test_axes(self):
arr = np.array([data1, data2, data3, data4, data5])

vals = stats.mode(arr, axis=None)
assert_equal(vals[0], np.array([30]))
assert_equal(vals[1], np.array([8]))
assert_equal(vals[0], np.array(30))
assert_equal(vals[1], np.array(8))

vals = stats.mode(arr, axis=0)
assert_equal(vals[0], np.array([[10, 10, 30, 30]]))
assert_equal(vals[1], np.array([[2, 3, 3, 2]]))
assert_equal(vals[0], np.array([10, 10, 30, 30]))
assert_equal(vals[1], np.array([2, 3, 3, 2]))

vals = stats.mode(arr, axis=1)
assert_equal(vals[0], np.array([[10], [10], [20], [30], [30]]))
assert_equal(vals[1], np.array([[2], [4], [3], [4], [3]]))
assert_equal(vals[0], np.array([10, 10, 20, 30, 30]))
assert_equal(vals[1], np.array([2, 4, 3, 4, 3]))

@pytest.mark.parametrize('axis', np.arange(-4, 0))
def test_negative_axes_gh_15375(self, axis):
Expand All @@ -2178,16 +2178,16 @@ def test_negative_axes_gh_15375(self, axis):
def test_strings(self):
data1 = ['rain', 'showers', 'showers']
vals = stats.mode(data1)
assert_equal(vals[0][0], 'showers')
assert_equal(vals[1][0], 2)
assert_equal(vals[0], 'showers')
assert_equal(vals[1], 2)

def test_mixed_objects(self):
objects = [10, True, np.nan, 'hello', 10]
arr = np.empty((5,), dtype=object)
arr[:] = objects
vals = stats.mode(arr)
assert_equal(vals[0][0], 10)
assert_equal(vals[1][0], 2)
assert_equal(vals[0], 10)
assert_equal(vals[1], 2)

def test_objects(self):
# Python objects must be sortable (le + eq) and have ne defined
Expand Down Expand Up @@ -2215,8 +2215,8 @@ def __hash__(self):
assert_equal(np.unique(arr).shape, (4,))
vals = stats.mode(arr)

assert_equal(vals[0][0], Point(2))
assert_equal(vals[1][0], 4)
assert_equal(vals[0], Point(2))
assert_equal(vals[1], 4)

def test_mode_result_attributes(self):
data1 = [3, 5, 1, 10, 23, 3, 2, 6, 8, 6, 10, 6]
Expand Down Expand Up @@ -2245,21 +2245,32 @@ def test_mode_nan(self):
])
def test_smallest_equal(self, data):
result = stats.mode(data, nan_policy='omit')
assert_equal(result[0][0], 1)
assert_equal(result[0], 1)

def test_obj_arrays_ndim(self):
# regression test for gh-9645: `mode` fails for object arrays w/ndim > 1
data = [['Oxidation'], ['Oxidation'], ['Polymerization'], ['Reduction']]
ar = np.array(data, dtype=object)
m = stats.mode(ar, axis=0)
assert np.all(m.mode == 'Oxidation') and m.mode.shape == (1, 1)
assert np.all(m.count == 2) and m.count.shape == (1, 1)
assert np.all(m.mode == 'Oxidation') and m.mode.shape == (1,)
assert np.all(m.count == 2) and m.count.shape == (1,)

data1 = data + [[np.nan]]
ar1 = np.array(data1, dtype=object)
m = stats.mode(ar1, axis=0)
assert np.all(m.mode == 'Oxidation') and m.mode.shape == (1, 1)
assert np.all(m.count == 2) and m.count.shape == (1, 1)
assert np.all(m.mode == 'Oxidation') and m.mode.shape == (1,)
assert np.all(m.count == 2) and m.count.shape == (1,)

@pytest.mark.parametrize('axis', np.arange(-3, 3))
@pytest.mark.parametrize('dtype', [np.float64, 'object'])
def test_mode_shape_gh_9955(self, axis, dtype):
rng = np.random.default_rng(984213899)
a = rng.uniform(size=(3, 4, 5)).astype(dtype)
res = stats.mode(a, axis=axis)
reference_shape = list(a.shape)
reference_shape.pop(axis)
np.testing.assert_array_equal(res.mode.shape, reference_shape)
np.testing.assert_array_equal(res.count.shape, reference_shape)


class TestSEM:
Expand Down

0 comments on commit 725a4eb

Please sign in to comment.