From 9e3b999a59ba0e966f0e19bf3c5df03978850bb6 Mon Sep 17 00:00:00 2001 From: Matthieu Viry Date: Tue, 2 Apr 2024 17:32:02 +0200 Subject: [PATCH] Change readme from rst to md due to weird rendering on gh --- MANIFEST.in | 2 +- README.md | 112 +++++++++++++++++++++++++++++++++++++++++++++++ README.rst | 123 ---------------------------------------------------- setup.py | 2 +- 4 files changed, 114 insertions(+), 125 deletions(-) create mode 100644 README.md delete mode 100644 README.rst diff --git a/MANIFEST.in b/MANIFEST.in index 034ca50..602fc2f 100755 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,7 @@ # C/cython source files include jenkspy/src/* -include README.rst +include README.md include LICENSE include tests/*.* include requirements.txt diff --git a/README.md b/README.md new file mode 100644 index 0000000..1f0b51e --- /dev/null +++ b/README.md @@ -0,0 +1,112 @@ +# Jenkspy: Fast Fisher-Jenks breaks for Python + +Compute "natural breaks" (*Fisher-Jenks algorithm*) on list / tuple / array / numpy.ndarray of integers/floats. + +The algorithm implemented by this library is also sometimes referred to as *Fisher-Jenks algorithm*, *Jenks Optimisation Method* or *Fisher exact optimization method*. This is a deterministic method to calculate the optimal class boundaries. + +Intended compatibility: CPython 3.6+ + +Wheels are provided via PyPI for Windows / MacOS / Linux users - Also available on conda-forge channel for Anaconda users. 
+ +[![Build status](https://github.com/mthh/jenkspy/actions/workflows/wheel.yml/badge.svg)](https://github.com/mthh/jenkspy/actions/workflows/wheel.yml) +[![PyPI version](https://img.shields.io/pypi/v/jenkspy.svg?color=007ec6)](https://pypi.python.org/pypi/jenkspy) +[![conda-forge version](https://anaconda.org/conda-forge/jenkspy/badges/version.svg)](https://anaconda.org/conda-forge/jenkspy) +[![PyPI monthly downloads](https://img.shields.io/pypi/dm/jenkspy.svg)](https://pypi.python.org/pypi/jenkspy) + +## Usage + +Two ways of using `jenkspy` are available: + +- by using the `jenks_breaks` function which takes as input +a [`list`](https://docs.python.org/3/library/stdtypes.html#list) +/ [`tuple`](https://docs.python.org/3/library/stdtypes.html#tuple) +/ [`array.array`](https://docs.python.org/3/library/array.html#array.array) +/ [`numpy.ndarray`](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html) of integers or floats and returns a list of values that correspond to the limits of the classes (starting with the minimum value of the series - the lower bound of the first class - and ending with its maximum value - the upper bound of the last class). + +```python +>>> import jenkspy +>>> import json + +>>> with open('tests/test.json', 'r') as f: +... # Read some data from a JSON file +... data = json.loads(f.read()) +... +>>> jenkspy.jenks_breaks(data, n_classes=5) # Asking for 5 classes +[0.0028109620325267315, 2.0935479691252112, 4.205495140049607, 6.178148351609707, 8.09175917180255, 9.997982932254672] +# ^ ^ ^ ^ ^ ^ +# Lower bound Upper bound Upper bound Upper bound Upper bound Upper bound +# 1st class 1st class 2nd class 3rd class 4th class 5th class +# (Minimum value) (Maximum value) +``` + +- by using the `JenksNaturalBreaks` class that is inspired by `scikit-learn` classes. + +The `.fit` and `.group` behavior is slightly different from `jenks_breaks`, +by accepting values outside the range of the minimum and maximum values of `breaks_`, +retaining the input size. 
It means that fit and group will use only the `inner_breaks_`. +All values below the min bound will be included in the first group and all values higher than the max bound will be included in the last group. + +```python +>>> from jenkspy import JenksNaturalBreaks + +>>> x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + +>>> jnb = JenksNaturalBreaks(4) # Asking for 4 clusters + +>>> jnb.fit(x) # Create the clusters according to values in 'x' +>>> print(jnb.labels_) # Labels for fitted data +... print(jnb.groups_) # Content of each group +... print(jnb.breaks_) # Break values (including min and max) +... print(jnb.inner_breaks_) # Inner breaks (i.e. breaks_[1:-1]) +[0 0 0 1 1 1 2 2 2 3 3 3] +[array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8]), array([ 9, 10, 11])] +[0.0, 2.0, 5.0, 8.0, 11.0] +[2.0, 5.0, 8.0] + +>>> print(jnb.predict(15)) # Predict the group of a value +3 + +>>> print(jnb.predict([2.5, 3.5, 6.5])) # Predict the group of several values +[1 1 2] + +>>> print(jnb.group([2.5, 3.5, 6.5])) # Group the elements into their groups +[array([], dtype=float64), array([2.5, 3.5]), array([6.5]), array([], dtype=float64)] +``` + +## Installation + +- **From PyPI** + +```shell +pip install jenkspy +``` + +- **From source** + +```shell +git clone http://github.com/mthh/jenkspy +cd jenkspy/ +python setup.py install +``` + +- **For Anaconda users** + +```shell +conda install -c conda-forge jenkspy +``` + +## Requirements + +- [NumPy](https://numpy.org) + +- Only for building from source: C compiler, Python C headers, setuptools and Cython. + + +## Motivation + +- Making a painless-to-install C extension so it could be used more easily + as a dependency in another package (and so learning how to build wheels + using *appveyor* / *travis* at first - now it uses *GitHub Actions*). +- Getting the break values! (and fast!). No fancy functionality provided, + but contributions/forks/etc are welcome. 
+- Other Python implementations currently exist, but they are either not as fast or not available on PyPI. diff --git a/README.rst b/README.rst deleted file mode 100644 index 6ac8627..0000000 --- a/README.rst +++ /dev/null @@ -1,123 +0,0 @@ -Fast Fisher-Jenks breaks for Python -=================================== - -Compute "natural breaks" (*Fisher-Jenks algorithm*) on list / tuple / array / numpy.ndarray of integers/floats. - -The algorithm implemented by this library is also sometimes referred to as *Fisher-Jenks algorithm*, *Jenks Optimisation Method* or *Fisher exact optimization method*. This is a deterministic method to calculate the optimal class boundaries. - -Intended compatibility: CPython 3.6+ - -Wheels are provided via PyPI for Windows / MacOS / Linux users - Also available on conda-forge channel for Anaconda users. - -|Version| |Anaconda-Server Badge| |Build Status GH| |PyPI download month| - -Usage ------ - -Two ways of using `jenkspy` are available: - -- by using the ``jenks_breaks`` function which takes as input a `list `_ / `tuple `_ / `array.array `_ / `numpy.ndarray `_ of integers or floats and returns a list of values that correspond to the limits of the classes (starting with the minimum value of the series - the lower bound of the first class - and ending with its maximum value - the upper bound of the last class). - -.. code:: python - - >>> import jenkspy - >>> import json - - >>> with open('tests/test.json', 'r') as f: - ... # Read some data from a JSON file - ... data = json.loads(f.read()) - ... 
- >>> jenkspy.jenks_breaks(data, n_classes=5) # Asking for 5 classes - [0.0028109620325267315, 2.0935479691252112, 4.205495140049607, 6.178148351609707, 8.09175917180255, 9.997982932254672] - # ^ ^ ^ ^ ^ ^ - # Lower bound Upper bound Upper bound Upper bound Upper bound Upper bound - # 1st class 1st class 2nd class 3rd class 4th class 5th class - # (Minimum value) (Maximum value) - - -- by using the ``JenksNaturalBreaks`` class that is inspired by ``scikit-learn`` classes. - -The ``.fit`` and ``.group`` behavior is slightly different from ``jenks_breaks``, by accepting value outside the range of the minimum and maximum value of ``breaks_``, retaining the input size. It means that fit and group will use only the ``inner_breaks_``. All value below the min bound will be included in the first group and all value higher than the max bound will be included in the last group. - -.. code:: python - - >>> from jenkspy import JenksNaturalBreaks - - >>> x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] - - >>> jnb = JenksNaturalBreaks(4) # Asking for 4 clusters - - >>> jnb.fit(x) # Create the clusters according to values in 'x' - >>> print(jnb.labels_) # Labels for fitted data - ... print(jnb.groups_) # Content of each group - ... print(jnb.breaks_) # Break values (including min and max) - ... print(jnb.inner_breaks_) # Inner breaks (ie breaks_[1:-1]) - [0 0 0 1 1 1 2 2 2 3 3 3] - [array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8]), array([ 9, 10, 11])] - [0.0, 2.0, 5.0, 8.0, 11.0] - [2.0, 5.0, 8.0] - - >>> print(jnb.predict(15)) # Predict the group of a value - 3 - - >>> print(jnb.predict([2.5, 3.5, 6.5])) # Predict the group of several values - [1 1 2] - - >>> print(jnb.group([2.5, 3.5, 6.5])) # Group the elements into there groups - [array([], dtype=float64), array([2.5, 3.5]), array([6.5]), array([], dtype=float64)] - - -Installation ------------- - -+ **From pypi** - -.. code:: shell - - pip install jenkspy - - -+ **From source** - -.. 
code:: shell - - git clone http://github.com/mthh/jenkspy - cd jenkspy/ - python setup.py install - -+ **For anaconda users** - -.. code:: shell - - conda install -c conda-forge jenkspy - - -Requirements : --------------- - -- `Numpy `_ - -- Only for building from source: C compiler, Python C headers, setuptools and Cython. - - -Motivation : ------------- - -- Making a painless installing C extension so it could be used more easily - as a dependency in an other package (and so learning how to build wheels - using *appveyor* / *travis* at first - now it uses *GitHub Actions*). -- Getting the break values! (and fast!). No fancy functionality provided, - but contributions/forks/etc are welcome. -- Other python implementations are currently existing but not as fast or not available on PyPi. - -.. |Build status GH| image:: https://github.com/mthh/jenkspy/actions/workflows/wheel.yml/badge.svg - :target: https://github.com/mthh/jenkspy/actions/workflows/wheel.yml - -.. |Version| image:: https://img.shields.io/pypi/v/jenkspy.svg?color=007ec6 - :target: https://pypi.python.org/pypi/jenkspy - -.. |Anaconda-Server Badge| image:: https://anaconda.org/conda-forge/jenkspy/badges/version.svg - :target: https://anaconda.org/conda-forge/jenkspy - -.. |PyPI download month| image:: https://img.shields.io/pypi/dm/jenkspy.svg - :target: https://pypi.python.org/pypi/jenkspy diff --git a/setup.py b/setup.py index 546b24e..1214b44 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ __version__ = parse(next(filter(lambda line: line.startswith('__version__'), f))).body[0].value.s -with open('README.rst') as f: +with open('README.md') as f: long_desc = f.read() with open('requirements.txt') as f: