Merge pull request #31 from nitinmad/py312_dev

Py312 dev
hachmannlab · Jan 20, 2025 · 1b648c6 · 1b648c6
2 parents 21d799b + 4e8f731
commit 1b648c6
Show file tree

Hide file tree

Showing 72 changed files with 41,907 additions and 4,146 deletions.
diff --git a/.codecov.yml b/.codecov.yml
diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -1,5 +1,5 @@
 python:
-  version: 3.8
+  version: 3.12
   install:
     - method: pip
       path: .

diff --git a/.travis.yml b/.travis.yml
@@ -8,6 +8,11 @@ matrix:
   include:
     - python: 3.6
     - python: 3.7
+    - python: 3.8
+    - python: 3.9
+    - python: 3.10
+    - python: 3.11
+    - python: 3.12
 
 # will still build pull requests
 branches:
@@ -46,10 +51,10 @@ install:
   - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION
   - source activate test-environment
   # Install the library with test requirements
-  - conda install -c conda-forge openbabel rdkit tensorflow numpy scipy pandas scikit-learn matplotlib seaborn future six
-  - pip install pytest
+  - conda install -c conda-forge openbabel tensorflow numpy scipy pandas scikit-learn matplotlib seaborn future six
+  - pip install pytest rdkit shap lime
   - pip install -e .[tests]
-  - pip install codecov
+  - pip install codecov-cli
 
 
 

diff --git a/README.md b/README.md
@@ -29,27 +29,32 @@ The format of library is similar to the well known libraries like Scikit-learn.
    - to find out about the latest version and release history, click [here](https://pypi.org/project/chemml/#history)
 
 ## Installation and Dependencies:
-You can download ChemML from PyPI via pip.
+We strongly recommend that you install ChemML in an Anaconda environment. The instructions to create the environment, install ChemML’s dependencies, and subsequently install ChemML from the Python Package Index (PyPI) via pip are as follows:
 
-    pip install chemml --user -U
+    conda create --name chemml_env python=3.12
+    source activate chemml_env
+    conda install -c conda-forge openbabel nb_conda_kernels python-graphviz
+    pip install chemml
 
 Here is a list of external libraries that will be installed with chemml:
    - numpy
    - pandas
    - tensorflow
+   - rdkit
    - scikit-learn
    - matplotlib
    - seaborn
    - lxml
    - openpyxl
    - ipywidgets
+   - shap
+   - lime
 
-We strongly recommend you to install ChemML in an Anaconda environment. The instructions to create the environment, install ChemML’s dependencies, and subsequently install Chemml using the Python Package Index (PyPI) via pip are as follows:
+PyTorch is also required and must be installed separately, because the correct build depends on your operating system and GPU configuration. To install it, follow the wizard on this page:
 
-    conda create --name chemml_env python=3.8
-    source activate chemml_env
-    conda install -c conda-forge openbabel rdkit nb_conda_kernels python-graphviz
-    pip install chemml
+https://pytorch.org/get-started/locally/
+
+Note: The PyTorch build for CUDA 12.4 has been tested with CUDA 12.5 and works, but later CUDA versions may not be compatible. If you run into CUDA compatibility issues, follow the instructions on the PyTorch website to build PyTorch from source.
 
 ## Citation:
 Please cite the use of ChemML as:
@@ -112,6 +117,7 @@ ChemML is distributed under 3-Clause BSD License (https://opensource.org/license
     - Mojtaba Haghighatlari
     - Aditya Sonpal, [email protected]
     - Aatish Pradhan, [email protected]
+    - Nitin Murthy, [email protected]
     University at Buffalo - The State University of New York (UB)
 
 ### Contributors:

diff --git a/chemml/__init__.py b/chemml/__init__.py
@@ -1,6 +1,6 @@
 # __name__ = "chemml"
-__version__ = "1.2"
-__author__ = ["Aditya Sonpal ([email protected])", "Gaurav Vishwakarma ([email protected]) ", "Aatish Pradhan ([email protected])","Mojtaba Haghighatlari ([email protected])", "Johannes Hachmann ([email protected])"]
+__version__ = "1.3.1"
+__author__ = ["Aditya Sonpal ([email protected])", "Gaurav Vishwakarma ([email protected]) ", "Aatish Pradhan ([email protected])", "Nitin Murthy ([email protected])","Mojtaba Haghighatlari ([email protected])", "Johannes Hachmann ([email protected])"]
 
 
 # import sys

diff --git a/chemml/autoML/models_dict.py b/chemml/autoML/models_dict.py
@@ -8,11 +8,13 @@
     "RandomForestRegressor":"sklearn.ensemble",
     "DecisionTreeRegressor":"sklearn.tree",
     "LogisticRegression": "sklearn.linear_model",
+    "XGBRegressor": "xgboost",
+    "LGBMRegressor": "lightgbm",
     "RandomForestClassifier": "sklearn.ensemble",
     "DecisionTreeClassifier": "sklearn.tree",
     "SVC": "sklearn.svm",
     "KNeighborsClassifier": "sklearn.neighbors",
-    # "LightGBM":,
-    # "XGBoost":,
+    "LGBMClassifier": "lightgbm",
+    "XGBClassifier": "xgboost",
 
 }
diff --git a/chemml/autoML/space.py b/chemml/autoML/space.py
@@ -60,8 +60,40 @@
                                 {'dummy': {'uniform': [np.log(0.0001), np.log(0.1)],                
                                 'mutation': [0, 1]}},
                                 ],
+                'XGBRegressor':[
+                                {'n_estimators': {'choice': np.random.randint(100,200,size=10).tolist()}},
+                                {'reg_alpha': {'uniform': [np.log(0.0001), np.log(0.1)],                
+                                'mutation': [0, 1]}}, 
+                                {'reg_lambda': {'uniform': [np.log(0.0001), np.log(0.1)],                
+                                'mutation': [0, 1]}},
+                ]
                 }
 
+'''
+# XGBoost Hyper Parameter Optimization - Copilot data
+hyperparameters = {
+    'learning_rate': [0.01, 0.05, 0.1, 0.2, 0.3],  # Typical values range from 0.01 to 0.3[^1^][4]
+    'max_depth': [3, 5, 7, 9],  # Maximum depth of a tree
+    'min_child_weight': [1, 3, 5],  # Minimum sum of instance weight needed in a child
+    'gamma': [0, 0.1, 0.2],  # Minimum loss reduction required to make a further partition on a leaf node of the tree
+    'colsample_bytree': [0.3, 0.5, 0.7, 1],  # Subsample ratio of columns when constructing each tree
+    'n_estimators': [100, 200, 300, 400, 500],  # Number of gradient boosted trees. Equivalent to number of boosting rounds
+    'subsample': [0.5, 0.7, 1],  # Subsample ratio of the training instances
+    'reg_alpha': [0, 0.5, 1],  # L1 regularization term on weights
+    'reg_lambda': [1, 1.5, 2]  # L2 regularization term on weights
+}
+LightGBM Regressor hyperparameter space
+hyperparameters = {
+    'num_leaves': [31, 50, 70, 90],  # Maximum tree leaves for base learners
+    'learning_rate': [0.01, 0.05, 0.1, 0.2, 0.3],
+    'n_estimators': [100, 200, 300, 400, 500],
+    'max_depth': [-1, 5, 10, 15],  # Maximum tree depth for base learners
+    'min_data_in_leaf': [10, 20, 30, 40, 50],  # Minimum number of data in one leaf
+    'bagging_fraction': [0.5, 0.7, 0.9, 1]  # Subsample ratio of the training instance
+}
+
+'''
+
 space_models_classifiers = {
                 "LogisticRegression": [
                         {'C': {'choice': np.linspace(start=0.1, stop=100, num=20, endpoint=True).tolist()}},
@@ -98,4 +130,29 @@
                         {'dummy': {'uniform': [np.log(0.0001), np.log(0.1)],                
                                 'mutation': [0, 1]}}
                         ],
-                }
+                }
+
+
+'''
+XGBoost Classifier hyperparameter space
+hyperparameters = {
+    'learning_rate': [0.01, 0.05, 0.1, 0.2, 0.3],
+    'max_depth': [3, 5, 7, 9],
+    'min_child_weight': [1, 3, 5],
+    'gamma': [0, 0.1, 0.2],
+    'colsample_bytree': [0.3, 0.5, 0.7, 1],
+    'n_estimators': [100, 200, 300, 400, 500],
+    'subsample': [0.5, 0.7, 1],
+    'reg_alpha': [0, 0.5, 1],
+    'reg_lambda': [1, 1.5, 2]
+}
+LightGBM Classifier hyperparameter space
+hyperparameters = {
+    'num_leaves': [31, 50, 70, 90],  # Maximum tree leaves for base learners
+    'learning_rate': [0.01, 0.05, 0.1, 0.2, 0.3],
+    'n_estimators': [100, 200, 300, 400, 500],
+    'max_depth': [-1, 5, 10, 15],  # Maximum tree depth for base learners
+    'min_data_in_leaf': [10, 20, 30, 40, 50],  # Minimum number of data in one leaf
+    'bagging_fraction': [0.5, 0.7, 0.9, 1]  # Subsample ratio of the training instance
+}
+'''
diff --git a/chemml/chem/RDKFP.py b/chemml/chem/RDKFP.py
@@ -89,7 +89,7 @@ def represent(self, molecules):
         elif isinstance(molecules, Molecule):
             molecules = np.array([molecules])
         else:
-            msg = "The molecule must be a chemml.chem.Molecule object or a list of objets."
+            msg = "The molecule must be a chemml.chem.Molecule object or a list of objects."
             raise ValueError(msg)
 
         if molecules.ndim >1:

diff --git a/chemml/chem/__init__.py b/chemml/chem/__init__.py
@@ -17,6 +17,7 @@
 from .CoulMat import BagofBonds
 from .RDKFP import RDKitFingerprint
 from .Dragon import Dragon
+from .foss_descriptors import RDKDesc, Mordred, PadelDesc
 from .local_features import atom_features
 from .local_features import bond_features
 from .local_features import num_atom_features
@@ -30,6 +31,9 @@
     'BagofBonds',
     'RDKitFingerprint',
     'Dragon',
+    'RDKDesc',
+    'Mordred',
+    'PadelDesc',
     'atom_features',
     'bond_features',
     'num_atom_features',