diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..1bccc1fa
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+*.h5 filter=lfs diff=lfs merge=lfs -text
diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml
index d6dcaec1..17af60f8 100644
--- a/.github/workflows/pytest.yaml
+++ b/.github/workflows/pytest.yaml
@@ -18,10 +18,13 @@ jobs:
         # windows-latest is not supported because pyscf is not supported on windows
         # https://pyscf.org/user/install.html
         os: ["ubuntu-latest", "macos-latest"]
-        py: ["3.9", "3.10", "3.11", "3.12"]
+        py: ["3.10", "3.11", "3.12"]

    steps:
      - uses: "actions/checkout@v4"
+        # Fetch Git LFS files (the HDF5 databases are tracked with LFS)
+        with:
+          lfs: true

      - name: Setup python for test ${{ matrix.py }}
        uses: actions/setup-python@v5
@@ -30,13 +33,14 @@ jobs:

      - name: Install development version
        run: |
-          pip install -v .
+          pip install -e .

      - name: Install extra test dependencies
        run: |
          pip install --upgrade pip
          pip install .[test_extra]

+
      - name: Run pytest default tests
        uses: pavelzw/pytest-action@v2
        with:
@@ -59,3 +63,4 @@ jobs:
          click-to-expand: true
          report-title: 'Dev Test Report'
          pytest-args: '-m dev'
+
diff --git a/atomdb/data/database_beta_1.3.0.h5 b/atomdb/data/database_beta_1.3.0.h5
index 744f7497..b4904224 100644
Binary files a/atomdb/data/database_beta_1.3.0.h5 and b/atomdb/data/database_beta_1.3.0.h5 differ
diff --git a/atomdb/data/elements_data.h5 b/atomdb/data/elements_data.h5
new file mode 100644
index 00000000..f8cb3e66
--- /dev/null
+++ b/atomdb/data/elements_data.h5
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fde2a5f5db8c0adb8418016ea8b85d5f12af4e2e40a7f07bef7bcfe474ae3e81
+size 105117616
diff --git a/atomdb/datasets/datasets_data.h5 b/atomdb/datasets/datasets_data.h5
new file mode 100644
index 00000000..b4c77869
--- /dev/null
+++ b/atomdb/datasets/datasets_data.h5
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afcf1e437f143d5861f8c30cd890cc7996c40ddc12bb112d949f4db64537ed74
+size 922707071
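The two `.h5` entries above are Git LFS pointer stubs rather than the binary payloads themselves. A quick way to confirm that a checkout actually materialized them is the minimal sketch below; it assumes `git lfs pull` has run and the package is importable:

```python
# Minimal sanity check that the LFS-tracked databases were materialized as real
# HDF5 files rather than left as text pointer stubs (assumes `git lfs pull` ran).
from importlib_resources import files
import tables as pt

for pkg, fname in [("atomdb.data", "elements_data.h5"), ("atomdb.datasets", "datasets_data.h5")]:
    h5path = files(pkg).joinpath(fname)
    with pt.open_file(str(h5path), mode="r") as h5file:
        print(fname, "->", h5file.root._v_title)
```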
diff --git a/atomdb/datasets/slater/h5file_creator.py b/atomdb/datasets/slater/h5file_creator.py
new file mode 100644
index 00000000..c5c8033b
--- /dev/null
+++ b/atomdb/datasets/slater/h5file_creator.py
@@ -0,0 +1,420 @@
+import warnings
+import numpy as np
+from importlib_resources import files
+import tables as pt
+from dataclasses import asdict
+from atomdb.datasets.slater.run import NPOINTS
+from atomdb.periodic_test import element_symbol_map, get_scalar_data, ElementAttr
+
+
+# Suppress NaturalNameWarning messages from PyTables.
+warnings.filterwarnings("ignore", category=pt.NaturalNameWarning)
+
+max_norba = 56
+
+SLATER_PROPERTY_CONFIGS = [
+    {"SpeciesInfo": "elem", "type": "string"},
+    {"SpeciesInfo": "nexc", "type": "int"},
+    {"SpeciesInfo": "charge", "type": "int"},
+    {"SpeciesInfo": "mult", "type": "int"},
+    {"SpeciesInfo": "nelec", "type": "int"},
+    {"SpeciesInfo": "nspin", "type": "int"},
+    {"SpeciesInfo": "energy", "type": "float"},
+    {"SpeciesInfo": "ip", "type": "float"},
+    {"SpeciesInfo": "mu", "type": "float"},
+    {"SpeciesInfo": "eta", "type": "float"},
+    {"SpeciesInfo": "nbasis", "type": "int"},
+    {
+        "property": "obasis_name",
+        "table_name": "obasis_name",
+        "description": "Orbital basis name",
+        "type": "string",
+    },
+    {"array_property": "mo_energy_a", "table_name": "mo_energy_a", "description": "Alpha MO Energies"},
+    {"array_property": "mo_energy_b", "table_name": "mo_energy_b", "description": "Beta MO Energies"},
+    {"array_property": "mo_occs_a", "table_name": "mo_occs_a", "description": "Alpha MO Occupations"},
+    {"array_property": "mo_occs_b", "table_name": "mo_occs_b", "description": "Beta MO Occupations"},
+    {"Carray_property": "rs", "table_name": "rs", "folder": "RadialGrid", "spins": "no"},
+    {"Carray_property": "mo_dens_a", "table_name": "mo_dens_a", "folder": "Density", "spins": "yes"},
+    {"Carray_property": "mo_dens_b", "table_name": "mo_dens_b", "folder": "Density", "spins": "yes"},
+    {"Carray_property": "dens_tot", "table_name": "dens_tot", "folder": "Density", "spins": "no"},
+    {"Carray_property": "mo_d_dens_a", "table_name": "mo_d_dens_a", "folder": "DensityGradient", "spins": "yes"},
+    {"Carray_property": "mo_d_dens_b", "table_name": "mo_d_dens_b", "folder": "DensityGradient", "spins": "yes"},
+    {"Carray_property": "d_dens_tot", "table_name": "d_dens_tot", "folder": "DensityGradient", "spins": "no"},
+    {"Carray_property": "mo_dd_dens_a", "table_name": "mo_dd_dens_a", "folder": "DensityLaplacian", "spins": "yes"},
+    {"Carray_property": "mo_dd_dens_b", "table_name": "mo_dd_dens_b", "folder": "DensityLaplacian", "spins": "yes"},
+    {"Carray_property": "dd_dens_tot", "table_name": "dd_dens_tot", "folder": "DensityLaplacian", "spins": "no"},
+    {"Carray_property": "mo_ked_a", "table_name": "mo_ked_a", "folder": "KineticEnergyDensity", "spins": "yes"},
+    {"Carray_property": "mo_ked_b", "table_name": "mo_ked_b", "folder": "KineticEnergyDensity", "spins": "yes"},
+    {"Carray_property": "ked_tot", "table_name": "ked_tot", "folder": "KineticEnergyDensity", "spins": "no"},
+]
+
+
+class IntPropertyDescription(pt.IsDescription):
+    value = pt.Int32Col()
+
+
+class StringPropertyDescription(pt.IsDescription):
+    value = pt.StringCol(25)
+
+
+class FloatPropertyDescription(pt.IsDescription):
+    value = pt.Float64Col()
+
+
+# Static definition: arrays are zero-padded to the largest orbital count (max_norba)
+class ArrayPropertyDescription(pt.IsDescription):
+    value = pt.Float64Col(shape=(max_norba,))
+
+
+class SpeciesInfo(pt.IsDescription):
+    """Schema for the species_info table."""
+
+    elem = pt.StringCol(25)
+    charge = pt.Int32Col()
+    mult = pt.Int32Col()
+    nexc = pt.Int32Col()
+    nelec = pt.Int32Col()
+    nspin = pt.Int32Col()
+    nbasis = pt.Int32Col()
+    energy = pt.Float64Col()
+    ip = pt.Float64Col()
+    mu = pt.Float64Col()
+    eta = pt.Float64Col()
+
+
+def create_species_info_table(species_info_table_row, prop_name, prop_type, value):
+    """Set a property value on a row of the species_info table.
+
+    Args:
+        species_info_table_row (tables.tableextension.Row): The row of the table that holds all the columns.
+        prop_name (str): Name of the property column to set.
+        prop_type (str): Data type of the property ('int', 'string', or 'float').
+        value: The value to store in the column.
+    """
+    if prop_type == "int":
+        value = int(value) if value is not None else 0
+
+    elif prop_type == "string":
+        value = str(value) if value is not None else ""
+
+    elif prop_type == "float":
+        value = float(value) if value is not None else np.nan
+
+    species_info_table_row[prop_name] = value
+
+
+def create_properties_tables(hdf5_file, parent_folder, config, value):
+    """Create a table for storing a single-valued property in the HDF5 file.
+
+    Args:
+        hdf5_file (tables.File): The open HDF5 file where the table will be created.
+        parent_folder (tables.Group): The parent folder in the HDF5 file where the table will be stored.
+        config (dict): Configuration dictionary containing table metadata, including:
+            - 'table_name': Name of the table.
+            - 'description': Description of the table.
+            - 'type': Data type of the property ('int', 'string', or 'float').
+        value: The value to store in the table.
+    """
+    # Extract table metadata from config
+    table_name = config["table_name"]
+    table_description = config["description"]
+    prop_type = config["type"]  # avoid shadowing the builtin `type`
+
+    if prop_type == "int":
+        row_description = IntPropertyDescription
+        value = int(value) if value is not None else 0
+
+    elif prop_type == "string":
+        row_description = StringPropertyDescription
+        value = str(value) if value is not None else ""
+
+    elif prop_type == "float":
+        row_description = FloatPropertyDescription
+        value = float(value) if value is not None else np.nan
+
+    # Create the table and populate the data
+    table = hdf5_file.create_table(parent_folder, table_name, row_description, table_description)
+    row = table.row
+    row["value"] = value
+    row.append()
+    table.flush()
+
+
+def create_properties_arrays(hdf5_file, parent_folder, table_name, description, data):
+    """Create a table for storing an array property in the HDF5 file.
+
+    Args:
+        hdf5_file (tables.File): The open HDF5 file where the array will be created.
+        parent_folder (tables.Group): The parent folder in the HDF5 file where the table will be stored.
+        table_name (str): Name of the table to create.
+        description (str): Description of the table.
+        data (numpy.ndarray): The array data to store in the table.
+    """
+    filters = pt.Filters(complevel=5, complib="blosc2")
+
+    # Create the table and populate the data, zero-padding up to max_norba entries
+    table = hdf5_file.create_table(
+        parent_folder, table_name, ArrayPropertyDescription, description, filters=filters
+    )
+    row = table.row
+    padded_data = np.pad(data, (0, max_norba - len(data)), "constant", constant_values=0)
+    row["value"] = padded_data
+    row.append()
+    table.flush()
+
+
+def create_spins_array(h5file, parent_folder, key, array_data, shape):
+    """Create a CArray for storing spin-dependent array data in the HDF5 file.
+
+    Args:
+        h5file (tables.File): The open HDF5 file where the CArray will be created.
+        parent_folder (tables.Group): The parent folder in the HDF5 file where the CArray will be stored.
+        key (str): Name of the CArray.
+        array_data (numpy.ndarray): The array data to store in the CArray.
+        shape (int): The total size of the CArray.
+    """
+    data_length = len(array_data)
+    filters = pt.Filters(complevel=5, complib="blosc2")
+
+    # Create the CArray and populate the data, zero-filling the unused tail
+    array = h5file.create_carray(
+        parent_folder, key, pt.Float64Atom(), shape=(shape,), filters=filters
+    )
+    array[:data_length] = array_data
+    array[data_length:] = 0
+
+
+def create_tot_array(h5file, parent_folder, key, array_data):
+    """Create a CArray for storing total (non-spin-dependent) array data in the HDF5 file.
+
+    Args:
+        h5file (tables.File): The open HDF5 file where the CArray will be created.
+        parent_folder (tables.Group): The parent folder in the HDF5 file where the CArray will be stored.
+        key (str): Name of the CArray.
+        array_data (numpy.ndarray): The array data to store in the CArray.
+    """
+    data_length = len(array_data)
+    filters = pt.Filters(complevel=5, complib="blosc2")
+
+    # Create the CArray and populate the data
+    tot_gradient_array = h5file.create_carray(
+        parent_folder, key, pt.Float64Atom(), shape=(NPOINTS,), filters=filters
+    )
+    if data_length < NPOINTS:
+        tot_gradient_array[:data_length] = array_data
+        tot_gradient_array[data_length:] = 0
+
+    else:
+        tot_gradient_array[:] = array_data
+
+
+def create_hdf5_file(DATASETS_H5FILE, fields, dataset, mult):
+    """Create an HDF5 folder hierarchy with structured data for a specific dataset and element.
+
+    Args:
+        DATASETS_H5FILE (tables.File): An open PyTables HDF5 file object to store the data.
+        fields (dataclass): A dataclass containing the fields to store in the HDF5 file.
+        dataset (str): Name of the dataset.
+        mult (int): Multiplicity.
+    """
+    fields = asdict(fields)
+    dataset = dataset.lower()
+    shape = NPOINTS * max_norba
+
+    elem = fields["elem"]
+    nexc = fields["nexc"]
+    atnum = element_symbol_map[elem][ElementAttr.atnum]
+    charge = atnum - fields["nelec"]
+
+    # NOTE: charge and mult could be derived from `fields` instead of being passed in.
+    dataset_folder = f"/Datasets/{dataset}"
+    elem_folder = f"{dataset_folder}/{elem}"
+    specific_elem_folder = f"{elem_folder}/{elem}_{charge:03d}_{mult:03d}_{nexc:03d}"
+
+    # Create dataset folder if it doesn't exist
+    if dataset_folder not in DATASETS_H5FILE:
+        DATASETS_H5FILE.create_group("/Datasets", dataset, f"{dataset} Data")
+
+    # Create element folder if it doesn't exist
+    if elem_folder not in DATASETS_H5FILE:
+        DATASETS_H5FILE.create_group(dataset_folder, elem, f"{elem} Data")
+
+    # Create specific element folder (charge/mult/nexc) if it doesn't exist
+    if specific_elem_folder not in DATASETS_H5FILE:
+        DATASETS_H5FILE.create_group(
+            elem_folder,
+            f"{elem}_{charge:03d}_{mult:03d}_{nexc:03d}",
+            f"{elem} {charge} {mult} {nexc} Data",
+        )
+
+    folders = {
+        "Properties": DATASETS_H5FILE.create_group(
+            specific_elem_folder, "Properties", "Properties Data"
+        ),
+        "RadialGrid": DATASETS_H5FILE.create_group(
+            specific_elem_folder, "RadialGrid", "Radial Grid Data"
+        ),
+        "Density": DATASETS_H5FILE.create_group(specific_elem_folder, "Density", "Density Data"),
+        "DensityGradient": DATASETS_H5FILE.create_group(
+            specific_elem_folder, "DensityGradient", "Density Gradient Data"
+        ),
+        "DensityLaplacian": DATASETS_H5FILE.create_group(
+            specific_elem_folder, "DensityLaplacian", "Density Laplacian Data"
+        ),
+        "KineticEnergyDensity": DATASETS_H5FILE.create_group(
+            specific_elem_folder, "KineticEnergyDensity", "Kinetic Energy Density Data"
+        ),
+    }
+
+    # Create the basic species table and its row
+    species_info_table = DATASETS_H5FILE.create_table(
+        folders["Properties"], "species_info", SpeciesInfo, "Species Information"
+    )
+    species_info_table_row = species_info_table.row
+
+    # Populate the basic property tables
+    for config in SLATER_PROPERTY_CONFIGS:
+        if "SpeciesInfo" in config:
+            prop_name = config["SpeciesInfo"]
+            create_species_info_table(
+                species_info_table_row, prop_name, config["type"], fields[prop_name]
+            )
+
+        elif "property" in config:
+            prop_name = config["property"]
+            create_properties_tables(
+                DATASETS_H5FILE, folders["Properties"], config, fields[prop_name]
+            )
+
+        # Create array property tables
+        elif "array_property" in config:
+            prop_name = config["array_property"]
+            create_properties_arrays(
+                DATASETS_H5FILE,
+                folders["Properties"],
+                config["table_name"],
+                config["description"],
+                fields[prop_name],
+            )
+
+        elif "Carray_property" in config:
+            prop_name = config["Carray_property"]
+            parent_folder = folders[config["folder"]]
+            if config["spins"] == "yes":
+                create_spins_array(
+                    DATASETS_H5FILE, parent_folder, config["table_name"], fields[prop_name], shape
+                )
+            elif config["spins"] == "no":
+                create_tot_array(
+                    DATASETS_H5FILE, parent_folder, config["table_name"], fields[prop_name]
+                )
+
+    species_info_table_row.append()
+    species_info_table.flush()
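A hypothetical end-to-end sketch of the writer above: compile one species with the Slater `run` script (shown in the next file) and append it to `datasets_data.h5`. It assumes the raw Slater tables are available under `DEFAULT_DATAPATH`:

```python
# Sketch only: compile one species and write it into the bundled datasets file.
import tables as pt
from importlib_resources import files

from atomdb.datasets.slater.run import run
from atomdb.datasets.slater.h5file_creator import create_hdf5_file
from atomdb.utils import DEFAULT_DATAPATH

fields = run("Be", 0, 1, 0, "slater", DEFAULT_DATAPATH)  # returns a DefinitionClass
h5path = files("atomdb.datasets").joinpath("datasets_data.h5")
with pt.open_file(str(h5path), mode="a") as h5file:
    create_hdf5_file(h5file, fields, "slater", mult=1)
```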
diff --git a/atomdb/datasets/slater/run.py b/atomdb/datasets/slater/run.py
index 391e0c91..a74f10b7 100644
--- a/atomdb/datasets/slater/run.py
+++ b/atomdb/datasets/slater/run.py
@@ -19,14 +19,15 @@
 import re

 import atomdb
-from atomdb.periodic import Element
 from grid.onedgrid import UniformInteger
 from grid.rtransform import ExpRTransform

 # from importlib_resources import files
 from atomdb.utils import DEFAULT_DATAPATH
 from scipy.special import factorial
-
+from dataclasses import dataclass
+from typing import Optional, Dict
+from atomdb.periodic_test import element_symbol_map, get_scalar_data, ElementAttr

 __all__ = ["AtomicDensity", "load_slater_wfn", "run"]

@@ -39,6 +40,65 @@
 # DATAPATH = os.path.abspath(DATAPATH._paths[0])


+@dataclass
+class DefinitionClass:
+    """Data structure for the Slater dataset."""
+
+    # Species info
+    elem: str
+    atnum: int
+    nelec: int
+    nspin: int
+    nexc: int
+    nbasis: int
+    charge: int
+    mult: int
+    obasis_name: str
+
+    # Properties aggregated from multiple sources, keyed by source name
+    atmass: Optional[Dict[str, float]]
+    cov_radius: Optional[Dict[str, float]]
+    vdw_radius: Optional[Dict[str, float]]
+    at_radius: Optional[Dict[str, float]]
+    polarizability: Optional[Dict[str, float]]
+    dispersion: Optional[Dict[str, float]]
+
+    # Scalar energy and CDFT-related properties
+    energy: Optional[float]
+    ip: Optional[float]
+    mu: Optional[float]
+    eta: Optional[float]
+
+    # Orbital energies and occupations
+    mo_energy_a: Optional[np.ndarray]
+    mo_energy_b: Optional[np.ndarray]
+    mo_occs_a: Optional[np.ndarray]
+    mo_occs_b: Optional[np.ndarray]
+
+    # Radial grid
+    rs: Optional[np.ndarray] = None
+
+    # Density
+    mo_dens_a: Optional[np.ndarray] = None
+    mo_dens_b: Optional[np.ndarray] = None
+    dens_tot: Optional[np.ndarray] = None
+
+    # Density gradient
+    mo_d_dens_a: Optional[np.ndarray] = None
+    mo_d_dens_b: Optional[np.ndarray] = None
+    d_dens_tot: Optional[np.ndarray] = None
+
+    # Density Laplacian
+    mo_dd_dens_a: Optional[np.ndarray] = None
+    mo_dd_dens_b: Optional[np.ndarray] = None
+    dd_dens_tot: Optional[np.ndarray] = None
+
+    # Kinetic energy density
+    mo_ked_a: Optional[np.ndarray] = None
+    mo_ked_b: Optional[np.ndarray] = None
+    ked_tot: Optional[np.ndarray] = None
+
+
 class AtomicDensity:
     r"""
     Atomic Density Class.
@@ -1067,7 +1127,7 @@ def run(elem, charge, mult, nexc, dataset, datapath):

     # Set up internal variables
     elem = atomdb.element_symbol(elem)
-    atnum = atomdb.element_number(elem)
+    atnum = element_symbol_map[elem][ElementAttr.atnum]
     nelec = atnum - charge
     nspin = mult - 1

@@ -1088,6 +1148,8 @@ def run(elem, charge, mult, nexc, dataset, datapath):
     # Get electronic structure data
     energy = species.energy[0]  # get energy from list
     norba = len(mo_occ) // 2
+    nbasis = norba
+
     # Get MO energies and occupations
     mo_e_up = species.orbitals_energy.ravel()[:norba]
     mo_e_dn = species.orbitals_energy.ravel()[norba:]
@@ -1121,17 +1183,13 @@ def run(elem, charge, mult, nexc, dataset, datapath):
     mo_ked_a = species.eval_orbs_ked_positive_definite(rs)[:norba, :]
     mo_ked_b = species.eval_orbs_ked_positive_definite(rs)[:norba, :]

-    # Get information about the element
-    atom = Element(elem)
-    atmass = atom.mass
-    cov_radius, vdw_radius, at_radius, polarizability, dispersion = [
-        None,
-    ] * 5
-    # overwrite values for neutral atomic species
-    if charge == 0:
-        cov_radius, vdw_radius, at_radius = (atom.cov_radius, atom.vdw_radius, atom.at_radius)
-        polarizability = atom.pold
-        dispersion = {"C6": atom.c6}
+    # Get periodic data
+    cov_radius = get_scalar_data("cov_radius", atnum, nelec)
+    vdw_radius = get_scalar_data("vdw_radius", atnum, nelec)
+    at_radius = get_scalar_data("at_radius", atnum, nelec)
+    polarizability = get_scalar_data("polarizability", atnum, nelec)
+    dispersion = get_scalar_data("dispersion", atnum, nelec)
+    atmass = get_scalar_data("atmass", atnum, nelec)

     # Conceptual-DFT properties (WIP)
     ip = -mo_e_up[np.sum(occs_up) - 1]  # - energy of HOMO
@@ -1139,10 +1197,13 @@ def run(elem, charge, mult, nexc, dataset, datapath):
     mu = None
     eta = None

-    # Return Species instance
-    fields = dict(
+    # Return fields
+    fields = DefinitionClass(
         elem=elem,
+        charge=charge,
+        mult=mult,
         atnum=atnum,
+        nbasis=norba,
         obasis_name="Slater",
         nelec=nelec,
         nspin=nspin,
@@ -1179,4 +1240,4 @@ def run(elem, charge, mult, nexc, dataset, datapath):
         mo_ked_b=mo_ked_b.flatten(),
         ked_tot=ked_tot,
     )
-    return atomdb.Species(dataset, fields)
+    return fields
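Because `DefinitionClass` is a plain dataclass, the HDF5 writer can flatten it with `dataclasses.asdict`; adding a new per-species quantity only requires a field on the dataclass plus a matching entry in `SLATER_PROPERTY_CONFIGS`. A small sketch (same assumption as above about the raw Slater data being present):

```python
# Sketch: round-trip a compiled species through asdict, as the writer does.
from dataclasses import asdict

from atomdb.datasets.slater.run import run
from atomdb.utils import DEFAULT_DATAPATH

fields = run("H", 0, 2, 0, "slater", DEFAULT_DATAPATH)
record = asdict(fields)  # plain dict of scalars and numpy arrays
print(record["elem"], record["energy"], record["rs"].shape)
```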
diff --git a/atomdb/migration/datasets/datasets_data.py b/atomdb/migration/datasets/datasets_data.py
new file mode 100644
index 00000000..d9557425
--- /dev/null
+++ b/atomdb/migration/datasets/datasets_data.py
@@ -0,0 +1,20 @@
+"""Recreate datasets_data.h5 from scratch, leaving an empty group for each dataset."""
+
+from importlib_resources import files
+import tables as pt
+
+hdf5_file = files("atomdb.datasets").joinpath("datasets_data.h5")
+
+with pt.open_file(hdf5_file, mode="w", title="Datasets Data Files") as h5file:
+    # create the root folder 'Datasets'
+    datasets_folder = h5file.create_group("/", "Datasets", "Datasets Data")
+
+    # create a folder for each dataset to hold its data files
+    h5file.create_group(datasets_folder, "slater", "Slater dataset")
+    h5file.create_group(datasets_folder, "gaussian", "Gaussian dataset")
+    h5file.create_group(datasets_folder, "hci", "HCI dataset")
+    h5file.create_group(datasets_folder, "nist", "NIST dataset")
+    h5file.create_group(datasets_folder, "numeric", "Numeric dataset")
+    h5file.create_group(datasets_folder, "uhf_augccpvdz", "UHF aug-cc-pVDZ dataset")
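After running the migration script above, the file should contain nothing but the empty per-dataset groups. A short inspection sketch:

```python
# Walk the freshly created file and print the group hierarchy under /Datasets.
import tables as pt
from importlib_resources import files

h5path = files("atomdb.datasets").joinpath("datasets_data.h5")
with pt.open_file(str(h5path), mode="r") as h5file:
    for group in h5file.walk_groups("/Datasets"):
        print(group._v_pathname)
```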
diff --git a/atomdb/migration/periodic/elements_data.py b/atomdb/migration/periodic/elements_data.py
new file mode 100644
index 00000000..33a6c636
--- /dev/null
+++ b/atomdb/migration/periodic/elements_data.py
@@ -0,0 +1,374 @@
+import csv
+import tables as pt
+import numpy as np
+from importlib_resources import files
+import warnings
+from atomdb.utils import CONVERTOR_TYPES
+
+# Suppress NaturalNameWarning messages from PyTables.
+warnings.filterwarnings("ignore", category=pt.NaturalNameWarning)
+
+# Set up input/output paths
+elements_data_csv = files("atomdb.data").joinpath("elements_data.csv")
+data_info_csv = files("atomdb.data").joinpath("data_info.csv")
+hdf5_file = files("atomdb.data").joinpath("elements_data.h5")
+
+
+# Properties stored for each element in the HDF5 file.
+PROPERTY_CONFIGS = [
+    {"basic_property": "atnum", "table_name": "atnum", "description": "Atomic Number", "type": "int"},
+    {"basic_property": "symbol", "table_name": "symbol", "description": "Atom Symbol", "type": "string"},
+    {"basic_property": "name", "table_name": "name", "description": "Atom Name", "type": "string"},
+    {"basic_property": "group", "table_name": "group", "description": "Atom Group", "type": "int"},
+    {"basic_property": "period", "table_name": "period", "description": "Atom Period", "type": "int"},
+    {"basic_property": "mult", "table_name": "mult", "description": "Atom Multiplicity", "type": "int"},
+    {"property": "cov_radius", "table_name": "cov_radius", "description": "Covalent Radius"},
+    {"property": "vdw_radius", "table_name": "vdw_radius", "description": "Van der Waals Radius"},
+    {"property": "at_radius", "table_name": "at_radius", "description": "Atomic Radius"},
+    {"property": "mass", "table_name": "atmass", "description": "Atomic Mass"},
+    {"property": "pold", "table_name": "polarizability", "description": "Polarizability"},
+    {"property": "c6", "table_name": "dispersion_c6", "description": "C6 Dispersion Coefficient"},
+    {"property": "eneg", "table_name": "eneg", "description": "Electronegativity"},
+]
+
+
+class NumberElementDescription(pt.IsDescription):
+    value = pt.Int32Col()
+
+
+class StringElementDescription(pt.IsDescription):
+    value = pt.StringCol(25)
+
+
+class PropertyValues(pt.IsDescription):
+    """Schema for property value tables."""
+
+    source = pt.StringCol(30, pos=0)
+    unit = pt.StringCol(20, pos=1)
+    value = pt.Float64Col(pos=2)
+
+
+class ElementsDataInfo(pt.IsDescription):
+    """Schema for the property_info table."""
+
+    property_key = pt.StringCol(20, pos=0)
+    property_name = pt.StringCol(50, pos=1)
+    source_key = pt.StringCol(30, pos=2)
+    property_description = pt.StringCol(250, pos=3)
+    reference = pt.StringCol(250, pos=4)
+    doi = pt.StringCol(150, pos=5)
+    notes = pt.StringCol(500, pos=6)
+
+
+def create_properties_tables(
+    hdf5_file,
+    parent_folder,
+    table_name,
+    table_description,
+    row_description,
+    columns,
+    row_data,
+    sources_data,
+    units_data,
+):
+    """
+    Create a table in the HDF5 file for a specific property.
+
+    Args:
+        hdf5_file: PyTables file object.
+        parent_folder: Group where the table will be created.
+        table_name (str): Name of the table.
+        table_description (str): Description of the table.
+        row_description: PyTables IsDescription class for the table schema.
+        columns (list): List of column names from the CSV to include.
+        row_data (dict): Data for the current element.
+        sources_data (dict): Source of each property.
+        units_data (dict): Unit of each property.
+    """
+    # Create a new table in the HDF5 file.
+    table = hdf5_file.create_table(parent_folder, table_name, row_description, table_description)
+
+    # Iterate over the list of columns relevant to the current table.
+    for col in columns:
+        source = sources_data.get(col, "unknown")  # default to 'unknown' if not found
+        unit = units_data.get(col, "unknown")  # default to 'unknown' if not found
+        value = np.nan
+
+        if col in row_data and row_data[col].strip():
+            try:
+                value = float(row_data[col])
+                value = CONVERTOR_TYPES[unit](value)
+            except (ValueError, TypeError):
+                value = np.nan
+
+        # Create a new row in the table.
+        row = table.row
+        row["source"] = source.encode("utf-8") if source else ""
+        row["unit"] = unit.encode("utf-8") if unit else ""
+        row["value"] = value
+        row.append()
+
+    # Flush the table to ensure all data is written to the HDF5 file.
+    table.flush()
+
+
+def create_basic_properties_tables(
+    hdf5_file, parent_folder, table_name, row_description, table_description, value, prop_type
+):
+    """
+    Create a table for a single basic property.
+
+    Args:
+        hdf5_file: PyTables file object.
+        parent_folder: Group where the table will be created.
+        table_name (str): Name of the table.
+        row_description: PyTables IsDescription class for the table schema.
+        table_description (str): Description of the table.
+        value (int or str): The value to store in the table.
+        prop_type (str): Data type of the property ('int' or 'string').
+    """
+    table = hdf5_file.create_table(parent_folder, table_name, row_description, table_description)
+    row = table.row
+    if prop_type == "int":
+        row["value"] = value
+    if prop_type == "string":
+        row["value"] = value.encode("utf-8") if value else ""
+
+    row.append()
+    table.flush()
+
+
+def read_elements_data_csv(elements_data_csv):
+    """
+    Read the elements_data.csv file.
+
+    Args:
+        elements_data_csv: Path to the elements_data.csv file.
+
+    Returns:
+        - data (list): List of dictionaries containing element data.
+        - unique_headers (list): List of unique column headers.
+        - sources_data (dict): Source of each property.
+        - units_data (dict): Unit of each property.
+    """
+    # Open the csv file, filtering out comment lines (starting with #) and empty lines.
+    with open(elements_data_csv, "r") as f:
+        reader = csv.reader(f)
+        lines = [line for line in reader if not line[0].startswith("#") and any(line)]
+
+    headers = [header.strip() for header in lines[0]]  # first row as column headers
+    sources = [source.strip() for source in lines[1]]  # second row as sources
+    units = [unit.strip() for unit in lines[2]]  # third row as units
+    data_rows = lines[3:]  # remaining rows as data
+
+    # Process headers to make them unique
+    unique_headers = []
+    header_counts = {}
+    for header in headers:
+        if header in header_counts:
+            header_counts[header] += 1
+            unique_headers.append(
+                f"{header}.{header_counts[header]}"
+            )  # suffix duplicate headers (header.1, header.2)
+        else:
+            header_counts[header] = 0
+            unique_headers.append(header)
+
+    # Create data as a list of dictionaries
+    data = []
+    for row in data_rows:
+        data.append(dict(zip(unique_headers, row)))
+
+    sources_data = dict(zip(unique_headers, sources))
+    units_data = dict(zip(unique_headers, units))
+
+    return data, unique_headers, sources_data, units_data
+
+
+def read_data_info_csv(data_info_csv):
+    """
+    Read and parse the data_info.csv file containing metadata.
+
+    Args:
+        data_info_csv: Path to the data_info.csv file.
+
+    Returns:
+        data_info (list): List of dictionaries containing metadata for each property.
+    """
+    # Open the csv file, filtering out comment lines (starting with #) and empty lines.
+    with open(data_info_csv, "r") as f:
+        lines = []
+        for line in f:
+            stripped = line.strip()
+            if stripped and not stripped.startswith("#"):
+                lines.append(stripped)
+
+    # Hardcoded headers for data_info.csv
+    data_info_headers = [
+        "Property key",
+        "Property name",
+        "Source key",
+        "Property description",
+        "Reference",
+        "doi",
+        "Notes",
+    ]
+
+    reader = csv.reader(lines)
+    data_rows = list(reader)
+
+    data_info = []
+    for row in data_rows:
+        data_info.append(dict(zip(data_info_headers, row)))
+
+    return data_info
+
+
+def write_elements_data_to_hdf5(data, unique_headers, sources_data, units_data):
+    """Write element data to an HDF5 file using PyTables.
+
+    Args:
+        data (list of dict): List of dictionaries containing element data.
+        unique_headers (list of str): List of unique column headers from the data, used to identify properties.
+        sources_data (dict): Source of each property.
+        units_data (dict): Unit of each property.
+    """
+    h5file = pt.open_file(hdf5_file, mode="w", title="Periodic Data")
+    elements_group = h5file.create_group("/", "Elements", "Elements Data")
+
+    for row in data:
+        atnum = int(row["atnum"]) if "atnum" in row and row["atnum"].strip() else 0
+        name = row["name"] if "name" in row and row["name"].strip() else ""
+        element_group_name = f"{atnum:03d}"
+        element_group = h5file.create_group(elements_group, element_group_name, f"Data for {name}")
+
+        # Handle basic properties
+        for config in PROPERTY_CONFIGS:
+            if "basic_property" in config:
+                property_name = config["basic_property"]
+                table_name = config["table_name"]
+                description = config["description"]
+                prop_type = config["type"]
+
+                # Check the property type to pick the relevant ElementDescription class
+                if prop_type == "int":
+                    row_description = NumberElementDescription
+                    value = (
+                        int(row[property_name])
+                        if property_name in row and row[property_name].strip()
+                        else 0
+                    )
+                elif prop_type == "string":
+                    row_description = StringElementDescription
+                    value = (
+                        row[property_name]
+                        if property_name in row and row[property_name].strip()
+                        else ""
+                    )
+
+                create_basic_properties_tables(
+                    h5file,
+                    element_group,
+                    table_name,
+                    row_description,
+                    description,
+                    value,
+                    prop_type,
+                )
+
+            # Handle the remaining (multi-source) properties
+            else:
+                columns = [col for col in unique_headers if col.startswith(config["property"])]
+                if columns:
+                    create_properties_tables(
+                        h5file,
+                        element_group,
+                        config["table_name"],
+                        config["description"],
+                        PropertyValues,
+                        columns,
+                        row,
+                        sources_data,
+                        units_data,
+                    )
+
+    h5file.close()
+
+
+def write_data_info_to_hdf5(data_info_list):
+    """
+    Write data from data_info.csv to the HDF5 file.
+
+    Args:
+        data_info_list: List of dictionaries containing metadata.
+    """
+    # Open the HDF5 file in append mode ("a") to add metadata without overwriting existing data.
+    with pt.open_file(hdf5_file, mode="a", title="Periodic Data") as h5file:
+        data_info_group = h5file.create_group("/", "data_info", "Data Info")
+
+        property_info_table = h5file.create_table(
+            data_info_group, "property_info", ElementsDataInfo, "Property Information"
+        )
+
+        for row in data_info_list:
+            table_row = property_info_table.row
+            table_row["property_key"] = row.get("Property key", "").encode("utf-8")
+            table_row["property_name"] = row.get("Property name", "").encode("utf-8")
+            table_row["source_key"] = row.get("Source key", "").encode("utf-8")
+            table_row["property_description"] = row.get("Property description", "").encode("utf-8")
+            table_row["reference"] = row.get("Reference", "").encode("utf-8")
+            table_row["doi"] = row.get("doi", "").encode("utf-8")
+            table_row["notes"] = row.get("Notes", "").encode("utf-8")
+            table_row.append()
+        property_info_table.flush()
+
+
+if __name__ == "__main__":
+    # Read the elements data from the CSV file
+    data, unique_headers, sources_data, units_data = read_elements_data_csv(elements_data_csv)
+
+    # Read the provenance data from the CSV file
+    data_info = read_data_info_csv(data_info_csv)
+
+    # Write the periodic table data to an HDF5 file
+    write_elements_data_to_hdf5(data, unique_headers, sources_data, units_data)
+
+    # Write the provenance data to the HDF5 file
+    write_data_info_to_hdf5(data_info)
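A read-back sketch for the layout this script creates: every multi-source property table under `/Elements/NNN` stores `(source, unit, value)` rows. For example, carbon's covalent radius (the values depend on what `elements_data.csv` provides):

```python
# Print all (source, unit, value) rows stored for carbon's covalent radius.
import tables as pt
from importlib_resources import files

h5path = files("atomdb.data").joinpath("elements_data.h5")
with pt.open_file(str(h5path), mode="r") as h5file:
    table = h5file.get_node("/Elements/006/cov_radius")
    for row in table:
        print(row["source"].decode(), row["unit"].decode(), row["value"])
```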
diff --git a/atomdb/periodic_test.py b/atomdb/periodic_test.py
new file mode 100644
index 00000000..2998f3d1
--- /dev/null
+++ b/atomdb/periodic_test.py
@@ -0,0 +1,108 @@
+from enum import IntEnum
+from numbers import Integral
+import tables as pt
+import numpy as np
+from importlib_resources import files
+
+
+__all__ = [
+    "PROPERTY_NAME_MAP",
+    "get_scalar_data",
+    "element_symbol_map",
+    "ElementAttr",
+]
+
+
+class ElementAttr(IntEnum):
+    """Indices into the (atnum, name) tuples stored in element_symbol_map."""
+
+    atnum = 0
+    name = 1
+
+
+elements_hdf5_file = files("atomdb.data").joinpath("elements_data.h5")
+ELEMENTS_H5FILE = pt.open_file(elements_hdf5_file, mode="r")
+
+PROPERTY_NAME_MAP = {
+    "atmass": "atmass",
+    "cov_radius": "cov_radius",
+    "vdw_radius": "vdw_radius",
+    "at_radius": "at_radius",
+    "polarizability": "polarizability",
+    "dispersion_c6": "dispersion_c6",
+    "dispersion": "dispersion_c6",  # field name used in the dataset run scripts
+    "elem": "symbol",
+    "atnum": "atnum",
+    "name": "name",
+    "mult": "mult",
+}
+
+
+def get_scalar_data(prop_name, atnum, nelec):
+    """
+    Get a scalar property value for a given element.
+
+    Args:
+        prop_name (str): Property name to retrieve.
+        atnum (int): Atomic number of the element.
+        nelec (int): Number of electrons in the species.
+
+    Returns:
+        int | float | str | dict[str, float] | None:
+            - int, float, or str for single-valued properties.
+            - dict mapping source name to value for properties with multiple sources.
+            - None if the property is not available (e.g., most properties of charged species).
+    """
+    charge = atnum - nelec
+
+    # Element-level data is only meaningful for neutral species, except for these identifiers
+    if charge != 0 and prop_name not in ["atmass", "elem", "atnum", "name"]:
+        return None
+
+    # Build the path of the element group
+    element_group = f"/Elements/{atnum:03d}"
+
+    table_name = PROPERTY_NAME_MAP[prop_name]
+    table_path = f"{element_group}/{table_name}"
+
+    # Get the table node from the HDF5 file
+    table = ELEMENTS_H5FILE.get_node(table_path)
+
+    # Handle basic properties (single 'value' column, no sources)
+    if len(table.colnames) == 1 and table.colnames[0] == "value":
+        value = table[0]["value"]
+        # if the value is an int, return it as an int
+        if isinstance(value, Integral):
+            return int(value)
+        # if the value is a string, decode from bytes
+        elif isinstance(value, bytes):
+            return value.decode("utf-8")
+    else:
+        # Handle properties with multiple sources
+        result = {}
+        for row in table:
+            source = row["source"].decode("utf-8")
+            value = row["value"]
+            # exclude NaN values
+            if not np.isnan(value):
+                result[source] = float(value)
+        return result if result else None
+
+
+def map_element_symbol():
+    """
+    Build a mapping of element symbols to their atomic number and name.
+
+    Returns:
+        dict[str, tuple[int, str]]:
+            Dictionary mapping element symbol -> (atomic_number, name).
+    """
+    element_symbol_map = {}
+    for element_group in ELEMENTS_H5FILE.root.Elements:
+        symbol = element_group.symbol[0]["value"].decode("utf-8").strip()
+        atnum = int(element_group.atnum[0]["value"])
+        name = element_group.name[0]["value"].decode("utf-8").strip()
+        element_symbol_map[symbol] = (atnum, name)
+
+    return element_symbol_map
+
+
+element_symbol_map = map_element_symbol()
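Usage sketch for the helpers above (the printed values come from whatever `elements_data.csv` provides):

```python
# Look up carbon's identifiers and a multi-source property through the new API.
from atomdb.periodic_test import ElementAttr, element_symbol_map, get_scalar_data

atnum = element_symbol_map["C"][ElementAttr.atnum]
print(element_symbol_map["C"][ElementAttr.name])      # element name for carbon
print(get_scalar_data("cov_radius", atnum, nelec=6))  # dict of source -> value (neutral C)
print(get_scalar_data("cov_radius", atnum, nelec=5))  # None for the cation
```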
diff --git a/atomdb/species.py b/atomdb/species.py
index 2408eb05..ffe596a2 100644
--- a/atomdb/species.py
+++ b/atomdb/species.py
@@ -19,19 +19,23 @@
 import re

 from dataclasses import asdict, dataclass, field
 from importlib import import_module
-from numbers import Integral
 from os import makedirs, path

 import numpy as np
 import pooch
 import requests
-from msgpack import packb, unpackb
-from msgpack_numpy import decode, encode
 from numpy import ndarray
 from scipy.interpolate import CubicSpline

-from atomdb.periodic import Element, element_symbol
+from atomdb.periodic_test import element_symbol_map, PROPERTY_NAME_MAP, get_scalar_data, ElementAttr
 from atomdb.utils import DEFAULT_DATAPATH, DEFAULT_DATASET, DEFAULT_REMOTE
+from importlib_resources import files
+import tables as pt
+from numbers import Integral
+
+datasets_hdf5_file = files("atomdb.datasets").joinpath("datasets_data.h5")
+DATASETS_H5FILE = pt.open_file(datasets_hdf5_file, mode="a")
+

 __all__ = [
     "Species",
@@ -68,28 +72,12 @@ def scalar(method):

     @property
     def wrapper(self):
+        # Properties not covered by PROPERTY_NAME_MAP are stored directly on the species data
+        if name not in PROPERTY_NAME_MAP:
+            return getattr(self._data, name)

-        # Map the name of the method in the SpeciesData class to the name in the Elements class
-        # This dict can be removed if the Elements csv file uses the same names as the SpeciesData class.
-        namemap = {
-            "cov_radius": "cov_radius",
-            "vdw_radius": "vdw_radius",
-            "at_radius": "at_radius",
-            "polarizability": "pold",
-            "dispersion_c6": "c6",
-            "atmass": "mass",
-        }
-
-        if name == "atmass":
-            return getattr(Element(self._data.elem), namemap[name])
-        if name in namemap:
-            # Only return Element property if neutral, otherwise None
-            charge = self._data.atnum - self._data.nelec
-            return getattr(Element(self._data.elem), namemap[name]) if charge == 0 else None
-
-        return getattr(self._data, name)
+        return get_scalar_data(name, self._data.atnum, self._data.nelec)

-    # conserve the docstring of the method
     wrapper.__doc__ = method.__doc__
     return wrapper
@@ -201,7 +189,7 @@ def __call__(self, x, deriv=0):
         else:
             y = self._obj(x, nu=deriv)
             # Handle errors from the y = exp(log y) operation -- set NaN to zero
-            np.nan_to_num(y, nan=0., copy=False)
+            np.nan_to_num(y, nan=0.0, copy=False)
             # Cutoff value: assume y(x) is zero where x > final given point x_n
             y[x > self._obj.x[-1]] = 0
             return y
@@ -231,66 +219,6 @@ def __init__(self, data) -> None:
         self.nbasis = self.norba  # number of spatial basis functions


-@dataclass(eq=False, order=False)
-class SpeciesData:
-    r"""Database entry fields for atomic and ionic species."""
-
-    # Species info
-    elem: str = field(default_factory=default_required("elem", "str"))
-    atnum: int = field(default_factory=default_required("atnum", "int"))
-    nelec: int = field(default_factory=default_required("nelec", "int"))
-    nspin: int = field(default_factory=default_required("nspin", "int"))
-    nexc: int = field(default_factory=default_required("nexc", "int"))
-
-    # Scalar properties
-    atmass: float = field(default=None)
-    cov_radius: float = field(default=None)
-    vdw_radius: float = field(default=None)
-    at_radius: float = field(default=None)
-    polarizability: float = field(default=None)
-    dispersion: float = field(default=None)
-
-    # Scalar energy and CDFT-related properties
-    energy: float = field(default=None)
-    ip: float = field(default=None)
-    mu: float = field(default=None)
-    eta: float = field(default=None)
-
-    # Basis set name
-    obasis_name: str = field(default=None)
-
-    # Radial grid
-    rs: ndarray = field(default_factory=default_vector)
-
-    # Orbital energies
-    mo_energy_a: ndarray = field(default_factory=default_vector)
-    mo_energy_b: ndarray = field(default_factory=default_vector)
-
-    # Orbital occupations
-    mo_occs_a: ndarray = field(default_factory=default_vector)
-    mo_occs_b: ndarray = field(default_factory=default_vector)
-
-    # Orbital densities
-    mo_dens_a: ndarray = field(default_factory=default_matrix)
-    mo_dens_b: ndarray = field(default_factory=default_matrix)
-    dens_tot: ndarray = field(default_factory=default_matrix)
-
-    # Orbital density gradients
-    mo_d_dens_a: ndarray = field(default_factory=default_matrix)
-    mo_d_dens_b: ndarray = field(default_factory=default_matrix)
-    d_dens_tot: ndarray = field(default_factory=default_matrix)
-
-    # Orbital density Laplacian
-    mo_dd_dens_a: ndarray = field(default_factory=default_matrix)
-    mo_dd_dens_b: ndarray = field(default_factory=default_matrix)
-    dd_dens_tot: ndarray = field(default_factory=default_matrix)
-
-    # Orbital kinetic energy densities
-    mo_ked_a: ndarray = field(default_factory=default_matrix)
-    mo_ked_b: ndarray = field(default_factory=default_matrix)
-    ked_tot: ndarray = field(default_factory=default_matrix)
-
-
 class Species:
     r"""Properties of atomic and ionic species."""

@@ -308,7 +236,11 @@ def __init__(self, dataset, fields, spinpol=1):

         """
         self._dataset = dataset.lower()
-        self._data = SpeciesData(**fields)
+        # Convert the fields dict to the dataset's DefinitionClass
+        submodule = import_module(f"atomdb.datasets.{dataset}.run")
+        fields = submodule.DefinitionClass(**fields)
+
+        self._data = fields
         self.spinpol = spinpol
         self.ao = _AtomicOrbitals(self._data)
@@ -699,7 +631,7 @@ def dd_dens_lapl_func(self, spin="t", index=None, log=False):
         Return the function for the electronic density Laplacian.

         .. math::
-
+
             \nabla^2 \rho(\mathbf{r}) = \frac{d^2 \rho(r)}{dr^2} + \frac{2}{r} \frac{d \rho(r)}{dr}

         Parameters
@@ -714,13 +646,13 @@ def dd_dens_lapl_func(self, spin="t", index=None, log=False):
             By default, all orbitals of the given spin(s) are included.
         log : bool, default=False
             Whether the logarithm of the density is used for interpolation.
-
+
         Returns
         -------
         Callable[np.ndarray(N,) -> np.ndarray(N,)]
             a callable function evaluating the Laplacian of the density given a set of radial
             points (1-D array).
-
+
         Notes
         -----
         When this function is evaluated at a point close to zero, the Laplacian becomes undefined.
@@ -734,11 +666,11 @@ def dd_dens_lapl_func(self, spin="t", index=None, log=False):

         # Define the Laplacian function
         def densityspline_like_func(rs):
             # Avoid division by zero and handle small values of r
-            with np.errstate(divide='ignore'):
+            with np.errstate(divide="ignore"):
                 laplacian = dd_dens_spline(rs) + 2 * d_dens_sp_spline(rs) / rs
             laplacian = np.where(rs < 1e-10, 0.0, laplacian)
             return laplacian
-
+
         return densityspline_like_func

     @spline
@@ -796,33 +728,27 @@ def compile_species(
         Path to the local AtomDB cache, by default DEFAULT_DATAPATH variable value.

     """
-    # Ensure directories exist
-    makedirs(path.join(datapath, dataset.lower(), "db"), exist_ok=True)
-    makedirs(path.join(datapath, dataset.lower(), "raw"), exist_ok=True)

-    # Import the compile script for the appropriate dataset
+    # Import the selected dataset's compile script and get the fields
     submodule = import_module(f"atomdb.datasets.{dataset}.run")

-    # Compile the Species instance and dump the database entry
-    species = submodule.run(elem, charge, mult, nexc, dataset, datapath)
-    dump(species, datapath=datapath)
+    fields = submodule.run(elem, charge, mult, nexc, dataset, datapath)

+    # Dump the data to the HDF5 file
+    dump(fields, dataset, mult)

-def dump(*species, datapath=DEFAULT_DATAPATH):
-    r"""Dump the Species instance(s) to a MessagePack file in the database.
+
+def dump(fields, dataset, mult):
+    r"""Dump the compiled species data to an HDF5 file in the AtomDB database.

     Parameters
     ----------
-    species: Iterable
-        Iterables of objects of class `Species`
-    datapath : str, optional
-        Path to the local AtomDB cache, by default DEFAULT_DATAPATH variable value.
-
+    fields : dataclass
+        A dataclass containing the fields to store in the HDF5 file.
+    dataset : str
+        Name of the dataset.
+    mult : int
+        Multiplicity.
""" - for s in species: - fn = datafile( - s._data.elem, s.charge, s.mult, nexc=s.nexc, dataset=s.dataset, datapath=datapath - ) - with open(fn, "wb") as f: - f.write(packb(asdict(s._data), default=encode)) + + # Save data to the HDF5 file + element_folder_creator = import_module(f"atomdb.datasets.{dataset}.h5file_creator") + element_folder_creator.create_hdf5_file(DATASETS_H5FILE, fields, dataset, mult) def load( @@ -858,24 +784,31 @@ def load( Object of class Species """ - fn = datafile( - elem, - charge, - mult, - nexc=nexc, - dataset=dataset, - datapath=datapath, - remotepath=remotepath, - ) + + # Construct the dataset path + dataset_path = f"/Datasets/{dataset}" + + # import the selected dataset HDF5 file creator to access property configurations + dataset_submodule = import_module(f"atomdb.datasets.{dataset}.h5file_creator") + DATASET_PROPERTY_CONFIGS = getattr(dataset_submodule, f"{dataset.upper()}_PROPERTY_CONFIGS") + + # Handle wildcard case for loading multiple species if Ellipsis in (elem, charge, mult, nexc): + data_paths = datafile(elem, charge, mult, nexc=nexc, dataset=dataset) + # create a list to hold all species objects obj = [] - - for file in fn: - with open(file, "rb") as f: - obj.append(Species(dataset, unpackb(f.read(), object_hook=decode))) + for data_path in data_paths: + elem = data_path.split("/")[-2] + # Construct the specific data path for each species + fields = get_species_data(data_path, elem, DATASET_PROPERTY_CONFIGS) + obj.append(Species(dataset, fields)) else: - with open(fn, "rb") as f: - obj = Species(dataset, unpackb(f.read(), object_hook=decode)) + # Construct the specific data path for a single species + data_path = f"{dataset_path}/{elem}/{elem}_{charge:03d}_{mult:03d}_{nexc:03d}" + # get the species data and then create a species object + fields = get_species_data(data_path, elem, DATASET_PROPERTY_CONFIGS) + obj = Species(dataset, fields) + return obj @@ -888,19 +821,16 @@ def datafile( datapath=DEFAULT_DATAPATH, remotepath=DEFAULT_REMOTE, ): - r"""Return the name of the database file for a species. - - This function returns the local path to the database file of a species in the AtomDB cache. If - the file is not found, it is downloaded from the remote URL. + r"""Return the paths to the database files for a species in AtomDB. Parameters ---------- - elem : str | Ellipsis - Element symbol or Ellipsis for wildcard. - charge : int | Ellipsis - Charge or Ellipsis for wildcard. - mult : int | Ellipsis - Multiplicity or Ellipsis for wildcard. + elem : str + Element symbol. + charge : int + Charge. + mult : int + Multiplicity. nexc : int, optional Excitation level, by default 0. dataset : str, optional @@ -912,59 +842,107 @@ def datafile( Returns ------- - str - Local path to the database file of a species in the AtomDB cache + list + paths to the database file of a species in AtomDB. 
""" - elem = "[^_]" if elem is Ellipsis else element_symbol(elem) - charge = "[^_]" if charge is Ellipsis else f"{charge:03d}" - mult = "[^_]" if mult is Ellipsis else f"{mult:03d}" - nexc = "[^_]" if nexc is Ellipsis else f"{nexc:03d}" - - # Wildcard search for multiple species, use repodata.txt for matching - if "[^_]" in (elem, charge, mult, nexc): - # try to retrieve the repodata file from the remote URL - try: - repodata = pooch.retrieve( - url=f"{remotepath}{dataset.lower()}/db/repodata.txt", - known_hash=None, - path=path.join(datapath, dataset.lower(), "db"), - fname="repo_data.txt", - ) - # if the file is not found or remote was not valid, use the local repodata file - except (requests.exceptions.HTTPError, ValueError): - repodata = path.join(datapath, dataset.lower(), "db", "repo_data.txt") - - with open(repodata) as f: - data = f.read() - files = re.findall(rf"\b{elem}+_{charge}+_{mult}+_{nexc}\.msg\b", data) - species_list = [] - for file in files: - # try to retrieve the file from the remote URL - try: - element = pooch.retrieve( - url=f"{remotepath}{dataset.lower()}/db/{file}", - known_hash=None, - path=path.join(datapath, dataset.lower(), "db"), - fname=f"{file}", - ) - # if the file is not found, use the local file - except (requests.exceptions.HTTPError, ValueError): - element = path.join(datapath, dataset.lower(), "db", file) - species_list.append(element) - return species_list - # try to retrieve the file from the remote URL - try: - species = pooch.retrieve( - url=f"{remotepath}{dataset.lower()}/db/{elem}_{charge}_{mult}_{nexc}.msg", - known_hash=None, - path=path.join(datapath, dataset.lower(), "db"), - fname=f"{elem}_{charge}_{mult}_{nexc}.msg", - ) - # if the file is not found, use the local file - except (requests.exceptions.HTTPError, ValueError): - species = path.join(datapath, dataset.lower(), "db", f"{elem}_{charge}_{mult}_{nexc}.msg") - return species + group_paths = [] + conditions = [] + + # Access the dataset folder in the HDF5 file + dataset_path = f"/Datasets/{dataset}" + dataset_folder = DATASETS_H5FILE.get_node(dataset_path) + + if elem is not Ellipsis: + conditions.append(f'(elem == b"{elem}")') # b for bytes comparison + if charge is not Ellipsis: + conditions.append(f"(charge == {charge})") + if mult is not Ellipsis: + conditions.append(f"(mult == {mult})") + if nexc is not Ellipsis: + conditions.append(f"(nexc == {nexc})") + + if conditions: + query_result = " & ".join(conditions) if conditions else None + + for elem, elem_folder in dataset_folder._v_groups.items(): + for species_folder in elem_folder._v_groups: + properties_folder = DATASETS_H5FILE.get_node( + f"/Datasets/{dataset}/{elem}/{species_folder}/Properties" + ) + species_info_table = properties_folder._f_get_child("species_info") + + matched_species = list(species_info_table.where(query_result)) + if matched_species: + group_paths.append(f"{dataset_path}/{elem}/{species_folder}") + + # if there are no conditions, return all species + else: + for elem, elem_folder in dataset_folder._v_groups.items(): + for species_folder in elem_folder._v_groups: + group_paths.append(f"{dataset_path}/{elem}/{species_folder}") + + return group_paths + + +def get_species_data(folder_path, elem, DATASET_PROPERTY_CONFIGS): + r"""Retrieve species data from the specified HDF5 folder path. + + Parameters + ---------- + folder_path : str + Path to the HDF5 folder containing the species data. + elem : str + Element symbol. + DATASET_PROPERTY_CONFIGS : list + list of configuration dictionaries. 
+
+
+def get_species_data(folder_path, elem, DATASET_PROPERTY_CONFIGS):
+    r"""Retrieve species data from the specified HDF5 folder path.
+
+    Parameters
+    ----------
+    folder_path : str
+        Path to the HDF5 folder containing the species data.
+    elem : str
+        Element symbol.
+    DATASET_PROPERTY_CONFIGS : list
+        List of property configuration dictionaries for the dataset.
+
+    Returns
+    -------
+    dict
+        The extracted species data fields.
+
+    """
+    fields = {}
+    dataset_folder = DATASETS_H5FILE.get_node(folder_path)
+
+    species_info_table = dataset_folder.Properties._f_get_child("species_info")
+    species_info_row = species_info_table[0]
+
+    # Iterate through property configurations to extract data from datasets_data.h5
+    for config in DATASET_PROPERTY_CONFIGS:
+        if "SpeciesInfo" in config:
+            # Extract species info data
+            prop_name = config["SpeciesInfo"]
+            value = species_info_row[prop_name]
+            if config["type"] == "string":
+                value = value.decode("utf-8")
+            fields[prop_name] = value
+
+        elif "property" in config:
+            # Extract single-valued properties
+            table = dataset_folder.Properties._f_get_child(config["table_name"])
+            value = table[0]["value"]
+            if config["type"] == "string":
+                value = value.decode("utf-8")
+            fields[config["property"]] = value
+
+        elif "array_property" in config:
+            # Extract array properties
+            table = dataset_folder.Properties._f_get_child(config["table_name"])
+            fields[config["array_property"]] = table[0]["value"]
+
+        elif "Carray_property" in config:
+            # Extract CArray properties
+            table = dataset_folder._f_get_child(config["folder"])._f_get_child(config["table_name"])
+            fields[config["Carray_property"]] = table[:]
+
+    fields["atnum"] = element_symbol_map[elem][ElementAttr.atnum]
+
+    # Add scalar properties from the periodic data
+    for prop in ("atmass", "cov_radius", "vdw_radius", "at_radius", "polarizability", "dispersion"):
+        fields[prop] = get_scalar_data(prop, fields["atnum"], fields["nelec"])
+
+    return fields


 def raw_datafile(
@@ -1006,7 +984,8 @@ def raw_datafile(
     str
         Path to the raw data file.
     """
-    elem = "*" if elem is Ellipsis else element_symbol(elem)
+    # The element symbol is already normalized, so no element_symbol() lookup is needed here
+    elem = "*" if elem is Ellipsis else elem
     charge = "*" if charge is Ellipsis else f"{charge:03d}"
     mult = "*" if mult is Ellipsis else f"{mult:03d}"
     nexc = "*" if nexc is Ellipsis else f"{nexc:03d}"
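A load sketch against the new HDF5 backend; it assumes the species was previously compiled into `datasets_data.h5` (e.g. with `compile_species` above):

```python
# Load one species, then use Ellipsis as a wildcard to load several at once.
from atomdb.species import load

be = load("Be", 0, 1, nexc=0, dataset="slater")
print(be.energy, be.ao.norba)

# Ellipsis matches any value for that field and yields a list of Species objects.
species_list = load("Be", ..., ..., nexc=0, dataset="slater")
```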
readme = "README.md" license = {text = "GPL-3.0-or-later"} -requires-python = ">=3.9" +requires-python = ">=3.10" classifiers = [ 'Development Status :: 5 - Production/Stable', 'Environment :: Console', @@ -36,19 +36,19 @@ classifiers = [ 'Intended Audience :: Science/Research', "Intended Audience :: Education", "Natural Language :: English", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ] dependencies = [ - "numpy>=1.16", + "numpy>=1.26.4", "scipy>=1.4", "msgpack>=1.0.0", "msgpack-numpy>=0.4.8", "h5py>=3.6.0", "importlib_resources>=3.0.0", "pooch>=1.8.1", + "tables>=3.9.2", ] dynamic = ["version"] [tool.setuptools_scm] @@ -67,7 +67,7 @@ dev = [ "qc-gbasis", # "qc-grid@git+https://github.com/theochem/grid.git@master", # TODO: uncomment when grid is available on PyPI - # "qc-grid", + "qc-grid", # "qc-iodata@git+https://github.com/theochem/iodata.git@main", "qc-iodata", ] @@ -92,6 +92,9 @@ doc = [ [tool.setuptools] packages = ["atomdb"] +# Adding the package data +package-data = { "atomdb" = ["data/*.h5", "data/*.msg"] } +include-package-data = true [tool.black] line-length = 100