From d83b62d0f4946ee738c204ce63e0c9217764ef09 Mon Sep 17 00:00:00 2001 From: Marnik Bercx Date: Fri, 1 May 2026 09:33:39 +0200 Subject: [PATCH 1/3] =?UTF-8?q?=E2=9C=A8=20`PwOutput`:=20add=20Specs=20for?= =?UTF-8?q?=20bands,=20magnetism,=20and=20HOMO/LUMO?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add 14 Specs covering band eigenvalues, magnetism, run-status flags, and HOMO/LUMO levels: - `eigenvalues` and `occupations_kpoint` as numpy arrays of shape `(n_kpoints, n_spin, n_bands)`, derived from `band_structure.ks_energies`. - Run-shape scalars from XML: `number_of_electrons`, `number_of_atoms`, `number_of_species`, `lsda`, `alat` (Å), `ibrav`. - Magnetism (μB/cell): `total_magnetization`, `absolute_magnetization`. - Run-status flags: `scf_converged` (from `convergence_info.scf_conv`) and `job_done` (presence of ``), each defaulting to `False` when the corresponding XML element is missing. - `highest_occupied_level` / `lowest_unoccupied_level` (eV) parsed from the pw.x stdout. `PwStdoutParser.parse` matches both the single-value `highest occupied level (ev): X` line and the paired `highest occupied, lowest unoccupied levels (ev): X Y` line. --- src/qe_tools/outputs/parsers/pw.py | 23 +++ src/qe_tools/outputs/pw.py | 131 ++++++++++++++++++ tests/outputs/conftest.py | 62 +++++++++ .../fixtures/pw/insulator_homo/scf.out | 32 +++++ tests/outputs/test_pw_output.py | 26 +++- .../test_default_xml_211101_.yml | 24 ++++ .../test_default_xml_220603_.yml | 24 ++++ .../test_default_xml_230310_.yml | 26 ++++ .../test_default_xml_240411_.yml | 24 ++++ .../test_default_xml_250521_.yml | 24 ++++ .../test_pw_output/test_insulator_homo.yml | 1 + .../test_success_base_collinear_.yml | 25 ++++ 12 files changed, 416 insertions(+), 6 deletions(-) create mode 100644 tests/outputs/conftest.py create mode 100644 tests/outputs/fixtures/pw/insulator_homo/scf.out create mode 100644 tests/outputs/test_pw_output/test_insulator_homo.yml diff --git a/src/qe_tools/outputs/parsers/pw.py b/src/qe_tools/outputs/parsers/pw.py index 0d23278..88bced7 100644 --- a/src/qe_tools/outputs/parsers/pw.py +++ b/src/qe_tools/outputs/parsers/pw.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from importlib.resources import files from xml.etree import ElementTree @@ -52,7 +53,29 @@ def parse(content): return XMLSchema(str(files(schemas) / schema_filename)).to_dict(element_root) +_HOMO_LUMO_RE = re.compile( + r"highest occupied,\s*lowest unoccupied levels?\s*\(ev\):\s*" + r"([\-\d.E+]+)\s+([\-\d.E+]+)" +) +_HOMO_RE = re.compile(r"highest occupied level\s*\(ev\):\s*([\-\d.E+]+)") + + class PwStdoutParser(BaseStdoutParser): """ Class for parsing the standard output of pw.x. """ + + @staticmethod + def parse(content: str) -> dict: + parsed_data = BaseStdoutParser.parse(content) + + match = _HOMO_LUMO_RE.search(content) + if match: + parsed_data["highest_occupied_level"] = float(match.group(1)) + parsed_data["lowest_unoccupied_level"] = float(match.group(2)) + else: + match = _HOMO_RE.search(content) + if match: + parsed_data["highest_occupied_level"] = float(match.group(1)) + + return parsed_data diff --git a/src/qe_tools/outputs/pw.py b/src/qe_tools/outputs/pw.py index 611ae1c..f185c79 100644 --- a/src/qe_tools/outputs/pw.py +++ b/src/qe_tools/outputs/pw.py @@ -5,6 +5,7 @@ from pathlib import Path from typing import Annotated, TextIO +import numpy as np from glom import Coalesce, Spec from dough import Unit @@ -19,6 +20,24 @@ from qe_tools import CONSTANTS +def _stack_per_kpoint(band_structure: dict, key: str) -> np.ndarray: + """Stack a per-k-point flat array (eigenvalues / occupations) into shape `(nks, nspin, nbnd)`. + + QE stores `eigenvalues` (and `occupations`) as a single flat list per k-point of + length `nbnd` (spin-unpolarised) or `2 * nbnd_per_spin` (spin-polarised LSDA, with + spin-up values first, spin-down second). + """ + lsda = band_structure["lsda"] + nspin = 2 if lsda else 1 + arr = np.array( + [ks[key]["$"] for ks in band_structure["ks_energies"]], + dtype=float, + ) + nks, total = arr.shape + nbnd = total // nspin + return arr.reshape(nks, nspin, nbnd) + + @output_mapping class _PwParametersMapping: """Parameters the pw.x calculation ran with. @@ -314,6 +333,118 @@ class _PwMapping: ] """Total energy in eV.""" + eigenvalues: Annotated[ + np.ndarray, + Spec( + ( + "xml.output.band_structure", + lambda bs: _stack_per_kpoint(bs, "eigenvalues") + * CONSTANTS.hartree_to_ev, + ) + ), + Unit("eV"), + ] + """Kohn-Sham eigenvalues in eV. + + Numpy array of shape `(n_kpoints, n_spin, n_bands)`: + + - axis 0 (`n_kpoints`): k-points in the order given by `k_points_cartesian` + - axis 1 (`n_spin`): spin channel (length 1 for non-spin-polarised / non-collinear, + length 2 for spin-polarised LSDA, with index 0 = spin-up and index 1 = spin-down) + - axis 2 (`n_bands`): band index, energy-sorted ascending + """ + + occupations_kpoint: Annotated[ + np.ndarray, + Spec( + ( + "xml.output.band_structure", + lambda bs: _stack_per_kpoint(bs, "occupations"), + ) + ), + ] + """Kohn-Sham occupations (dimensionless), aligned with `eigenvalues`. + + Numpy array of shape `(n_kpoints, n_spin, n_bands)`. Same axis meanings as + `eigenvalues`. Values lie in `[0, 1]` for spin-polarised calculations and in + `[0, 2]` (the per-band occupancy) for spin-unpolarised calculations. + """ + + number_of_electrons: Annotated[float, Spec("xml.output.band_structure.nelec")] + """Total number of electrons in the unit cell.""" + + number_of_atoms: Annotated[int, Spec("xml.output.atomic_structure.@nat")] + """Number of atoms in the unit cell.""" + + number_of_species: Annotated[int, Spec("xml.output.atomic_species.@ntyp")] + """Number of distinct atomic species in the unit cell.""" + + lsda: Annotated[bool, Spec("xml.output.band_structure.lsda")] + """Whether the calculation is spin-polarised (LSDA).""" + + total_magnetization: Annotated[ + float, Spec("xml.output.magnetization.total"), Unit("bohr_magneton") + ] + """Total magnetization (sum of spin-up minus spin-down electrons) in Bohr magnetons per cell. + + Only meaningful for spin-polarised calculations. + """ + + absolute_magnetization: Annotated[ + float, Spec("xml.output.magnetization.absolute"), Unit("bohr_magneton") + ] + """Absolute magnetization (integral of `|m(r)|`) in Bohr magnetons per cell. + + Only meaningful for spin-polarised calculations. + """ + + alat: Annotated[ + float, + Spec( + ( + "xml.output.atomic_structure.@alat", + lambda alat: alat * CONSTANTS.bohr_to_ang, + ) + ), + Unit("angstrom"), + ] + """Lattice parameter `alat` in Å (the QE `celldm(1)` converted from Bohr).""" + + ibrav: Annotated[int, Spec("xml.output.atomic_structure.@bravais_index")] + """Bravais lattice index (QE `ibrav` integer); only present when QE writes it.""" + + scf_converged: Annotated[ + bool, + Spec("xml.output.convergence_info.scf_conv.convergence_achieved"), + ] = False + """Whether the SCF cycle converged. Defaults to `False` if the XML lacks the field.""" + + job_done: Annotated[ + bool, + Spec(("xml.closed", lambda _: True)), + ] = False + """Whether the calculation finished cleanly (XML contains a `` element).""" + + highest_occupied_level: Annotated[ + float, Spec("stdout.highest_occupied_level"), Unit("eV") + ] + """Highest occupied Kohn-Sham level in eV. + + Parsed from the stdout line `highest occupied level (ev): ...` (insulators) or the + first value of `highest occupied, lowest unoccupied levels (ev): ...`. Only present + for runs where QE prints it (typically SCF of insulators). + """ + + lowest_unoccupied_level: Annotated[ + float, Spec("stdout.lowest_unoccupied_level"), Unit("eV") + ] + """Lowest unoccupied Kohn-Sham level in eV. + + Parsed from the stdout line `highest occupied, lowest unoccupied levels (ev): ...`. + Only present when QE prints both values (insulators with `nbnd` larger than the + number of occupied bands). + """ + class PwOutput(BaseOutput[_PwMapping]): """Output of the Quantum ESPRESSO pw.x code.""" diff --git a/tests/outputs/conftest.py b/tests/outputs/conftest.py new file mode 100644 index 0000000..19b3978 --- /dev/null +++ b/tests/outputs/conftest.py @@ -0,0 +1,62 @@ +"""Helpers shared by the `tests/outputs/` test suite. + +`dough.testing._serialize` (exposed via the `robust_data_regression_check` fixture) +covers most cases, but it raises on `None` values. The existing `raw_outputs` +snapshots contain `null`s from optional QE XML fields, so for those we still need +a local helper. Use `robust_data_regression_check` everywhere else. +""" + +from __future__ import annotations + +import numpy as np +import pytest + + +def _to_jsonable(obj): + if isinstance(obj, np.ndarray): + return obj.tolist() + if isinstance(obj, (np.floating, np.integer, np.bool_)): + return obj.item() + if isinstance(obj, dict): + return {k: _to_jsonable(v) for k, v in obj.items()} + if isinstance(obj, list): + return [_to_jsonable(item) for item in obj] + return obj + + +@pytest.fixture() +def to_jsonable(): + return _to_jsonable + + +_HEAVY_ARRAY_KEYS = ("eigenvalues", "occupations_kpoint") + + +def _fingerprint(arr: np.ndarray) -> dict: + flat = arr.ravel() + return { + "shape": list(arr.shape), + "sum": round(float(flat.sum()), 5), + "first": round(float(flat[0]), 5), + "last": round(float(flat[-1]), 5), + } + + +@pytest.fixture() +def fingerprint_heavy(): + """Replace large numpy-array values in `get_output_dict()` with shape/sum fingerprints. + + Keeps default_xml regression snapshots small while still detecting Spec-resolution + regressions per QE schema version. + """ + + def factory(out_dict: dict) -> dict: + result = {} + for key, value in out_dict.items(): + if key in _HEAVY_ARRAY_KEYS and isinstance(value, np.ndarray): + result[key] = _fingerprint(value) + else: + result[key] = _to_jsonable(value) + return result + + return factory diff --git a/tests/outputs/fixtures/pw/insulator_homo/scf.out b/tests/outputs/fixtures/pw/insulator_homo/scf.out new file mode 100644 index 0000000..528cf40 --- /dev/null +++ b/tests/outputs/fixtures/pw/insulator_homo/scf.out @@ -0,0 +1,32 @@ + + Program PWSCF v.7.5 starts on 1May2026 at 7: 9:44 + + This program is part of the open-source Quantum ESPRESSO suite + for quantum simulation of materials; please cite + "P. Giannozzi et al., J. Phys.:Condens. Matter 21 395502 (2009); + "P. Giannozzi et al., J. Phys.:Condens. Matter 29 465901 (2017); + "P. Giannozzi et al., J. Chem. Phys. 152 154105 (2020); + URL http://www.quantum-espresso.org", + in publications or presentations arising from this work. More details at + http://www.quantum-espresso.org/quote + + Parallel version (MPI), running on 1 processors + + MPI processes distributed on 1 nodes + +--- Removed: SCF iteration log, k-point eigenvalues, energy decomposition --- + + highest occupied level (ev): 5.3452 + +! total energy = -75.53725762 Ry + +--- Removed: timing breakdown --- + + PWSCF : 2.14s CPU 2.25s WALL + + + This run was terminated on: 7: 9:47 1May2026 + +=------------------------------------------------------------------------------= + JOB DONE. +=------------------------------------------------------------------------------= diff --git a/tests/outputs/test_pw_output.py b/tests/outputs/test_pw_output.py index d32b0a8..ea316fc 100644 --- a/tests/outputs/test_pw_output.py +++ b/tests/outputs/test_pw_output.py @@ -15,7 +15,7 @@ "250521", ], ) -def test_default_xml(data_regression, xml_format): +def test_default_xml(data_regression, fingerprint_heavy, xml_format): """Test the default XML output of pw.x.""" name = f"default_xml_{xml_format}" @@ -26,7 +26,7 @@ def test_default_xml(data_regression, xml_format): data_regression.check( { - "base_outputs": pw_out.get_output_dict(), + "base_outputs": fingerprint_heavy(pw_out.get_output_dict()), "raw_outputs": pw_out.raw_outputs, } ) @@ -38,7 +38,7 @@ def test_default_xml(data_regression, xml_format): "collinear", ], ) -def test_success_base(data_regression, fixture_directory): +def test_success_base(data_regression, fingerprint_heavy, fixture_directory): """Test the base outputs of successful pw.x calculations.""" pw_directory = Path(__file__).parent / "fixtures" / "pw" / fixture_directory @@ -47,7 +47,7 @@ def test_success_base(data_regression, fixture_directory): data_regression.check( { - "base_outputs": pw_out.get_output_dict(), + "base_outputs": fingerprint_heavy(pw_out.get_output_dict()), } ) @@ -58,7 +58,7 @@ def test_success_base(data_regression, fixture_directory): "failed_no_xml", ], ) -def test_failed(data_regression, fixture_directory): +def test_failed(data_regression, to_jsonable, fixture_directory): """Test failed calculations of pw.x.""" pw_directory = Path(__file__).parent / "fixtures" / "pw" / fixture_directory @@ -67,12 +67,26 @@ def test_failed(data_regression, fixture_directory): data_regression.check( { - "base_outputs": pw_out.get_output_dict(), + "base_outputs": to_jsonable(pw_out.get_output_dict()), "raw_outputs": pw_out.raw_outputs, } ) +def test_insulator_homo(robust_data_regression_check): + """Test stdout-derived `highest_occupied_level` from an insulator SCF.""" + + pw_directory = Path(__file__).parent / "fixtures" / "pw" / "insulator_homo" + + pw_out = PwOutput.from_dir(pw_directory) + + robust_data_regression_check( + { + "highest_occupied_level": pw_out.get_output("highest_occupied_level"), + } + ) + + def test_tot_magnetization(data_regression): """Test the output of a spin-polarized pw.x calculation with tot_magnetization.""" diff --git a/tests/outputs/test_pw_output/test_default_xml_211101_.yml b/tests/outputs/test_pw_output/test_default_xml_211101_.yml index b11a07a..e982a23 100644 --- a/tests/outputs/test_pw_output/test_default_xml_211101_.yml +++ b/tests/outputs/test_pw_output/test_default_xml_211101_.yml @@ -1,4 +1,14 @@ base_outputs: + absolute_magnetization: 0.0 + alat: 3.8669746330979136 + eigenvalues: + first: -5.34429 + last: 13.87319 + shape: + - 2 + - 1 + - 8 + sum: 99.50024 fermi_energy: 7.296652068825847 forces: - - 0.0 @@ -7,6 +17,7 @@ base_outputs: - - 0.0 - 0.0 - 0.1714861209572958 + job_done: true k_points_cartesian: - - 0.0 - 0.0 @@ -17,8 +28,20 @@ base_outputs: k_points_weights: - 1.0 - 1.0 + lsda: false + number_of_atoms: 2 number_of_bands: 8 + number_of_electrons: 8.0 number_of_k_points: 2 + number_of_species: 1 + occupations_kpoint: + first: 1.0 + last: 0.0 + shape: + - 2 + - 1 + - 8 + sum: 8.0 parameters: degauss: 0.136056917253 ecutrho: 3265.366014072 @@ -38,6 +61,7 @@ base_outputs: - 32 - 32 xc_functional: PBE + scf_converged: true stress: - - 19.802407083925143 - 3.18983295531761e-15 diff --git a/tests/outputs/test_pw_output/test_default_xml_220603_.yml b/tests/outputs/test_pw_output/test_default_xml_220603_.yml index b2cad05..367a5cb 100644 --- a/tests/outputs/test_pw_output/test_default_xml_220603_.yml +++ b/tests/outputs/test_pw_output/test_default_xml_220603_.yml @@ -1,4 +1,14 @@ base_outputs: + absolute_magnetization: 0.0 + alat: 3.8669746330979136 + eigenvalues: + first: -5.59147 + last: 16.15329 + shape: + - 4 + - 1 + - 8 + sum: 192.42596 fermi_energy: 6.679289920043898 forces: - - 0.0 @@ -7,6 +17,7 @@ base_outputs: - - 0.0 - -1.5988141933326028e-26 - 3.5990582562436014e-05 + job_done: true k_points_cartesian: - - 0.0 - 0.0 @@ -25,8 +36,20 @@ base_outputs: - 0.25 - 0.75 - 0.75 + lsda: false + number_of_atoms: 2 number_of_bands: 8 + number_of_electrons: 8.0 number_of_k_points: 4 + number_of_species: 1 + occupations_kpoint: + first: 1.0 + last: 0.0 + shape: + - 4 + - 1 + - 8 + sum: 16.00805 parameters: degauss: 0.136056917253 ecutrho: 3265.366014072 @@ -54,6 +77,7 @@ base_outputs: - 32 - 32 xc_functional: PBE + scf_converged: true stress: - - 7.558511471674453 - 2.3923747164882088e-15 diff --git a/tests/outputs/test_pw_output/test_default_xml_230310_.yml b/tests/outputs/test_pw_output/test_default_xml_230310_.yml index 7da9818..8cc0629 100644 --- a/tests/outputs/test_pw_output/test_default_xml_230310_.yml +++ b/tests/outputs/test_pw_output/test_default_xml_230310_.yml @@ -1,4 +1,14 @@ base_outputs: + absolute_magnetization: 0.0 + alat: 4.958655009229507 + eigenvalues: + first: -84.85888 + last: 8.85336 + shape: + - 13 + - 1 + - 16 + sum: -2856.13028 fermi_energy: 9.092168712286712 forces: - - 0.0 @@ -13,6 +23,8 @@ base_outputs: - - 0.0 - 9.008856055736822e-25 - -3.12669022280311e-25 + highest_occupied_level: 9.0922 + job_done: true k_points_cartesian: - - 0.0 - 0.0 @@ -67,8 +79,20 @@ base_outputs: - 0.1875 - 0.1875 - 0.03125 + lsda: false + number_of_atoms: 4 number_of_bands: 16 + number_of_electrons: 32.0 number_of_k_points: 13 + number_of_species: 3 + occupations_kpoint: + first: 1.0 + last: 1.0 + shape: + - 13 + - 1 + - 16 + sum: 208.0 parameters: ecutrho: 3265.366014072 ecutwfc: 408.170751759 @@ -94,6 +118,7 @@ base_outputs: - 36 - 36 xc_functional: PBE + scf_converged: true stress: - - -62.88281405555692 - -2.551866364254089e-14 @@ -141,6 +166,7 @@ base_outputs: raw_outputs: stdout: code_version: '7.2' + highest_occupied_level: 9.0922 wall_time_seconds: 100.44 xml: '@Units': Hartree atomic units diff --git a/tests/outputs/test_pw_output/test_default_xml_240411_.yml b/tests/outputs/test_pw_output/test_default_xml_240411_.yml index 4442266..30f220b 100644 --- a/tests/outputs/test_pw_output/test_default_xml_240411_.yml +++ b/tests/outputs/test_pw_output/test_default_xml_240411_.yml @@ -1,4 +1,14 @@ base_outputs: + absolute_magnetization: 0.0 + alat: 3.866974633097697 + eigenvalues: + first: -5.82758 + last: 11.01905 + shape: + - 13 + - 1 + - 8 + sum: 600.40616 fermi_energy: 6.482639103429898 forces: - - 6.945510284945072e-25 @@ -7,6 +17,7 @@ base_outputs: - - -6.945510284945072e-25 - -2.665236731143183e-19 - 5.875314824057878e-06 + job_done: true k_points_cartesian: - - 0.0 - 0.0 @@ -61,8 +72,20 @@ base_outputs: - 0.375 - 0.1875 - 0.1875 + lsda: false + number_of_atoms: 2 number_of_bands: 8 + number_of_electrons: 8.0 number_of_k_points: 13 + number_of_species: 1 + occupations_kpoint: + first: 1.0 + last: 0.0 + shape: + - 13 + - 1 + - 8 + sum: 52.02007 parameters: degauss: 0.136056917253 ecutrho: 3265.366014072 @@ -90,6 +113,7 @@ base_outputs: - 32 - 32 xc_functional: PBESOL + scf_converged: true stress: - - 0.00831212176267791 - 5.84098538270437e-18 diff --git a/tests/outputs/test_pw_output/test_default_xml_250521_.yml b/tests/outputs/test_pw_output/test_default_xml_250521_.yml index 424acce..99fe7b4 100644 --- a/tests/outputs/test_pw_output/test_default_xml_250521_.yml +++ b/tests/outputs/test_pw_output/test_default_xml_250521_.yml @@ -1,4 +1,14 @@ base_outputs: + absolute_magnetization: 0.0 + alat: 4.029999982385125 + eigenvalues: + first: -39.51935 + last: 15.49553 + shape: + - 2 + - 1 + - 24 + sum: -369.14114 fermi_energy: 3.612592982009913 forces: - - -1.7635124848354649 @@ -25,6 +35,7 @@ base_outputs: - - 0.11819283971808975 - -0.5120812364569893 - 0.07225493313766373 + job_done: true k_points_cartesian: - - 0.0 - 0.0 @@ -35,8 +46,20 @@ base_outputs: k_points_weights: - 1.0 - 1.0 + lsda: false + number_of_atoms: 8 number_of_bands: 24 + number_of_electrons: 40.0 number_of_k_points: 2 + number_of_species: 2 + occupations_kpoint: + first: 1.0 + last: 0.0 + shape: + - 2 + - 1 + - 24 + sum: 40.0 parameters: degauss: 0.37415652244575 ecutrho: 4898.049021108 @@ -64,6 +87,7 @@ base_outputs: - 36 - 36 xc_functional: PBESOL + scf_converged: true stress: - - 3.345625870189871 - 0.6180019560502391 diff --git a/tests/outputs/test_pw_output/test_insulator_homo.yml b/tests/outputs/test_pw_output/test_insulator_homo.yml new file mode 100644 index 0000000..67443af --- /dev/null +++ b/tests/outputs/test_pw_output/test_insulator_homo.yml @@ -0,0 +1 @@ +highest_occupied_level: 5.3452 diff --git a/tests/outputs/test_pw_output/test_success_base_collinear_.yml b/tests/outputs/test_pw_output/test_success_base_collinear_.yml index 291eb49..9234426 100644 --- a/tests/outputs/test_pw_output/test_success_base_collinear_.yml +++ b/tests/outputs/test_pw_output/test_success_base_collinear_.yml @@ -1,5 +1,16 @@ base_outputs: + absolute_magnetization: 0.0 + alat: 4.029999982385125 + eigenvalues: + first: -39.45078 + last: 15.61056 + shape: + - 2 + - 2 + - 24 + sum: -729.38408 fermi_energy: 1.8452196496373638 + job_done: true k_points_cartesian: - - 0.0 - 0.0 @@ -10,8 +21,20 @@ base_outputs: k_points_weights: - 0.5 - 0.5 + lsda: true + number_of_atoms: 8 number_of_bands: 24 + number_of_electrons: 40.0 number_of_k_points: 2 + number_of_species: 2 + occupations_kpoint: + first: 1.0 + last: 0.0 + shape: + - 2 + - 2 + - 24 + sum: 80.0 parameters: ecutrho: 4898.049021108 ecutwfc: 612.2561276385 @@ -37,6 +60,7 @@ base_outputs: - 36 - 36 xc_functional: PBESOL + scf_converged: false structure: atomic_species: - F @@ -86,3 +110,4 @@ base_outputs: - Li - F total_energy: 0.0 + total_magnetization: 0.0 From 9937751703c0a02320172a97b658d913d01a804b Mon Sep 17 00:00:00 2001 From: Marnik Bercx Date: Fri, 1 May 2026 09:47:02 +0200 Subject: [PATCH 2/3] =?UTF-8?q?=E2=9C=A8=20`BandsOutput`:=20add=20output?= =?UTF-8?q?=20class=20for=20bands.x?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `bands.x` post-processor reorders the raw `pw.x` band eigenvalues into a plottable band-structure path and (optionally) labels each `(k, band)` with its symmetry representation. The qe-tools side had no parser for the `filband` family — users had to hand-roll text parsing and reshape arithmetic to lift an `(n_kpoints, n_bands)` array out of the gnuplot-flavoured output. Add a dedicated `BandsOutput` class plus three parsers: - `BandsDatParser` reads the `&plot nbnd=..., nks=... /` header and reshapes the body into `k_points` `(n_kpoints, 3)` and `eigenvalues` `(n_kpoints, n_bands)` arrays (eV). - `BandsRapParser` decodes the `*.dat.rap` symmetry-rep file when bands.x was run with `lsym=.true.`, exposing per-`(k, band)` representation indices and a `(n_kpoints,)` boolean flag for high-symmetry points. - `BandsStdoutParser` lifts the `high-symmetry point: kx ky kz x coordinate X` lines out of the bands.x stdout, so plot tick labels can be placed at the correct cumulative path-length. --- src/qe_tools/outputs/__init__.py | 2 + src/qe_tools/outputs/bands.py | 135 ++++++++++++++++++ src/qe_tools/outputs/parsers/bands.py | 116 +++++++++++++++ .../outputs/fixtures/bands/mgo/MgO-bands.dat | 9 ++ .../fixtures/bands/mgo/MgO-bands.dat.rap | 9 ++ tests/outputs/fixtures/bands/mgo/bands.out | 16 +++ tests/outputs/test_bands_output.py | 22 +++ .../test_bands_output/test_bands_mgo.yml | 60 ++++++++ 8 files changed, 369 insertions(+) create mode 100644 src/qe_tools/outputs/bands.py create mode 100644 src/qe_tools/outputs/parsers/bands.py create mode 100644 tests/outputs/fixtures/bands/mgo/MgO-bands.dat create mode 100644 tests/outputs/fixtures/bands/mgo/MgO-bands.dat.rap create mode 100644 tests/outputs/fixtures/bands/mgo/bands.out create mode 100644 tests/outputs/test_bands_output.py create mode 100644 tests/outputs/test_bands_output/test_bands_mgo.yml diff --git a/src/qe_tools/outputs/__init__.py b/src/qe_tools/outputs/__init__.py index 5e5c2d1..461e9ef 100644 --- a/src/qe_tools/outputs/__init__.py +++ b/src/qe_tools/outputs/__init__.py @@ -1,7 +1,9 @@ from .pw import PwOutput from .dos import DosOutput +from .bands import BandsOutput __all__ = ( "PwOutput", "DosOutput", + "BandsOutput", ) diff --git a/src/qe_tools/outputs/bands.py b/src/qe_tools/outputs/bands.py new file mode 100644 index 0000000..a4a571f --- /dev/null +++ b/src/qe_tools/outputs/bands.py @@ -0,0 +1,135 @@ +"""Output of the Quantum ESPRESSO bands.x code.""" + +import typing +from pathlib import Path +from typing import Annotated, TextIO + +import numpy as np +from glom import Spec + +from dough import Unit +from dough.outputs import BaseOutput, output_mapping + +from .parsers.bands import ( + BandsDatParser, + BandsRapParser, + BandsStdoutParser, +) + + +@output_mapping +class _BandsMapping: + """Typed outputs of a bands.x calculation.""" + + number_of_kpoints: Annotated[int, Spec("dat.nks")] + """Number of k-points along the band-structure path.""" + + number_of_bands: Annotated[int, Spec("dat.nbnd")] + """Number of bands written by bands.x.""" + + k_points: Annotated[np.ndarray, Spec("dat.k_points")] + """Crystal-momentum coordinates of each k-point on the path. + + Numpy array of shape `(n_kpoints, 3)`; coordinates are in the same units that the + pw.x input used for `K_POINTS` (typically `2π/alat` for `tpiba_b`, or crystal + coordinates for `crystal_b`). bands.x does not transform them. + """ + + eigenvalues: Annotated[np.ndarray, Spec("dat.eigenvalues"), Unit("eV")] + """Kohn-Sham eigenvalues along the band path, in eV. + + Numpy array of shape `(n_kpoints, n_bands)`: + + - axis 0 (`n_kpoints`): k-points in the order given by `k_points` + - axis 1 (`n_bands`): band index, ascending (energy-sorted at each k-point) + + For spin-polarised calculations, bands.x writes one filband per spin channel; this + array therefore covers a single spin channel. + """ + + high_symmetry_points: Annotated[np.ndarray, Spec("stdout.high_symmetry_points")] + """Crystal-momentum coordinates of the high-symmetry points along the path. + + Numpy array of shape `(n_high_sym, 3)`. Reported in the same coordinate system as + `k_points`. Parsed from the bands.x stdout. + """ + + high_symmetry_distances: Annotated[ + np.ndarray, Spec("stdout.high_symmetry_distances") + ] + """Cumulative path-length at each high-symmetry point. + + Numpy array of shape `(n_high_sym,)`. Units match those bands.x uses internally + (typically `2π/alat`). Suitable for placing tick labels on the x-axis of a + band-structure plot. + """ + + representations: Annotated[np.ndarray, Spec("rap.representations")] + """Symmetry-representation index per (k-point, band). + + Numpy array of shape `(n_kpoints, n_bands)` of integers. Only present when bands.x + was run with `lsym=.true.` and produced a `filband.rap` file. + """ + + is_high_symmetry: Annotated[np.ndarray, Spec("rap.is_high_symmetry")] + """Boolean array of shape `(n_kpoints,)` flagging high-symmetry k-points. + + Only present when a `filband.rap` file is available. + """ + + +class BandsOutput(BaseOutput[_BandsMapping]): + """Output of the Quantum ESPRESSO bands.x code.""" + + converters: typing.ClassVar[dict] = {} + + @classmethod + def from_dir(cls, directory: str | Path): + """Locate filband (`*.dat`, `*.dat.rap`) and bands.x stdout in `directory`.""" + directory = Path(directory) + + if not directory.is_dir(): + raise ValueError(f"Path `{directory}` is not a valid directory.") + + rap_file = next(directory.glob("*.dat.rap"), None) + + dat_file = None + for candidate in directory.glob("*.dat"): + if candidate.name.endswith(".dat.rap"): + continue + with candidate.open("r") as handle: + if "&plot" in handle.readline(): + dat_file = candidate + break + + stdout_file = None + for file in directory.iterdir(): + if not file.is_file(): + continue + with file.open("r") as handle: + header = "".join(handle.readlines(5)) + if "Program BANDS" in header: + stdout_file = file + break + + return cls.from_files(dat=dat_file, rap=rap_file, stdout=stdout_file) + + @classmethod + def from_files( + cls, + *, + dat: None | str | Path | TextIO = None, + rap: None | str | Path | TextIO = None, + stdout: None | str | Path | TextIO = None, + ): + """Parse the outputs directly from the provided files.""" + raw_outputs: dict = {} + + if dat is not None: + raw_outputs["dat"] = BandsDatParser.parse_from_file(dat) + if rap is not None: + raw_outputs["rap"] = BandsRapParser.parse_from_file(rap) + if stdout is not None: + raw_outputs["stdout"] = BandsStdoutParser.parse_from_file(stdout) + + return cls(raw_outputs=raw_outputs) diff --git a/src/qe_tools/outputs/parsers/bands.py b/src/qe_tools/outputs/parsers/bands.py new file mode 100644 index 0000000..51162db --- /dev/null +++ b/src/qe_tools/outputs/parsers/bands.py @@ -0,0 +1,116 @@ +"""Parsers for the output of Quantum ESPRESSO bands.x.""" + +from __future__ import annotations + +import re + +import numpy as np + +from dough.outputs import BaseOutputFileParser + + +_DAT_HEADER_RE = re.compile( + r"&plot\s+nbnd\s*=\s*(?P\d+)\s*,\s*nks\s*=\s*(?P\d+)\s*/" +) +_RAP_HEADER_RE = re.compile( + r"&plot_rap\s+nbnd_rap\s*=\s*(?P\d+)\s*,\s*nks_rap\s*=\s*(?P\d+)\s*/" +) +_HIGH_SYM_RE = re.compile( + r"high-symmetry point:\s*" + r"(?P[\-\d.]+)\s+(?P[\-\d.]+)\s+(?P[\-\d.]+)\s+" + r"x coordinate\s+(?P[\-\d.]+)" +) + + +def _parse_plot_header( + content: str, header_re: re.Pattern, label: str +) -> tuple[int, int, str]: + """Parse a `&plot[_rap] nbnd[_rap]=..., nks[_rap]=... /` header. + + Returns `(nbnd, nks, body)` where `body` is the remaining content after the header. + """ + match = header_re.search(content) + if match is None: + raise ValueError(f"Could not parse `{label}` header from filband file.") + return int(match.group("nbnd")), int(match.group("nks")), content[match.end() :] + + +class BandsDatParser(BaseOutputFileParser): + """Parse the ``filband`` (e.g. ``MgO-bands.dat``) output of bands.x.""" + + @staticmethod + def parse(content: str) -> dict: + nbnd, nks, body = _parse_plot_header( + content, _DAT_HEADER_RE, "&plot nbnd=..., nks=... /" + ) + + tokens = np.fromstring(body, sep=" ") + + per_kpoint = 3 + nbnd + expected = nks * per_kpoint + if tokens.size != expected: + raise ValueError( + f"filband payload has {tokens.size} numbers; expected " + f"{expected} for nks={nks}, nbnd={nbnd}." + ) + block = tokens.reshape(nks, per_kpoint) + + return { + "nbnd": nbnd, + "nks": nks, + "k_points": block[:, :3], + "eigenvalues": block[:, 3:], + } + + +class BandsRapParser(BaseOutputFileParser): + """Parse the symmetry-representation ``filband.rap`` output of bands.x.""" + + @staticmethod + def parse(content: str) -> dict: + nbnd, nks, body = _parse_plot_header( + content, _RAP_HEADER_RE, "&plot_rap nbnd_rap=..., nks_rap=... /" + ) + + lines = body.strip().splitlines() + if len(lines) != 2 * nks: + raise ValueError( + f"filband.rap has {len(lines)} body lines; expected {2 * nks} " + f"(2 per k-point) for nks={nks}." + ) + + k_points = np.empty((nks, 3), dtype=float) + is_high_symmetry = np.empty(nks, dtype=bool) + representations = np.empty((nks, nbnd), dtype=int) + for ik in range(nks): + head = lines[2 * ik].split() + k_points[ik] = [float(x) for x in head[:3]] + is_high_symmetry[ik] = head[3].upper() == "T" + representations[ik] = [int(x) for x in lines[2 * ik + 1].split()] + + return { + "nbnd": nbnd, + "nks": nks, + "k_points": k_points, + "is_high_symmetry": is_high_symmetry, + "representations": representations, + } + + +class BandsStdoutParser(BaseOutputFileParser): + """Parse the stdout of bands.x for high-symmetry point markers.""" + + @staticmethod + def parse(content: str) -> dict: + matches = list(_HIGH_SYM_RE.finditer(content)) + if not matches: + return {} + return { + "high_symmetry_points": np.array( + [ + [float(m.group("kx")), float(m.group("ky")), float(m.group("kz"))] + for m in matches + ] + ), + "high_symmetry_distances": np.array([float(m.group("x")) for m in matches]), + } diff --git a/tests/outputs/fixtures/bands/mgo/MgO-bands.dat b/tests/outputs/fixtures/bands/mgo/MgO-bands.dat new file mode 100644 index 0000000..526f5bc --- /dev/null +++ b/tests/outputs/fixtures/bands/mgo/MgO-bands.dat @@ -0,0 +1,9 @@ + &plot nbnd= 3, nks= 4 / + 1.000000 0.500000 0.000000 + -10.000 2.000 5.000 + 0.500000 0.500000 0.500000 + -10.500 1.800 4.500 + 0.000000 0.000000 0.000000 + -12.000 1.000 4.000 + 1.000000 0.000000 0.000000 + -10.800 1.500 4.700 diff --git a/tests/outputs/fixtures/bands/mgo/MgO-bands.dat.rap b/tests/outputs/fixtures/bands/mgo/MgO-bands.dat.rap new file mode 100644 index 0000000..e81a92f --- /dev/null +++ b/tests/outputs/fixtures/bands/mgo/MgO-bands.dat.rap @@ -0,0 +1,9 @@ + &plot_rap nbnd_rap= 3, nks_rap= 4 / + 1.000000 0.500000 0.000000 T + 4 5 1 + 0.500000 0.500000 0.500000 F + 2 1 2 + 0.000000 0.000000 0.000000 T + 1 1 1 + 1.000000 0.000000 0.000000 T + 4 2 1 diff --git a/tests/outputs/fixtures/bands/mgo/bands.out b/tests/outputs/fixtures/bands/mgo/bands.out new file mode 100644 index 0000000..bd0d251 --- /dev/null +++ b/tests/outputs/fixtures/bands/mgo/bands.out @@ -0,0 +1,16 @@ + + Program BANDS v.7.5 starts on 1May2026 at 7:10: 3 + + This program is part of the open-source Quantum ESPRESSO suite + +--- Removed: header banner --- + + high-symmetry point: 1.0000 0.5000 0.0000 x coordinate 0.0000 + high-symmetry point: 0.0000 0.0000 0.0000 x coordinate 1.5731 + high-symmetry point: 1.0000 0.0000 0.0000 x coordinate 2.5731 + +--- Removed: per-k-point band-following diagnostics --- + + BANDS : 6.58s CPU 7.01s WALL + + JOB DONE. diff --git a/tests/outputs/test_bands_output.py b/tests/outputs/test_bands_output.py new file mode 100644 index 0000000..29b9b73 --- /dev/null +++ b/tests/outputs/test_bands_output.py @@ -0,0 +1,22 @@ +from pathlib import Path + +from qe_tools.outputs import BandsOutput + + +def test_bands_mgo(robust_data_regression_check): + bands_directory = Path(__file__).parent / "fixtures" / "bands" / "mgo" + + bands = BandsOutput.from_dir(bands_directory) + + out = bands.get_output_dict() + snapshot = { + "number_of_kpoints": out["number_of_kpoints"], + "number_of_bands": out["number_of_bands"], + "k_points": out["k_points"], + "eigenvalues": out["eigenvalues"], + "high_symmetry_points": out["high_symmetry_points"], + "high_symmetry_distances": out["high_symmetry_distances"], + "is_high_symmetry": out["is_high_symmetry"].tolist(), + "representations": out["representations"], + } + robust_data_regression_check(snapshot) diff --git a/tests/outputs/test_bands_output/test_bands_mgo.yml b/tests/outputs/test_bands_output/test_bands_mgo.yml new file mode 100644 index 0000000..0c10bc0 --- /dev/null +++ b/tests/outputs/test_bands_output/test_bands_mgo.yml @@ -0,0 +1,60 @@ +eigenvalues: +- - -10.0 + - 2.0 + - 5.0 +- - -10.5 + - 1.8 + - 4.5 +- - -12.0 + - 1.0 + - 4.0 +- - -10.8 + - 1.5 + - 4.7 +high_symmetry_distances: +- 0.0 +- 1.5731 +- 2.5731 +high_symmetry_points: +- - 1.0 + - 0.5 + - 0.0 +- - 0.0 + - 0.0 + - 0.0 +- - 1.0 + - 0.0 + - 0.0 +is_high_symmetry: +- true +- false +- true +- true +k_points: +- - 1.0 + - 0.5 + - 0.0 +- - 0.5 + - 0.5 + - 0.5 +- - 0.0 + - 0.0 + - 0.0 +- - 1.0 + - 0.0 + - 0.0 +number_of_bands: 3.0 +number_of_kpoints: 4.0 +representations: +- - 4.0 + - 5.0 + - 1.0 +- - 2.0 + - 1.0 + - 2.0 +- - 1.0 + - 1.0 + - 1.0 +- - 4.0 + - 2.0 + - 1.0 From 88a86d749b4dd626f4a5833e1460969908406eaa Mon Sep 17 00:00:00 2001 From: Marnik Bercx Date: Fri, 1 May 2026 10:08:49 +0200 Subject: [PATCH 3/3] =?UTF-8?q?=E2=9C=A8=20`ProjwfcOutput`:=20add=20output?= =?UTF-8?q?=20class=20for=20projwfc.x?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `projwfc.x` writes projected-DOS data into one file per `(atom, wavefunction)` pair following the `.pdos_atm#N(El)_wfc#M(L)[_j#J]` naming scheme, plus a `.pdos_tot` summary. Decoding the filename, parsing the column-variable header, and reshaping the per-magnetic-quantum-number columns into a usable array is fiddly enough that callers were inevitably reinventing it. Add a dedicated `ProjwfcOutput` class plus two file parsers: - `PdosAtmWfcParser` reads a single `pdos_atm#N(El)_wfc#M(L)[_j#J]` file. It handles the spin-unpolarised, spin-polarised LSDA, and spin-orbit cases by counting `ldos`-prefixed columns and reshaping the trailing `pdos`-columns into `(n_energies, [spin,] 2*l + 1)`. - `PdosTotParser` reads `.pdos_tot` into the matching `dos_total` / `pdos_total` arrays. `ProjwfcOutput.from_dir` discovers every PDOS file in a directory via `collect_pdos_files`, sorts the resulting records numerically by `(atom, wfc, j)` so systems with more than nine atoms don't get scrambled by lexical order, and skips the consumed paths when sniffing for the projwfc.x stdout. The output exposes: - `pdos`: a flat list of records keyed by `(atom, element, wfc, l, l_label[, j])` with the parsed `energies`, `ldos`, and per-magnetic-quantum-number `pdos_m` arrays. Filtering by element or `l` is a one-line list comprehension on the caller side. - `pdos_total`: dict with `energies`, `dos_total`, `pdos_total`. --- src/qe_tools/outputs/__init__.py | 2 + src/qe_tools/outputs/parsers/projwfc.py | 152 ++++++++++++++++++ src/qe_tools/outputs/projwfc.py | 125 ++++++++++++++ .../mgo/MgO-pdos.dat.pdos_atm#1(Mg)_wfc#1(s) | 6 + .../mgo/MgO-pdos.dat.pdos_atm#1(Mg)_wfc#2(p) | 6 + .../mgo/MgO-pdos.dat.pdos_atm#2(O)_wfc#1(s) | 6 + .../mgo/MgO-pdos.dat.pdos_atm#2(O)_wfc#2(p) | 6 + .../projwfc/mgo/MgO-pdos.dat.pdos_tot | 6 + .../outputs/fixtures/projwfc/mgo/projwfc.out | 10 ++ tests/outputs/parsers/test_projwfc_parser.py | 62 +++++++ tests/outputs/test_projwfc_output.py | 17 ++ .../test_projwfc_output/test_projwfc_mgo.yml | 133 +++++++++++++++ 12 files changed, 531 insertions(+) create mode 100644 src/qe_tools/outputs/parsers/projwfc.py create mode 100644 src/qe_tools/outputs/projwfc.py create mode 100644 tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#1(Mg)_wfc#1(s) create mode 100644 tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#1(Mg)_wfc#2(p) create mode 100644 tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#2(O)_wfc#1(s) create mode 100644 tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#2(O)_wfc#2(p) create mode 100644 tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_tot create mode 100644 tests/outputs/fixtures/projwfc/mgo/projwfc.out create mode 100644 tests/outputs/parsers/test_projwfc_parser.py create mode 100644 tests/outputs/test_projwfc_output.py create mode 100644 tests/outputs/test_projwfc_output/test_projwfc_mgo.yml diff --git a/src/qe_tools/outputs/__init__.py b/src/qe_tools/outputs/__init__.py index 461e9ef..6ca8a06 100644 --- a/src/qe_tools/outputs/__init__.py +++ b/src/qe_tools/outputs/__init__.py @@ -1,9 +1,11 @@ from .pw import PwOutput from .dos import DosOutput from .bands import BandsOutput +from .projwfc import ProjwfcOutput __all__ = ( "PwOutput", "DosOutput", "BandsOutput", + "ProjwfcOutput", ) diff --git a/src/qe_tools/outputs/parsers/projwfc.py b/src/qe_tools/outputs/parsers/projwfc.py new file mode 100644 index 0000000..cff57c0 --- /dev/null +++ b/src/qe_tools/outputs/parsers/projwfc.py @@ -0,0 +1,152 @@ +"""Parsers for the output of Quantum ESPRESSO projwfc.x.""" + +from __future__ import annotations + +import re +from io import StringIO +from pathlib import Path + +import numpy as np + +from dough.outputs import BaseOutputFileParser + + +_L_FROM_LETTER = {"s": 0, "p": 1, "d": 2, "f": 3, "g": 4} + +_PDOS_ATM_RE = re.compile( + r"\.pdos_atm#(?P\d+)\((?P[A-Za-z][A-Za-z0-9]*)\)" + r"_wfc#(?P\d+)\((?P[spdfg])\)" + r"(?:_j#(?P[\d.]+))?$" +) + + +def _split_columns(content: str) -> tuple[list[str], np.ndarray]: + header, _, body = content.partition("\n") + if not header.startswith("#"): + raise ValueError(f"Unexpected PDOS file header: {header!r}") + columns = header.lstrip("#").split() + array = np.loadtxt(StringIO(body), ndmin=2) + return columns, array + + +class PdosAtmWfcParser(BaseOutputFileParser): + """Parse a single ``.pdos_atm#N(El)_wfc#M(L)[_j#J]`` file. + + Returned dict shape: + - `energies`: shape `(n_energies,)` + - `ldos`: shape `(n_energies,)` (spin-unpolarised) or `(n_energies, 2)` with + `[:, 0]` spin-up and `[:, 1]` spin-down (spin-polarised). + - `pdos_m`: shape `(n_energies, 2*l + 1)` (spin-unpolarised) or + `(n_energies, 2, 2*l + 1)` (spin-polarised), giving the per-magnetic-quantum-number + projection. + """ + + @staticmethod + def parse(content: str) -> dict: + columns, data = _split_columns(content) + energies = data[:, 0] + # Number of `ldos` columns tells us spin / non-collinear shape: 1 -> nospin, + # 2 -> collinear LSDA (ldos_up, ldos_dw). + n_ldos = sum(1 for col in columns[1:] if col.startswith("ldos")) + ldos = data[:, 1 : 1 + n_ldos] + pdos_m = data[:, 1 + n_ldos :] + + if n_ldos == 1: + return { + "energies": energies, + "ldos": ldos[:, 0], + "pdos_m": pdos_m, + } + # Spin-polarised: pdos_m has 2*(2l+1) columns, interleaved (m1up, m1dw, ...). + # QE writes them grouped per spin per m: convention is up_m1, dw_m1, up_m2, ... + # We reshape to `(n_energies, 2, 2l+1)`. + n_m = pdos_m.shape[1] // 2 + pdos_m = pdos_m.reshape(-1, n_m, 2).swapaxes(1, 2) + return { + "energies": energies, + "ldos": ldos, + "pdos_m": pdos_m, + } + + +class PdosTotParser(BaseOutputFileParser): + """Parse a ``.pdos_tot`` file. + + Returned dict: + - `energies`: shape `(n_energies,)` + - `dos_total`: total DOS (states/eV); shape `(n_energies,)` or `(n_energies, 2)` for + spin-polarised LSDA (`[:, 0]` spin-up, `[:, 1]` spin-down). + - `pdos_total`: sum over all atomic-orbital projections; same shape as `dos_total`. + """ + + @staticmethod + def parse(content: str) -> dict: + columns, data = _split_columns(content) + energies = data[:, 0] + rest = columns[1:] + n_dos = sum(1 for col in rest if col.startswith("dos")) + n_pdos = sum(1 for col in rest if col.startswith("pdos")) + dos = data[:, 1 : 1 + n_dos] + pdos = data[:, 1 + n_dos : 1 + n_dos + n_pdos] + if n_dos == 1: + dos = dos[:, 0] + if n_pdos == 1: + pdos = pdos[:, 0] + return { + "energies": energies, + "dos_total": dos, + "pdos_total": pdos, + } + + +def parse_pdos_filename(name: str) -> dict | None: + """Extract `(atom, element, wfc, l, j)` from a `.pdos_atm#...` filename. + + Returns `None` if the filename does not match the projwfc PDOS naming scheme. + """ + match = _PDOS_ATM_RE.search(name) + if match is None: + return None + info = { + "atom": int(match.group("atom")), + "element": match.group("element"), + "wfc": int(match.group("wfc")), + "l": _L_FROM_LETTER[match.group("l")], + "l_label": match.group("l"), + } + if match.group("j"): + info["j"] = float(match.group("j")) + return info + + +def collect_pdos_files( + directory: Path, +) -> tuple[list[dict], dict | None, set[Path]]: + """Discover and parse all PDOS files in `directory`. + + Returns: + - list of records, one per `pdos_atm#N(El)_wfc#M(L)[_j#J]` file. Each record carries + the parsed identifiers (atom index, element, wfc index, l, optional j) and the + parsed numerical arrays (`energies`, `ldos`, `pdos_m`). Sorted by `(atom, wfc, j)`. + - the parsed `pdos_tot` dict (or `None` if the file is missing). + - the set of paths consumed (so callers can skip them when looking at the rest of the + directory, e.g. to find the projwfc.x stdout). + """ + records: list[dict] = [] + total: dict | None = None + consumed: set[Path] = set() + for path in directory.iterdir(): + if not path.is_file(): + continue + if path.name.endswith(".pdos_tot"): + total = PdosTotParser.parse_from_file(path) + consumed.add(path) + continue + info = parse_pdos_filename(path.name) + if info is None: + continue + info.update(PdosAtmWfcParser.parse_from_file(path)) + records.append(info) + consumed.add(path) + records.sort(key=lambda r: (r["atom"], r["wfc"], r.get("j", 0.0))) + return records, total, consumed diff --git a/src/qe_tools/outputs/projwfc.py b/src/qe_tools/outputs/projwfc.py new file mode 100644 index 0000000..858dc67 --- /dev/null +++ b/src/qe_tools/outputs/projwfc.py @@ -0,0 +1,125 @@ +"""Output of the Quantum ESPRESSO projwfc.x code.""" + +import typing +from pathlib import Path +from typing import Annotated, TextIO + +from glom import Spec + +from dough.outputs import BaseOutput, output_mapping + +from .parsers.projwfc import ( + PdosAtmWfcParser, + PdosTotParser, + collect_pdos_files, + parse_pdos_filename, +) +from .parsers.stdout import BaseStdoutParser + + +@output_mapping +class _ProjwfcMapping: + """Typed outputs of a projwfc.x calculation.""" + + pdos: Annotated[list, Spec("pdos_records")] + """Per-(atom, wavefunction) projected DOS records. + + A flat list of dicts; one entry per `.pdos_atm#N(El)_wfc#M(L)[_j#J]` file. + Each record has the keys: + + - `atom` (int): 1-based atom index in the unit cell + - `element` (str): element symbol (e.g. `"Mg"`, `"O"`) + - `wfc` (int): 1-based wavefunction (radial-channel) index for this atom + - `l` (int): orbital angular momentum quantum number (`0=s`, `1=p`, `2=d`, `3=f`) + - `l_label` (str): single-letter label for `l` (`"s"`, `"p"`, ...) + - `j` (float, optional): total angular momentum, only present for spin-orbit runs + - `energies`: numpy array of shape `(n_energies,)` in eV + - `ldos`: per-l projected DOS (states/eV). Shape `(n_energies,)` for + spin-unpolarised runs, `(n_energies, 2)` for spin-polarised LSDA with + `[:, 0]` spin-up and `[:, 1]` spin-down. + - `pdos_m`: per-magnetic-quantum-number projection (states/eV). Shape + `(n_energies, 2*l + 1)` for spin-unpolarised runs, `(n_energies, 2, 2*l + 1)` + for spin-polarised LSDA with axis 1 the spin channel and axis 2 the + magnetic quantum number `m = -l, ..., +l` in QE order. + + Filter the list with a comprehension to get the projection you want, e.g. + `[r for r in pdos if r["element"] == "Mg" and r["l_label"] == "p"]`. + """ + + pdos_total: Annotated[dict, Spec("pdos_total")] + """Total DOS and total projected DOS, parsed from `.pdos_tot`. + + Dict with keys: + + - `energies`: numpy array of shape `(n_energies,)` in eV + - `dos_total`: total DOS (states/eV); shape `(n_energies,)` or `(n_energies, 2)` for + spin-polarised LSDA (`[:, 0]` spin-up, `[:, 1]` spin-down). + - `pdos_total`: sum of all atomic-orbital projections; same shape as `dos_total`. + """ + + +class ProjwfcOutput(BaseOutput[_ProjwfcMapping]): + """Output of the Quantum ESPRESSO projwfc.x code.""" + + converters: typing.ClassVar[dict] = {} + + @classmethod + def from_dir(cls, directory: str | Path): + """Locate and parse all `.pdos_*` files plus `projwfc.x` stdout in `directory`.""" + directory = Path(directory) + + if not directory.is_dir(): + raise ValueError(f"Path `{directory}` is not a valid directory.") + + records, total, consumed = collect_pdos_files(directory) + + stdout_file = None + for file in directory.iterdir(): + if not file.is_file() or file in consumed: + continue + with file.open("r") as handle: + header = "".join(handle.readlines(5)) + if "Program PROJWFC" in header: + stdout_file = file + break + + raw_outputs: dict = {"pdos_records": records, "pdos_total": total} + if stdout_file is not None: + raw_outputs["stdout"] = BaseStdoutParser.parse_from_file(stdout_file) + + return cls(raw_outputs=raw_outputs) + + @classmethod + def from_files( + cls, + *, + pdos_files: typing.Iterable[str | Path] = (), + pdos_tot: None | str | Path | TextIO = None, + stdout: None | str | Path | TextIO = None, + ): + """Parse the outputs from explicit file lists. + + - `pdos_files`: iterable of `.pdos_atm#N(El)_wfc#M(L)[_j#J]` paths + - `pdos_tot`: `.pdos_tot` path + - `stdout`: projwfc.x standard output + """ + records: list[dict] = [] + for path in pdos_files: + path = Path(path) + info = parse_pdos_filename(path.name) + if info is None: + raise ValueError( + f"`{path.name}` does not match the projwfc PDOS naming scheme." + ) + info.update(PdosAtmWfcParser.parse_from_file(path)) + records.append(info) + + raw_outputs: dict = { + "pdos_records": records, + "pdos_total": PdosTotParser.parse_from_file(pdos_tot) + if pdos_tot is not None + else None, + } + if stdout is not None: + raw_outputs["stdout"] = BaseStdoutParser.parse_from_file(stdout) + return cls(raw_outputs=raw_outputs) diff --git a/tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#1(Mg)_wfc#1(s) b/tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#1(Mg)_wfc#1(s) new file mode 100644 index 0000000..1c946c6 --- /dev/null +++ b/tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#1(Mg)_wfc#1(s) @@ -0,0 +1,6 @@ +# E (eV) ldos(E) pdos(E) + -11.793 0.000E+00 0.000E+00 + -11.783 0.260E-02 0.260E-02 + -11.773 0.520E-02 0.520E-02 + -11.763 0.780E-02 0.780E-02 + -11.753 0.104E-01 0.104E-01 diff --git a/tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#1(Mg)_wfc#2(p) b/tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#1(Mg)_wfc#2(p) new file mode 100644 index 0000000..059f9c5 --- /dev/null +++ b/tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#1(Mg)_wfc#2(p) @@ -0,0 +1,6 @@ +# E (eV) ldos(E) pdos(E) pdos(E) pdos(E) + -11.793 0.000E+00 0.000E+00 0.000E+00 0.000E+00 + -11.783 0.118E-03 0.393E-04 0.393E-04 0.393E-04 + -11.773 0.236E-03 0.787E-04 0.787E-04 0.787E-04 + -11.763 0.354E-03 0.118E-03 0.118E-03 0.118E-03 + -11.753 0.472E-03 0.157E-03 0.157E-03 0.157E-03 diff --git a/tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#2(O)_wfc#1(s) b/tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#2(O)_wfc#1(s) new file mode 100644 index 0000000..77d7bf6 --- /dev/null +++ b/tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#2(O)_wfc#1(s) @@ -0,0 +1,6 @@ +# E (eV) ldos(E) pdos(E) + -11.793 0.000E+00 0.000E+00 + -11.783 0.150E-02 0.150E-02 + -11.773 0.300E-02 0.300E-02 + -11.763 0.450E-02 0.450E-02 + -11.753 0.600E-02 0.600E-02 diff --git a/tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#2(O)_wfc#2(p) b/tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#2(O)_wfc#2(p) new file mode 100644 index 0000000..15dd8f7 --- /dev/null +++ b/tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_atm#2(O)_wfc#2(p) @@ -0,0 +1,6 @@ +# E (eV) ldos(E) pdos(E) pdos(E) pdos(E) + -11.793 0.000E+00 0.000E+00 0.000E+00 0.000E+00 + -11.783 0.300E-03 0.100E-03 0.100E-03 0.100E-03 + -11.773 0.600E-03 0.200E-03 0.200E-03 0.200E-03 + -11.763 0.900E-03 0.300E-03 0.300E-03 0.300E-03 + -11.753 0.120E-02 0.400E-03 0.400E-03 0.400E-03 diff --git a/tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_tot b/tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_tot new file mode 100644 index 0000000..f184902 --- /dev/null +++ b/tests/outputs/fixtures/projwfc/mgo/MgO-pdos.dat.pdos_tot @@ -0,0 +1,6 @@ +# E (eV) dos(E) pdos(E) + -11.793 0.000E+00 0.000E+00 + -11.783 0.671E-02 0.671E-02 + -11.773 0.134E-01 0.134E-01 + -11.763 0.201E-01 0.201E-01 + -11.753 0.268E-01 0.268E-01 diff --git a/tests/outputs/fixtures/projwfc/mgo/projwfc.out b/tests/outputs/fixtures/projwfc/mgo/projwfc.out new file mode 100644 index 0000000..7f9d089 --- /dev/null +++ b/tests/outputs/fixtures/projwfc/mgo/projwfc.out @@ -0,0 +1,10 @@ + + Program PROJWFC v.7.5 starts on 1May2026 at 7:10:18 + + This program is part of the open-source Quantum ESPRESSO suite + +--- Removed: header banner, projection table --- + + PROJWFC : 0.50s CPU 0.55s WALL + + JOB DONE. diff --git a/tests/outputs/parsers/test_projwfc_parser.py b/tests/outputs/parsers/test_projwfc_parser.py new file mode 100644 index 0000000..66a0425 --- /dev/null +++ b/tests/outputs/parsers/test_projwfc_parser.py @@ -0,0 +1,62 @@ +"""Pin the projwfc PDOS column-decoding conventions.""" + +from __future__ import annotations + +import numpy as np + +from qe_tools.outputs.parsers.projwfc import PdosAtmWfcParser, PdosTotParser + + +def test_pdos_atm_wfc_unpolarised_p(): + """Spin-unpolarised `l=1` (p) PDOS file: 1 ldos col + 3 m cols.""" + content = ( + "# E (eV) ldos(E) pdos(E) pdos(E) pdos(E)\n" + " -1.000 0.300 0.100 0.100 0.100\n" + " 0.000 0.600 0.200 0.200 0.200\n" + " 1.000 0.900 0.300 0.300 0.300\n" + ) + parsed = PdosAtmWfcParser.parse(content) + + np.testing.assert_array_equal(parsed["energies"], [-1.0, 0.0, 1.0]) + np.testing.assert_array_equal(parsed["ldos"], [0.3, 0.6, 0.9]) + np.testing.assert_array_equal( + parsed["pdos_m"], + [[0.1, 0.1, 0.1], [0.2, 0.2, 0.2], [0.3, 0.3, 0.3]], + ) + + +def test_pdos_atm_wfc_spin_polarised_reshape_axes(): + """Spin-polarised `l=1` PDOS: pin the (n_e, spin, m) axis order. + + QE writes the columns as `up_m1, dw_m1, up_m2, dw_m2, up_m3, dw_m3`. + We expect `pdos_m[:, spin, m]` with `spin=0` for up, `spin=1` for down. + """ + # ldos_up, ldos_dw, then 6 m-columns interleaved per spin + content = ( + "# E (eV) ldosup(E) ldosdw(E) " + "pdosup(E) pdosdw(E) pdosup(E) pdosdw(E) pdosup(E) pdosdw(E)\n" + # Pick distinct values for each (spin, m) so axis order is unambiguous. + "0.0 9.0 8.0 10 20 30 40 50 60\n" + ) + parsed = PdosAtmWfcParser.parse(content) + + np.testing.assert_array_equal(parsed["ldos"], [[9.0, 8.0]]) + assert parsed["pdos_m"].shape == (1, 2, 3) + # Spin-up (axis 1 = 0) follows the up_m1, up_m2, up_m3 columns. + np.testing.assert_array_equal(parsed["pdos_m"][0, 0, :], [10, 30, 50]) + # Spin-down (axis 1 = 1) follows the dw_m1, dw_m2, dw_m3 columns. + np.testing.assert_array_equal(parsed["pdos_m"][0, 1, :], [20, 40, 60]) + + +def test_pdos_tot_spin_polarised(): + """`pdos_tot` with spin-polarised columns: dosup, dosdw, pdosup, pdosdw.""" + content = ( + "# E (eV) dosup(E) dosdw(E) pdosup(E) pdosdw(E)\n" + " -1.0 1.0 2.0 10.0 20.0\n" + " 0.0 3.0 4.0 30.0 40.0\n" + ) + parsed = PdosTotParser.parse(content) + + np.testing.assert_array_equal(parsed["energies"], [-1.0, 0.0]) + np.testing.assert_array_equal(parsed["dos_total"], [[1.0, 2.0], [3.0, 4.0]]) + np.testing.assert_array_equal(parsed["pdos_total"], [[10.0, 20.0], [30.0, 40.0]]) diff --git a/tests/outputs/test_projwfc_output.py b/tests/outputs/test_projwfc_output.py new file mode 100644 index 0000000..b80a8f5 --- /dev/null +++ b/tests/outputs/test_projwfc_output.py @@ -0,0 +1,17 @@ +from pathlib import Path + +from qe_tools.outputs import ProjwfcOutput + + +def test_projwfc_mgo(robust_data_regression_check): + projwfc_directory = Path(__file__).parent / "fixtures" / "projwfc" / "mgo" + + proj = ProjwfcOutput.from_dir(projwfc_directory) + + out = proj.get_output_dict() + snapshot = { + "n_records": len(out["pdos"]), + "records": out["pdos"], + "pdos_total": out["pdos_total"], + } + robust_data_regression_check(snapshot) diff --git a/tests/outputs/test_projwfc_output/test_projwfc_mgo.yml b/tests/outputs/test_projwfc_output/test_projwfc_mgo.yml new file mode 100644 index 0000000..db9c1d3 --- /dev/null +++ b/tests/outputs/test_projwfc_output/test_projwfc_mgo.yml @@ -0,0 +1,133 @@ +n_records: 4.0 +pdos_total: + dos_total: + - 0.0 + - 0.00671 + - 0.0134 + - 0.0201 + - 0.0268 + energies: + - -11.793 + - -11.783 + - -11.773 + - -11.763 + - -11.753 + pdos_total: + - 0.0 + - 0.00671 + - 0.0134 + - 0.0201 + - 0.0268 +records: +- atom: 1.0 + element: Mg + energies: + - -11.793 + - -11.783 + - -11.773 + - -11.763 + - -11.753 + l: 0.0 + l_label: s + ldos: + - 0.0 + - 0.0026 + - 0.0052 + - 0.0078 + - 0.0104 + pdos_m: + - - 0.0 + - - 0.0026 + - - 0.0052 + - - 0.0078 + - - 0.0104 + wfc: 1.0 +- atom: 1.0 + element: Mg + energies: + - -11.793 + - -11.783 + - -11.773 + - -11.763 + - -11.753 + l: 1.0 + l_label: p + ldos: + - 0.0 + - 0.00012 + - 0.00024 + - 0.00035 + - 0.00047 + pdos_m: + - - 0.0 + - 0.0 + - 0.0 + - - 4.0e-05 + - 4.0e-05 + - 4.0e-05 + - - 8.0e-05 + - 8.0e-05 + - 8.0e-05 + - - 0.00012 + - 0.00012 + - 0.00012 + - - 0.00016 + - 0.00016 + - 0.00016 + wfc: 2.0 +- atom: 2.0 + element: O + energies: + - -11.793 + - -11.783 + - -11.773 + - -11.763 + - -11.753 + l: 0.0 + l_label: s + ldos: + - 0.0 + - 0.0015 + - 0.003 + - 0.0045 + - 0.006 + pdos_m: + - - 0.0 + - - 0.0015 + - - 0.003 + - - 0.0045 + - - 0.006 + wfc: 1.0 +- atom: 2.0 + element: O + energies: + - -11.793 + - -11.783 + - -11.773 + - -11.763 + - -11.753 + l: 1.0 + l_label: p + ldos: + - 0.0 + - 0.0003 + - 0.0006 + - 0.0009 + - 0.0012 + pdos_m: + - - 0.0 + - 0.0 + - 0.0 + - - 0.0001 + - 0.0001 + - 0.0001 + - - 0.0002 + - 0.0002 + - 0.0002 + - - 0.0003 + - 0.0003 + - 0.0003 + - - 0.0004 + - 0.0004 + - 0.0004 + wfc: 2.0