Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Custom formatter can't reformat dimensionless unit #2024

Closed
aulemahal opened this issue Jun 27, 2024 · 8 comments
Closed

Custom formatter can't reformat dimensionless unit #2024

aulemahal opened this issue Jun 27, 2024 · 8 comments

Comments

@aulemahal
Copy link
Contributor

aulemahal commented Jun 27, 2024

MWE:

import pint

@pint.register_unit_format("abc")
def short_formatter(unit, registry, **options):
    unit = registry.Unit(unit)
    return f"{unit:~D}"

u = pint.Unit("")
f"{u:abc}"

raises: "KeyError: 'dimensionless'"

Full traceback:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[31], line 1
----> 1 f"{u:ty}"

File ~/miniforge3/envs/xclim/lib/python3.12/site-packages/pint/facets/plain/unit.py:67, in PlainUnit.__format__(self, spec)
     66 def __format__(self, spec: str) -> str:
---> 67     return self._REGISTRY.formatter.format_unit(self, spec)

File ~/miniforge3/envs/xclim/lib/python3.12/site-packages/pint/delegates/formatter/full.py:137, in FullFormatter.format_unit(self, unit, uspec, sort_func, **babel_kwds)
    135 uspec = uspec or self.default_format
    136 sort_func = sort_func or self.default_sort_func
--> 137 return self.get_formatter(uspec).format_unit(
    138     unit, uspec, sort_func=sort_func, **babel_kwds
    139 )

File ~/miniforge3/envs/xclim/lib/python3.12/site-packages/pint/delegates/formatter/_to_register.py:104, in register_unit_format.<locals>.wrapper.<locals>.NewFormatter.format_unit(self, unit, uspec, **babel_kwds)
    101 else:
    102     units = self._registry.UnitsContainer(numerator)
--> 104 return func(units, registry=self._registry)

Cell In[26], line 5, in short_formatter(unit, registry, **options)
      3 unit = registry.Unit(unit)
      4 # Print units using abbreviations (millimeter -> mm)
----> 5 return f"{unit:~D}"

File ~/miniforge3/envs/xclim/lib/python3.12/site-packages/pint/facets/plain/unit.py:67, in PlainUnit.__format__(self, spec)
     66 def __format__(self, spec: str) -> str:
---> 67     return self._REGISTRY.formatter.format_unit(self, spec)

File ~/miniforge3/envs/xclim/lib/python3.12/site-packages/pint/delegates/formatter/full.py:137, in FullFormatter.format_unit(self, unit, uspec, sort_func, **babel_kwds)
    135 uspec = uspec or self.default_format
    136 sort_func = sort_func or self.default_sort_func
--> 137 return self.get_formatter(uspec).format_unit(
    138     unit, uspec, sort_func=sort_func, **babel_kwds
    139 )

File ~/miniforge3/envs/xclim/lib/python3.12/site-packages/pint/delegates/formatter/plain.py:90, in DefaultFormatter.format_unit(self, unit, uspec, sort_func, **babel_kwds)
     79 def format_unit(
     80     self,
     81     unit: PlainUnit | Iterable[tuple[str, Any]],
   (...)
     84     **babel_kwds: Unpack[BabelKwds],
     85 ) -> str:
     86     """Format a unit (can be compound) into string
     87     given a string formatting specification and locale related arguments.
     88     """
---> 90     numerator, denominator = prepare_compount_unit(
     91         unit,
     92         uspec,
     93         sort_func=sort_func,
     94         **babel_kwds,
     95         registry=self._registry,
     96     )
     98     if babel_kwds.get("locale", None):
     99         length = babel_kwds.get("length") or ("short" if "~" in uspec else "long")

File ~/miniforge3/envs/xclim/lib/python3.12/site-packages/pint/delegates/formatter/_compound_unit_helpers.py:298, in prepare_compount_unit(unit, spec, sort_func, use_plural, length, locale, as_ratio, registry)
    296 if locale is None:
    297     if sort_func is not None:
--> 298         numerator = sort_func(numerator, registry)
    299         denominator = sort_func(denominator, registry)
    301     return map(extract2, numerator), map(extract2, denominator)

File ~/miniforge3/envs/xclim/lib/python3.12/site-packages/pint/delegates/formatter/_compound_unit_helpers.py:190, in sort_by_unit_name(items, _registry)
    187 def sort_by_unit_name(
    188     items: Iterable[tuple[str, Number, str]], _registry: UnitRegistry | None
    189 ) -> Iterable[tuple[str, Number, str]]:
--> 190     return sorted(items, key=lambda el: el[2])

File ~/miniforge3/envs/xclim/lib/python3.12/site-packages/pint/delegates/formatter/_compound_unit_helpers.py:161, in to_symbol_exponent_name(el, registry)
    157 def to_symbol_exponent_name(
    158     el: tuple[str, T], registry: UnitRegistry
    159 ) -> tuple[str, T, str]:
    160     """Convert unit name and exponent to unit symbol as display name, exponent and unit name."""
--> 161     return registry._get_symbol(el[0]), el[1], el[0]

File ~/miniforge3/envs/xclim/lib/python3.12/site-packages/pint/facets/plain/registry.py:696, in GenericPlainRegistry._get_symbol(self, name)
    695 def _get_symbol(self, name: str) -> str:
--> 696     return self._units[name].symbol

File ~/miniforge3/envs/xclim/lib/python3.12/collections/__init__.py:1015, in ChainMap.__getitem__(self, key)
   1013     except KeyError:
   1014         pass
-> 1015 return self.__missing__(key)

File ~/miniforge3/envs/xclim/lib/python3.12/collections/__init__.py:1007, in ChainMap.__missing__(self, key)
   1006 def __missing__(self, key):
-> 1007     raise KeyError(key)

KeyError: 'dimensionless'

To help the debug:

@pint.register_unit_format("abc")
def short_formatter(unit, registry, **options):
    print('unit received by formatter', type(unit), unit)
    print('registry', registry)
    unit = registry.Unit(unit)
    print('unit after registry conversion', type(unit), unit)
    return f"{unit:~D}"

u = pint.Unit("")
print('unit at creation', type(u), u)
f"{u:abc}"

prints

unit at creation <class 'pint.registry.Unit'> dimensionless
unit received by formatter <class 'pint.util.UnitsContainer'> dimensionless
registry <pint.registry.UnitRegistry object at 0x7fe882a2d190>
unit after registry conversion <class 'pint.Unit'> dimensionless

.

@aulemahal aulemahal changed the title Custom formatter receives registry missing dimensionless unit Custom formatter can't reformat dimensionless unit Jun 27, 2024
@aulemahal
Copy link
Contributor Author

I think I found the error source, but I don't know how to fix.

The custom formatter is called by NewFormatter, a class declared in the wrapper itself. This class has the format_unit method which prepares the unit with pint.delegates.formatter._compound_unit_helpers.prepare_compount_unit. In the case of a dimensionless quantity, that last function has a conditional I don't understand:

if "~" in spec:
return ([], [])
else:
return ([("dimensionless", 1)], [])

So with a spec without a "~", the first output of prepare_compount_unit is [("dimensionless", 1)]. NewFormatter goes on and wraps this into a UnitsContainer :

units = self._registry.UnitsContainer(numerator)

The resulting container prints (repr) as <UnitsContainer({'dimensionless': 1})>. However, if we go back to the initial unit, its container rather prints as repr(u._units) <UnitsContainer({})>. I thus assume the first UnitsContainer, the one sent by NewFormatter to the custom function, is wrong.

@andrewgsavage
Copy link
Collaborator

Adding return ([("dimensionless", 1)], []) was the most obvious way I saw to get the formatters to return "dimensionless" as the longform. I'm not sure what the previous behaviour was.

Why do you need to create a unit inside your custom formatter?

@aulemahal
Copy link
Contributor Author

Because the custom formatter builds on the default formatter. For calling the default one it needs a Unit object.

But I guess one solution would be to copy the formatter call from the default formatter instead of actually calling it. Hopefully, that solution would work with previous pint versions as well.

@hgrecco
Copy link
Owner

hgrecco commented Jun 28, 2024

@aulemahal Can you provide a little bit more in detail you use case? I do not understand it too well.

@aulemahal
Copy link
Contributor Author

My usecase is this : https://github.com/xarray-contrib/cf-xarray/blob/c511cc5624ef0a217e83cb0f820a7acd16b877ae/cf_xarray/units.py#L19 I myself didn't write this function, but I think the code comes from a time where this was done outside of pint, not as a custom formatter, which might explain why it looks weird.

To make things simple we begin by format the units using the short default format ("~D"). Then, using regex we split the result into its components to remove the division and exponentiation signs. Then remove the multiplication signs, replace "Δ" with "delta" and "percent" with "%".

I had not re-read the exact way this was written and I now realize this is much more complex than re-writing the units directly from the container. The only issue I foresee is that we would want the "cf" formatter to always act as a short one, as if "~cf" was written.

@hgrecco
Copy link
Owner

hgrecco commented Jun 28, 2024

If you are sure that the input is unit, then recreating is not necessary.

You can also subclass the existing formatter, this would be a direct replacement to your (I haven't tried yet)

class MyFormatter(DefaultFormatter):
    def format_unit(
        self,
        unit: PlainUnit | Iterable[tuple[str, Any]],
        uspec: str = "",
        sort_func: SortFunc | None = None,
        **babel_kwds: Unpack[BabelKwds],
    ) -> str:
        """Format a unit (can be compound) into string
        given a string formatting specification and locale related arguments.
        """
        
        if "~" not in uspec:
            uspec = "~" + uspec

        s = super().format_unit(unit, uspec, sort_func, **babal_kwds)

        # Search and replace patterns
        pat = r"(?P<inverse>(?:1 )?/ )?(?P<unit>\w+)(?: \*\* (?P<pow>\d))?"

        def repl(m):
            i, u, p = m.groups()
            p = p or (1 if i else "")
            neg = "-" if i else ""

            return f"{u}{neg}{p}"

        out, n = re.subn(pat, repl, s)

        # Remove multiplications
        out = out.replace(" * ", " ")
        # Delta degrees:
        out = out.replace("Δ°", "delta_deg")
        return out.replace("percent", "%")

# we need a better way here
ureg.formatter._formatters["cf"] = MyFormatter

and then if you look into the DefaultFormatter you can actually directly copy the parse_unit and avoid calling the super function.

see here

@hgrecco
Copy link
Owner

hgrecco commented Jun 28, 2024

By the way, I want to change the prepare_compount_unit function to stop accepting spec. The only spec that is used now is ~. So I would rather have a boolean flag about using the short version.

@aulemahal
Copy link
Contributor Author

Closing this as it makes more sense to fix the issue in cf-xarray.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants