Skip to content

Commit

Permalink
Merge pull request #140 from fvaleye/fix/rapl-negative-value
Browse files: browse the repository at this point in the history
Fix negative value with RAPL wrap-around for Linux hardware
  • Loading branch information
fvaleye authored Mar 7, 2023
2 parents 701884e + e6445a7 commit efc0665
Show file tree
Hide file tree
Showing 13 changed files with 174 additions and 163 deletions.
237 changes: 112 additions & 125 deletions poetry.lock

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
65532610987
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
65532610987
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
65532610987
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
65532610987
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
65532610987
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
65532610987
Original file line number Diff line number Diff line change
@@ -1 +1 @@
50000
70000
41 changes: 25 additions & 16 deletions tests/hardwares/test_rapl.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,19 +47,28 @@ def by_energy_uj(rapl_result: RAPLResult) -> str:
@pytest.mark.asyncio
@pytest.mark.linux
@pytest.mark.darwin
async def test_get_rapl_power_usage_max_when_0():
async def test_get_rapl_power_wrap_around_when_0():
path = f"{pathlib.Path(__file__).parent.resolve()}/data/intel-rapl2"
two_seconds_ago = datetime.datetime.now() - datetime.timedelta(seconds=2)
rapl_separator_for_windows = "T"
rapl_results = dict()
rapl_results["package-0"] = RAPLResult(
name="package", energy_uj=2, max_energy_uj=70000, timestamp=two_seconds_ago
)
rapl_results["core"] = RAPLResult(
name="core", energy_uj=1, max_energy_uj=70000, timestamp=two_seconds_ago
)
rapl = RAPL(
path=path, rapl_separator=rapl_separator_for_windows, rapl_results=rapl_results
)
host_energy_usage_expected = 35
cpu_energy_usage_expected = 35

rapl_results = await RAPL(
path=path, rapl_separator=rapl_separator_for_windows
).get_rapl_power_usage()
assert rapl_results[0].name == "package-0"
assert rapl_results[0].energy_uj == 70000.0
assert rapl_results[0].timestamp is not None
assert rapl_results[1].name == "core"
assert rapl_results[1].energy_uj == 50000.0
assert rapl_results[1].timestamp is not None
energy_report = await rapl.get_energy_report()
energy_report.convert_unit(EnergyUsageUnit.MILLIWATT)
assert round(energy_report.host_energy_usage, 0) == host_energy_usage_expected
assert round(energy_report.cpu_energy_usage, 0) == cpu_energy_usage_expected
assert energy_report.memory_energy_usage is None


@pytest.mark.asyncio
Expand All @@ -71,19 +80,19 @@ async def test_get_total_uj_one_call():
one_minute_ago = datetime.datetime.now() - datetime.timedelta(seconds=60)
rapl_results = dict()
rapl_results["package-0"] = RAPLResult(
name="package", energy_uj=50000, timestamp=one_minute_ago
name="package", energy_uj=50000, max_energy_uj=70000, timestamp=one_minute_ago
)
rapl_results["core"] = RAPLResult(
name="core", energy_uj=40000, timestamp=one_minute_ago
name="core", energy_uj=40000, max_energy_uj=70000, timestamp=one_minute_ago
)
rapl = RAPL(
path=path, rapl_separator=rapl_separator_for_windows, rapl_results=rapl_results
)
host_energy_usage_expected = 0.333
cpu_energy_usage_expected = 0.167
host_energy_usage_expected = 0.33
cpu_energy_usage_expected = 0.5

energy_report = await rapl.get_energy_report()
energy_report.convert_unit(EnergyUsageUnit.MILLIWATT)
assert round(energy_report.host_energy_usage, 3) == host_energy_usage_expected
assert round(energy_report.cpu_energy_usage, 3) == cpu_energy_usage_expected
assert round(energy_report.host_energy_usage, 2) == host_energy_usage_expected
assert round(energy_report.cpu_energy_usage, 2) == cpu_energy_usage_expected
assert energy_report.memory_energy_usage is None
2 changes: 2 additions & 0 deletions tracarbon/exporters/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pydantic import BaseModel

from tracarbon.hardwares.hardware import HardwareInfo
from tracarbon.locations import Location


class Exporter(BaseModel, metaclass=ABCMeta):
Expand Down Expand Up @@ -126,6 +127,7 @@ class MetricGenerator(BaseModel):

metrics: List[Metric]
platform: str = HardwareInfo.get_platform()
location: Optional[Location] = None

async def generate(self) -> AsyncGenerator[Metric, None]:
"""
Expand Down
4 changes: 0 additions & 4 deletions tracarbon/general_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ class EnergyConsumptionGenerator(MetricGenerator):
Energy consumption generator for energy consumption.
"""

location: Location
energy_consumption: EnergyConsumption

def __init__(self, location: Location, **data: Any) -> None:
Expand Down Expand Up @@ -50,7 +49,6 @@ class CarbonEmissionGenerator(MetricGenerator):
Carbon emission generator to generate carbon emissions.
"""

location: Location
carbon_emission: CarbonEmission
co2signal_api_key: Optional[str] = None

Expand Down Expand Up @@ -98,7 +96,6 @@ class EnergyConsumptionKubernetesGenerator(MetricGenerator):
Energy consumption generator for energy consumption of the containers.
"""

location: Location
energy_consumption: EnergyConsumption
kubernetes: Kubernetes

Expand Down Expand Up @@ -148,7 +145,6 @@ class CarbonEmissionKubernetesGenerator(MetricGenerator):
Carbon emission generator to generate carbon emissions of the containers.
"""

location: Location
carbon_emission: CarbonEmission
kubernetes: Kubernetes
co2signal_api_key: Optional[str] = None
Expand Down
23 changes: 16 additions & 7 deletions tracarbon/hardwares/rapl.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class RAPLResult(BaseModel):

name: str
energy_uj: float
max_energy_uj: float
timestamp: datetime


Expand Down Expand Up @@ -82,15 +83,16 @@ async def get_rapl_power_usage(self) -> List[RAPLResult]:
f"{file_path}/energy_uj", "r"
) as rapl_energy:
energy_uj = float(await rapl_energy.read())
max_energy_uj_value_reached = energy_uj < 1
if max_energy_uj_value_reached:
async with aiofiles.open(
f"{file_path}/max_energy_range_uj", "r"
) as max_energy_rapl_file:
energy_uj = float(await max_energy_rapl_file.read())
async with aiofiles.open(
f"{file_path}/max_energy_range_uj", "r"
) as rapl_max_energy:
max_energy_uj = float(await rapl_max_energy.read())
rapl_results.append(
RAPLResult(
name=name, energy_uj=energy_uj, timestamp=datetime.now()
name=name,
energy_uj=energy_uj,
max_energy_uj=max_energy_uj,
timestamp=datetime.now(),
)
)
except Exception as exception:
Expand All @@ -115,6 +117,13 @@ async def get_energy_report(self) -> EnergyUsage:
time_difference = (
rapl_result.timestamp - previous_rapl_result.timestamp
).total_seconds()
if previous_rapl_result.energy_uj > rapl_result.energy_uj:
logger.debug(
f"Wrap-around detected in RAPL {rapl_result.name}. The current RAPL energy value ({rapl_result.energy_uj}) is lower than previous value ({previous_rapl_result.energy_uj})."
)
rapl_result.energy_uj = (
rapl_result.energy_uj + rapl_result.max_energy_uj
)
watts = Power.watts_from_microjoules(
(
(rapl_result.energy_uj - previous_rapl_result.energy_uj)
Expand Down
22 changes: 12 additions & 10 deletions tracarbon/hardwares/sensors.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,17 +153,19 @@ def __init__(self, instance_type: str, **data: Any) -> None:

for row in reader:
if row[0] == instance_type:
data["cpu_idle"] = float(row[14].replace(",", "."))
data["cpu_at_10"] = float(row[15].replace(",", "."))
data["cpu_at_50"] = float(row[16].replace(",", "."))
data["cpu_at_100"] = float(row[17].replace(",", "."))
data["memory_idle"] = float(row[18].replace(",", "."))
data["memory_at_10"] = float(row[19].replace(",", "."))
data["memory_at_50"] = float(row[20].replace(",", "."))
data["memory_at_100"] = float(row[21].replace(",", "."))
data["has_gpu"] = float(row[22].replace(",", ".")) > 0
data["delta_full_machine"] = float(
row[26].replace(",", ".")
)
super().__init__(
cpu_idle=float(row[14].replace(",", ".")),
cpu_at_10=float(row[15].replace(",", ".")),
cpu_at_50=float(row[16].replace(",", ".")),
cpu_at_100=float(row[17].replace(",", ".")),
memory_idle=float(row[18].replace(",", ".")),
memory_at_10=float(row[19].replace(",", ".")),
memory_at_50=float(row[20].replace(",", ".")),
memory_at_100=float(row[21].replace(",", ".")),
has_gpu=float(row[22].replace(",", ".")) > 0,
delta_full_machine=float(row[26].replace(",", ".")),
**data,
)
return
Expand Down

0 comments on commit efc0665

Please sign in to comment.