Skip to content

Commit

Permalink
Document, implement, and test remaining indirect host audit fields (#…
Browse files Browse the repository at this point in the history
…15796)

* Document, implement, and test remaining indirect host audit fields

* Fix hashing
  • Loading branch information
AlanCoding authored and Peter Braun committed Feb 24, 2025
1 parent df17ac1 commit af2a986
Show file tree
Hide file tree
Showing 8 changed files with 297 additions and 115 deletions.
25 changes: 0 additions & 25 deletions awx/main/migrations/0201_eventquery.py

This file was deleted.

85 changes: 85 additions & 0 deletions awx/main/migrations/0201_indirect_managed_node_audit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Generated by Django 4.2.16 on 2025-01-29 20:13

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    # Schema migration that, in one step, adds the Job.event_queries_processed
    # flag and creates the EventQuery and IndirectManagedNodeAudit tables.

    # Must be applied after migration 0200 of the 'main' app.
    dependencies = [
        ('main', '0200_delete_token_cleanup_job'),
    ]

    operations = [
        # Per-job marker; defaults to False for existing and new rows.
        migrations.AddField(
            model_name='job',
            name='event_queries_processed',
            field=models.BooleanField(default=False, help_text='Events of this job have been queried for indirect host information'),
        ),
        # Stores one event_query document per collection name + version.
        migrations.CreateModel(
            name='EventQuery',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('fqcn', models.CharField(help_text='Fully-qualified collection name.', max_length=255)),
                ('collection_version', models.CharField(help_text='Version of the collection this data applies to.', max_length=32)),
                ('event_query', models.JSONField(default=dict, help_text='The extensions/audit/event_query.yml file content scraped from the collection.')),
            ],
            options={
                # At most one row per (fqcn, collection_version) pair.
                'unique_together': {('fqcn', 'collection_version')},
            },
        ),
        # Audit record of a host touched indirectly by a job.
        migrations.CreateModel(
            name='IndirectManagedNodeAudit',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('created', models.DateTimeField(auto_now_add=True)),
                ('name', models.CharField(help_text='The Ansible name of the host that this audit record is for.', max_length=255)),
                ('canonical_facts', models.JSONField(default=dict, help_text='Facts about the host that will be used for managed node deduplication.')),
                ('facts', models.JSONField(default=dict, help_text='Non canonical facts having additional info about the managed node.')),
                ('events', models.JSONField(default=list, help_text='List of fully-qualified names of modules that ran against the host in the job.')),
                ('count', models.PositiveIntegerField(default=0, help_text='Counter of how many times registered modules were invoked on the host.')),
                # All four foreign keys use DO_NOTHING, so Django takes no action
                # on these rows when the referenced object is deleted.
                # NOTE(review): presumably intentional so audit rows outlive their
                # related objects -- confirm how referential integrity is handled.
                (
                    'host',
                    models.ForeignKey(
                        help_text='The host this audit record is for.',
                        null=True,
                        on_delete=django.db.models.deletion.DO_NOTHING,
                        related_name='host_indirect_host_audits',
                        to='main.host',
                    ),
                ),
                (
                    'inventory',
                    models.ForeignKey(
                        help_text='The inventory the related job ran against, and which the related host is in.',
                        null=True,
                        on_delete=django.db.models.deletion.DO_NOTHING,
                        related_name='inventory_indirect_host_audits',
                        to='main.inventory',
                    ),
                ),
                # Unlike host and inventory, job and organization are required (no null=True).
                (
                    'job',
                    models.ForeignKey(
                        editable=False,
                        help_text='Data saved in this record only applies to this specified job.',
                        on_delete=django.db.models.deletion.DO_NOTHING,
                        related_name='job_indirect_host_audits',
                        to='main.job',
                    ),
                ),
                (
                    'organization',
                    models.ForeignKey(
                        help_text='Applicable organization, inferred from the related job.',
                        on_delete=django.db.models.deletion.DO_NOTHING,
                        related_name='organization_indirect_host_audits',
                        to='main.organization',
                    ),
                ),
            ],
            options={
                # A given host name may appear at most once per job.
                'unique_together': {('name', 'job')},
            },
        ),
    ]
47 changes: 0 additions & 47 deletions awx/main/migrations/0202_indirectmanagednodeaudit.py

This file was deleted.

18 changes: 0 additions & 18 deletions awx/main/migrations/0203_job_event_queries_processed.py

This file was deleted.

7 changes: 4 additions & 3 deletions awx/main/models/event_query.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.core.exceptions import ValidationError
from django.db import models
from django.utils.translation import gettext_lazy as _

from awx.main.models import BaseModel

Expand All @@ -14,9 +15,9 @@ class Meta:
app_label = 'main'
unique_together = ['fqcn', 'collection_version']

fqcn = models.CharField(max_length=255)
collection_version = models.CharField(max_length=32)
event_query = models.JSONField(default=dict)
fqcn = models.CharField(max_length=255, help_text=_('Fully-qualified collection name.'))
collection_version = models.CharField(max_length=32, help_text=_('Version of the collection this data applies to.'))
event_query = models.JSONField(default=dict, help_text=_('The extensions/audit/event_query.yml file content scraped from the collection.'))

def validate_unique(self, exclude=None):
try:
Expand Down
22 changes: 11 additions & 11 deletions awx/main/models/indirect_managed_node_audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from django.db.models.fields import DateTimeField, CharField, PositiveIntegerField
from django.db.models.fields.json import JSONField
from django.db.models.fields.related import ForeignKey
from django.utils.translation import gettext_lazy as _

from awx.main.models import BaseModel


Expand All @@ -21,34 +23,32 @@ class Meta:
related_name='job_indirect_host_audits',
on_delete=DO_NOTHING,
editable=False,
help_text=_('Data saved in this record only applies to this specified job.'),
)

organization = ForeignKey(
'Organization',
related_name='organization_indirect_host_audits',
on_delete=DO_NOTHING,
help_text=_('Applicable organization, inferred from the related job.'),
)

inventory = ForeignKey(
'Inventory',
related_name='inventory_indirect_host_audits',
null=True,
on_delete=DO_NOTHING,
help_text=_('The inventory the related job ran against, and which the related host is in.'),
)

host = ForeignKey(
'Host',
related_name='host_indirect_host_audits',
null=True,
on_delete=DO_NOTHING,
)
host = ForeignKey('Host', related_name='host_indirect_host_audits', null=True, on_delete=DO_NOTHING, help_text=_('The host this audit record is for.'))

name = CharField(max_length=255)
name = CharField(max_length=255, help_text=_('The Ansible name of the host that this audit record is for.'))

canonical_facts = JSONField(default=dict)
canonical_facts = JSONField(default=dict, help_text=_('Facts about the host that will be used for managed node deduplication.'))

facts = JSONField(default=dict)
facts = JSONField(default=dict, help_text=_('Non canonical facts having additional info about the managed node.'))

events = JSONField(default=list)
events = JSONField(default=list, help_text=_('List of fully-qualified names of modules that ran against the host in the job.'))

count = PositiveIntegerField(default=0)
count = PositiveIntegerField(default=0, help_text=_('Counter of how many times registered modules were invoked on the host.'))
57 changes: 54 additions & 3 deletions awx/main/tasks/host_indirect.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from typing import Tuple, Union

import yaml

Expand All @@ -13,26 +14,74 @@
logger = logging.getLogger(__name__)


class UnhashableFacts(RuntimeError):
    """Signal that a facts structure holds a value that cannot be turned into a hashable form."""


def get_hashable_form(input_data: Union[dict, list, tuple, int, float, str, bool, None]) -> Union[tuple, int, float, str, bool, None]:
    """Return a hashable value carrying the same data as a JSON-like structure.

    Scalars (including JSON ``null``, i.e. ``None``) are returned unchanged.
    Dictionaries become a tuple of ``(key, value)`` pairs sorted by key, so two
    dicts with the same content hash identically regardless of insertion order.
    Lists and tuples become tuples. Conversion is applied recursively.

    Raises:
        UnhashableFacts: if a value of any other type is encountered.
    """
    if input_data is None or isinstance(input_data, (int, float, str, bool)):
        return input_data  # scalars are already hashable; return as-is
    if isinstance(input_data, dict):
        # A dict is not hashable, so represent it as a tuple of (key, value) tuples.
        # Keys and values are converted too, since they may themselves be unhashable.
        return tuple(sorted((get_hashable_form(k), get_hashable_form(v)) for k, v in input_data.items()))
    if isinstance(input_data, (list, tuple)):
        # Lists are never hashable, and nested items might not be either.
        return tuple(get_hashable_form(item) for item in input_data)
    raise UnhashableFacts(f'Canonical facts contains a {type(input_data)} type which can not be hashed.')


def build_indirect_host_data(job, job_event_queries: dict[str, str]) -> list[IndirectManagedNodeAudit]:
    """Build unsaved audit records for hosts reported by the events of ``job``.

    Each job event whose task name is a key of ``job_event_queries`` has its
    ``res`` data run through the jq expression registered for that task. Every
    jq result carrying ``canonical_facts`` contributes to one record per
    distinct canonical_facts value; repeated results for the same facts
    increment that record's ``count`` and extend its ``events`` list.

    Args:
        job: object exposing a ``job_events`` queryset and an ``organization``.
        job_event_queries: mapping of task name to jq expression text.

    Returns:
        A list of ``IndirectManagedNodeAudit`` instances that have not been saved.
    """
    results = {}
    compiled_jq_expressions = {}  # Cache of compiled jq programs, keyed by expression text
    facts_missing_logged = False
    unhashable_facts_logged = False
    for event in job.job_events.filter(task__in=job_event_queries.keys()).iterator():
        if 'res' not in event.event_data:
            continue

        # Recall from cache, or compile the jq expression. The cache is read and
        # written under the same key (the expression text) so each distinct
        # expression is compiled only once.
        jq_str_for_event = job_event_queries[event.task]
        compiled_jq = compiled_jq_expressions.get(jq_str_for_event)
        if compiled_jq is None:
            compiled_jq = jq.compile(jq_str_for_event)
            compiled_jq_expressions[jq_str_for_event] = compiled_jq

        for data in compiled_jq.input(event.event_data['res']).all():

            # From this jq result (specific to a single Ansible module), get index information about this host record
            if not data.get('canonical_facts'):
                if not facts_missing_logged:
                    logger.error(f'jq output missing canonical_facts for module {event.task} on event {event.id} using jq:{jq_str_for_event}')
                    facts_missing_logged = True  # only report this once per job
                continue
            canonical_facts = data['canonical_facts']
            try:
                hashable_facts = get_hashable_form(canonical_facts)
            except UnhashableFacts:
                if not unhashable_facts_logged:
                    logger.info(f'Could not hash canonical_facts {canonical_facts}, skipping')
                    unhashable_facts_logged = True
                continue

            # Obtain the record based on the hashable canonical_facts now determined
            audit_record = results.get(hashable_facts)
            if audit_record is None:
                audit_record = IndirectManagedNodeAudit(
                    canonical_facts=canonical_facts,
                    # The facts column is a non-null JSONField, so fall back to
                    # an empty dict when the jq output has no facts.
                    facts=data.get('facts') or {},
                    job=job,
                    organization=job.organization,
                    name=event.host_name,
                )
                results[hashable_facts] = audit_record

            # Increment rolling count fields
            if event.task not in audit_record.events:
                audit_record.events.append(event.task)
            audit_record.count += 1

    return list(results.values())


def fetch_job_event_query(job) -> dict[str, str]:
Expand All @@ -59,3 +108,5 @@ def save_indirect_host_entries(job_id):
job_event_queries = fetch_job_event_query(job)
records = build_indirect_host_data(job, job_event_queries)
IndirectManagedNodeAudit.objects.bulk_create(records)
job.event_queries_processed = True
job.save(update_fields=['event_queries_processed'])
Loading

0 comments on commit af2a986

Please sign in to comment.