Skip to content

Commit

Permalink
Automation Analytics Export to Ingress (#1816)
Browse files Browse the repository at this point in the history
It collects db, os and cluster data, packs them to 1+ tarballs and sends
to console.redhat.com, if enabled.
Jira AA-1757

No-Issue

Signed-off-by: Martin Slemr <[email protected]>
  • Loading branch information
slemrmartin authored Oct 11, 2023
1 parent d5d0e27 commit 998b2ee
Show file tree
Hide file tree
Showing 20 changed files with 815 additions and 97 deletions.
2 changes: 2 additions & 0 deletions CHANGES/aa-1757.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Added management command `metrics-collection-automation-analytics`.
Renamed command `analytics-export-s3` to `metrics-collection-lightspeed`.
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import logging

from django.core.management.base import BaseCommand
from galaxy_ng.app.metrics_collection.automation_analytics.collector import Collector
from galaxy_ng.app.metrics_collection.automation_analytics import data as automation_analytics_data

logger = logging.getLogger("metrics_collection.export_automation_analytics")


class Command(BaseCommand):
    """Gather Automation Analytics metrics and optionally ship them.

    With ``--ship`` the collector runs as ``Collector.MANUAL_COLLECTION``;
    otherwise (with ``--dry-run`` or with no flag at all) it runs as
    ``Collector.DRY_RUN``. The tarballs returned by the collector are written
    to the command's stdout.
    """

    help = ("Django management command to export collections data to "
            "ingress -> automation metrics_collection")

    def add_arguments(self, parser):
        """Register the ``--dry-run`` and ``--ship`` flags on ``parser``."""
        parser.add_argument(
            '--dry-run', dest='dry-run', action='store_true',
            help='Gather metrics_collection without shipping'
        )
        parser.add_argument(
            '--ship', dest='ship', action='store_true',
            help='Enable to ship metrics to the Red Hat Cloud'
        )

    def handle(self, *args, **options):
        """Run the collection and print the resulting tarballs.

        Logs an error and returns without gathering anything when both
        ``--ship`` and ``--dry-run`` are given.
        """
        opt_ship = options.get('ship')
        # The flag is registered with dest='dry-run', so the key keeps the hyphen.
        opt_dry_run = options.get('dry-run')

        if opt_ship and opt_dry_run:
            # BaseCommand provides no ``logger`` attribute; use the module-level
            # logger so the conflict is reported instead of raising AttributeError.
            logger.error('Both --ship and --dry-run cannot be processed at the same time.')
            return

        collector = Collector(
            collector_module=automation_analytics_data,
            collection_type=Collector.MANUAL_COLLECTION if opt_ship else Collector.DRY_RUN,
            logger=logger
        )

        tgzfiles = collector.gather()
        if tgzfiles:
            for tgz in tgzfiles:
                self.stdout.write(tgz)
        else:
            self.stdout.write("No metrics_collection tarballs collected")
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import logging

from django.core.management.base import BaseCommand
from galaxy_ng.app.management.commands.analytics.collector import Collector
from galaxy_ng.app.management.commands.analytics import galaxy_collector
from galaxy_ng.app.metrics_collection.lightspeed.collector import Collector
from galaxy_ng.app.metrics_collection.lightspeed import data as lightspeed_data
from django.utils.timezone import now, timedelta

logger = logging.getLogger("analytics")
logger = logging.getLogger("metrics_collection.export_lightspeed")


class Command(BaseCommand):
Expand All @@ -15,14 +15,14 @@ def handle(self, *args, **options):
"""Handle command"""

collector = Collector(
collector_module=galaxy_collector,
collection_type="manual",
collector_module=lightspeed_data,
collection_type=Collector.MANUAL_COLLECTION,
logger=logger,
)

collector.gather(since=now() - timedelta(days=8), until=now() - timedelta(days=1))

print("Completed ")
self.stdout.write("Gather Analytics => S3(Lightspeed): Completed ")


if __name__ == "__main__":
Expand Down
Empty file.
69 changes: 69 additions & 0 deletions galaxy_ng/app/metrics_collection/automation_analytics/collector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from django.conf import settings

from galaxy_ng.app.metrics_collection.collector import Collector as BaseCollector
from galaxy_ng.app.metrics_collection.automation_analytics.package import Package


class Collector(BaseCollector):
    """Collector variant used for the Automation Analytics export."""

    @staticmethod
    def _package_class():
        """Return the ``Package`` class this collector works with."""
        return Package

    def is_enabled(self):
        """Return False (and log why) when the export is disabled in settings.

        Otherwise defer to the parent class' ``is_enabled()``.
        """
        if settings.GALAXY_METRICS_COLLECTION_AUTOMATION_ANALYTICS_ENABLED:
            return super().is_enabled()

        self.logger.log(self.log_level,
                        "Metrics Collection for Ansible Automation Platform not enabled.")
        return False

    def _is_shipping_configured(self):
        """Check that the upload URL and the matching credentials are set.

        Logs a message at ``self.log_level`` when the configuration is not
        valid. Returns the validity as a bool.
        """
        configured = bool(settings.GALAXY_METRICS_COLLECTION_C_RH_C_UPLOAD_URL)

        # There are two possible types of authentication
        # 1) RH account - user/password
        # 2) X-RH-Identity header (inside cloud or testing)
        if configured:
            auth_type = settings.GALAXY_METRICS_COLLECTION_AUTOMATION_ANALYTICS_AUTH_TYPE
            configured = auth_type in (
                Package.SHIPPING_AUTH_USERPASS,
                Package.SHIPPING_AUTH_IDENTITY,
            )
            if configured:
                if auth_type == Package.SHIPPING_AUTH_USERPASS:
                    # Both username and password must be non-empty.
                    configured = bool(
                        settings.GALAXY_METRICS_COLLECTION_REDHAT_USERNAME
                        and settings.GALAXY_METRICS_COLLECTION_REDHAT_PASSWORD
                    )

                if auth_type == Package.SHIPPING_AUTH_IDENTITY:
                    configured = bool(settings.GALAXY_METRICS_COLLECTION_ORG_ID)

        if not configured:
            self.logger.log(self.log_level, "No metrics collection, configuration is invalid. "
                                            "Use --dry-run to gather locally without sending.")
        return configured

    def _last_gathering(self):
        """Return None; nothing is persisted yet (see TODO)."""
        # TODO: Waiting for persistent DB storage in Hub
        # https://issues.redhat.com/browse/AAH-2009
        # return settings.AUTOMATION_ANALYTICS_LAST_GATHER
        return None

    def _load_last_gathered_entries(self):
        """Return an empty dict; nothing is persisted yet (see TODO)."""
        # TODO: Waiting for persistent DB storage in Hub
        # https://issues.redhat.com/browse/AAH-2009
        # from awx.conf.models import Setting
        #
        # last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
        # last_gathered_entries = \
        #     json.loads((last_entries.value if last_entries is not None else '') or '{}',
        #                object_hook=datetime_hook)
        return {}

    def _save_last_gathered_entries(self, last_gathered_entries):
        """Do nothing; placeholder until persistent storage exists (see TODO)."""
        # TODO: Waiting for persistent DB storage in Hub
        # https://issues.redhat.com/browse/AAH-2009
        return None

    def _save_last_gather(self):
        """Do nothing; placeholder until persistent storage exists (see TODO)."""
        # TODO: Waiting for persistent DB storage in Hub
        # https://issues.redhat.com/browse/AAH-2009
        return None
143 changes: 143 additions & 0 deletions galaxy_ng/app/metrics_collection/automation_analytics/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import os
from django.db import connection
from insights_analytics_collector import CsvFileSplitter, register
import galaxy_ng.app.metrics_collection.common_data as data


@register("config", "1.0", description="General platform configuration.", config=True)
def config(since, **kwargs):
    """Return whatever the shared ``data.config()`` helper produces.

    ``since`` and any extra keyword arguments are accepted but not used.
    """
    platform_config = data.config()
    return platform_config


@register("instance_info", "1.0", description="Node information")
def instance_info(since, **kwargs):
    """Return whatever the shared ``data.instance_info()`` helper produces.

    ``since`` and any extra keyword arguments are accepted but not used.
    """
    node_info = data.instance_info()
    return node_info


@register("collections", "1.0", format="csv", description="Data on ansible_collection")
def collections(since, full_path, until, **kwargs):
    """Export the result of ``data.collections_query()`` as CSV under ``full_path``.

    ``since``, ``until`` and extra keyword arguments are accepted but not used.
    Returns the value of ``export_to_csv``.
    """
    return export_to_csv(full_path, "collections", data.collections_query())


@register(
    "collection_versions",
    "1.0",
    format="csv",
    description="Data on ansible_collectionversion",
)
def collection_versions(since, full_path, until, **kwargs):
    """Export the result of ``data.collection_versions_query()`` as CSV.

    ``since``, ``until`` and extra keyword arguments are accepted but not used.
    Returns the value of ``export_to_csv``.
    """
    return export_to_csv(
        full_path,
        "collection_versions",
        data.collection_versions_query(),
    )


@register(
    "collection_version_tags",
    "1.0",
    format="csv",
    description="Full sync: Data on ansible_collectionversion_tags"
)
def collection_version_tags(since, full_path, **kwargs):
    """Export the result of ``data.collection_version_tags_query()`` as CSV.

    ``since`` and extra keyword arguments are accepted but not used.
    Returns the value of ``export_to_csv``.
    """
    return export_to_csv(
        full_path,
        "collection_version_tags",
        data.collection_version_tags_query(),
    )


@register(
    "collection_tags",
    "1.0",
    format="csv",
    description="Data on ansible_tag"
)
def collection_tags(since, full_path, **kwargs):
    """Export the result of ``data.collection_tags_query()`` as CSV.

    ``since`` and extra keyword arguments are accepted but not used.
    Returns the value of ``export_to_csv``.
    """
    return export_to_csv(full_path, "collection_tags", data.collection_tags_query())


@register(
    "collection_version_signatures",
    "1.0",
    format="csv",
    description="Data on ansible_collectionversionsignature",
)
def collection_version_signatures(since, full_path, **kwargs):
    """Export the result of ``data.collection_version_signatures_query()`` as CSV.

    ``since`` and extra keyword arguments are accepted but not used.
    Returns the value of ``export_to_csv``.
    """
    return export_to_csv(
        full_path,
        "collection_version_signatures",
        data.collection_version_signatures_query(),
    )


@register(
    "signing_services",
    "1.0",
    format="csv",
    description="Data on core_signingservice"
)
def signing_services(since, full_path, **kwargs):
    """Export the result of ``data.signing_services_query()`` as CSV.

    ``since`` and extra keyword arguments are accepted but not used.
    Returns the value of ``export_to_csv``.
    """
    return export_to_csv(full_path, "signing_services", data.signing_services_query())


# @register(
# "collection_imports",
# "1.0",
# format="csv",
# description="Data on ansible_collectionimport",
# )
# def collection_imports(since, full_path, until, **kwargs):
# # currently no rows in the table, so no objects to base a query off
# source_query = """COPY (
# SELECT * FROM ansible_collectionimport
# ) TO STDOUT WITH CSV HEADER
# """
# return _simple_csv(full_path, "ansible_collectionimport", source_query)
#

@register(
    "collection_download_logs",
    "1.0",
    format="csv",
    description="Data from ansible_downloadlog"
)
def collection_download_logs(since, full_path, until, **kwargs):
    """Export the result of ``data.collection_downloads_query()`` as CSV.

    ``since``, ``until`` and extra keyword arguments are accepted but not used.
    Returns the value of ``export_to_csv``.
    """
    return export_to_csv(
        full_path,
        "collection_download_logs",
        data.collection_downloads_query(),
    )


@register(
    "collection_download_counts",
    "1.0",
    format="csv",
    description="Data from ansible_collectiondownloadcount"
)
def collection_download_counts(since, full_path, until, **kwargs):
    """Export the result of ``data.collection_download_counts_query()`` as CSV.

    ``since``, ``until`` and extra keyword arguments are accepted but not used.
    Returns the value of ``export_to_csv``.
    """
    return export_to_csv(
        full_path,
        "collection_download_counts",
        data.collection_download_counts_query(),
    )


def _get_csv_splitter(file_path, max_data_size=209715200):
    """Create a ``CsvFileSplitter`` for ``file_path``.

    ``max_data_size`` is forwarded as the splitter's ``max_file_size``.
    """
    splitter = CsvFileSplitter(filespec=file_path, max_file_size=max_data_size)
    return splitter


def export_to_csv(full_path, file_name, query):
    """Wrap ``query`` in a ``COPY (...) TO STDOUT WITH CSV HEADER`` statement
    and hand it to ``_simple_csv``.

    Returns whatever ``_simple_csv`` returns for ``full_path``/``file_name``.
    """
    wrapped_query = f"""COPY (
{query}
) TO STDOUT WITH CSV HEADER
"""
    csv_files = _simple_csv(full_path, file_name, wrapped_query, max_data_size=209715200)
    return csv_files


def _simple_csv(full_path, file_name, query, max_data_size=209715200):
    """Run the COPY ``query`` and stream its output into a CSV splitter.

    The target path is derived from ``full_path`` and ``file_name``; each
    chunk read from the copy operation is decoded as UTF-8 before being
    written. Returns the splitter's ``file_list()``.
    """
    splitter = _get_csv_splitter(_get_file_path(full_path, file_name), max_data_size)

    with connection.cursor() as cursor, cursor.copy(query) as copy:
        # Keep reading until the copy operation yields an empty chunk.
        while chunk := copy.read():
            splitter.write(str(chunk, 'utf8'))

    return splitter.file_list()


def _get_file_path(path, table):
return os.path.join(path, table + ".csv")
57 changes: 57 additions & 0 deletions galaxy_ng/app/metrics_collection/automation_analytics/package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import base64
import json
from django.conf import settings

from insights_analytics_collector import Package as InsightsAnalyticsPackage


class Package(InsightsAnalyticsPackage):
    """Package variant used for the Automation Analytics export.

    Supplies the ingress URL, credentials and HTTP headers from Django
    settings.
    """

    CERT_PATH = "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem"
    PAYLOAD_CONTENT_TYPE = "application/vnd.redhat.automation-hub.hub_payload+tgz"

    def _tarname_base(self):
        """Return the tarball base name, stamped with the collector's ``gather_until``."""
        timestamp = self.collector.gather_until
        return f'galaxy-hub-analytics-{timestamp.strftime("%Y-%m-%d-%H%M")}'

    def get_ingress_url(self):
        """Return the upload URL from settings."""
        return settings.GALAXY_METRICS_COLLECTION_C_RH_C_UPLOAD_URL

    def _get_rh_user(self):
        """Return the Red Hat username from settings."""
        return settings.GALAXY_METRICS_COLLECTION_REDHAT_USERNAME

    def _get_rh_password(self):
        """Return the Red Hat password from settings."""
        return settings.GALAXY_METRICS_COLLECTION_REDHAT_PASSWORD

    def _get_x_rh_identity(self):
        """Auth: x-rh-identity header for HTTP POST request to cloud
        Optional, if shipping_auth_mode() redefined to SHIPPING_AUTH_IDENTITY

        Returns the base64-encoded (bytes) JSON identity document built from
        ``GALAXY_METRICS_COLLECTION_ORG_ID``.
        """
        # The org id is rendered as a zero-padded number of at least 7 digits.
        tenant_id = f"{int(settings.GALAXY_METRICS_COLLECTION_ORG_ID):07d}"
        identity = {
            "identity": {
                "type": "User",
                "account_number": tenant_id,
                "user": {"is_org_admin": True},
                "internal": {"org_id": tenant_id}
            }
        }
        identity = base64.b64encode(json.dumps(identity).encode("utf8"))
        return identity

    def hub_version(self):
        """Return the hub version found in the gathered "config" collection.

        Returns '0.0' when there is no config collection, it carries no data,
        or the data has no ``hub_version`` key. Returns "unknown version" when
        the data cannot be parsed as a JSON object.
        """
        # The collection may be absent (or None); a plain dict default would
        # not have a ``.data`` attribute, so look the attribute up defensively.
        config_collection = self.collector.collections.get("config")
        config_data = getattr(config_collection, "data", None)
        if not config_data:
            return '0.0'

        try:
            parsed = json.loads(config_data)
        except (TypeError, json.decoder.JSONDecodeError):
            # TypeError covers data that is neither str nor bytes.
            return "unknown version"

        if not isinstance(parsed, dict):
            return "unknown version"
        return parsed.get('hub_version', '0.0')

    def _get_http_request_headers(self):
        """Return the HTTP headers for the upload, including a User-Agent
        that embeds ``hub_version()``."""
        headers = {
            'Content-Type': 'application/json',
            'User-Agent': f'GalaxyNG | Red Hat Ansible Automation Platform ({self.hub_version()})'
        }
        return headers

    def shipping_auth_mode(self):
        """Return the configured authentication type from settings."""
        return settings.GALAXY_METRICS_COLLECTION_AUTOMATION_ANALYTICS_AUTH_TYPE
11 changes: 11 additions & 0 deletions galaxy_ng/app/metrics_collection/collector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from django.db import connection
from insights_analytics_collector import Collector as BaseCollector


class Collector(BaseCollector):
    """Shared base collector for the galaxy_ng metrics collection."""

    @staticmethod
    def db_connection():
        """Return Django's ``connection`` object."""
        return connection

    def _is_valid_license(self):
        """Always report the license as valid."""
        return True
Loading

0 comments on commit 998b2ee

Please sign in to comment.