Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffreyaven committed Aug 22, 2023
1 parent 02eff06 commit c064d6b
Showing 1 changed file with 44 additions and 104 deletions.
148 changes: 44 additions & 104 deletions notebooks/includes/google-audit-setup.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,15 @@
"outputs": [],
"source": [
"## imports and object instantiation\n",
"import json, time, nest_asyncio, json, itertools, sys, threading\n",
"import json, time, nest_asyncio, json, itertools, sys, threading, psycopg2\n",
"from pystackql import StackQL\n",
"import pandas as pd\n",
"from IPython.display import clear_output, display, Markdown, HTML\n",
"from ipytree import Tree, Node\n",
"from psycopg2.extras import RealDictCursor\n",
"from psycopg2 import ProgrammingError\n",
"\n",
"conn = psycopg2.connect(\"dbname=stackql user=stackql host=localhost port=5444\")\n",
"\n",
"stackql = StackQL()\n",
"nest_asyncio.apply()"
Expand Down Expand Up @@ -106,6 +110,24 @@
" return exploded_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def run_stackql_query(query, debug=False):\n",
" try:\n",
" with conn.cursor(cursor_factory=RealDictCursor) as cur:\n",
" cur.execute(query)\n",
" rows = cur.fetchall()\n",
" return pd.DataFrame(rows)\n",
" except Exception as e:\n",
" if debug:\n",
" print(f\"Error executing query: {str(e)}\")\n",
" return pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -114,48 +136,43 @@
"source": [
"# get all folders and projects function\n",
"def get_resources_recursive(entity_id, get_projects_query_fn, get_folders_query_fn, parent_display_name='organization'):\n",
" resources = []\n",
" resources_df = pd.DataFrame()\n",
"\n",
" # Query for projects\n",
" print_overwrite(f\"Searching {entity_id} for projects...\")\n",
" project_query = get_projects_query_fn(entity_id)\n",
" project_results = json.loads(stackql.execute(project_query))\n",
" projects_df = run_stackql_query(project_query)\n",
" projects_df[\"parentDisplayName\"] = parent_display_name\n",
" projects_df[\"resType\"] = \"project\"\n",
"\n",
" resources_df = pd.concat([resources_df, projects_df], ignore_index=True)\n",
"\n",
" if isinstance(project_results, list):\n",
" print_overwrite(f\"Found {len(project_results)} projects in {entity_id}\")\n",
" for proj in project_results:\n",
" proj[\"parentDisplayName\"] = parent_display_name\n",
" proj[\"resType\"] = \"project\"\n",
" resources.append(proj)\n",
" print_overwrite(f\"Found {len(projects_df)} projects in {entity_id}\")\n",
"\n",
" # Query for folders\n",
" print_overwrite(f\"Searching {entity_id} for folders...\")\n",
" folder_query = get_folders_query_fn(entity_id)\n",
" folder_results = json.loads(stackql.execute(folder_query))\n",
" folders_df = run_stackql_query(folder_query)\n",
" folders_df[\"parentDisplayName\"] = parent_display_name\n",
" folders_df[\"resType\"] = \"folder\"\n",
"\n",
" if isinstance(folder_results, list):\n",
" print_overwrite(f\"Found {len(folder_results)} folders in {entity_id}\")\n",
" for folder in folder_results:\n",
" folder[\"parentDisplayName\"] = parent_display_name\n",
" folder[\"resType\"] = \"folder\"\n",
" resources.append(folder)\n",
" resources_df = pd.concat([resources_df, folders_df], ignore_index=True)\n",
"\n",
" # Fetch resources under this folder\n",
" if 'name' in folder:\n",
" resources.extend(get_resources_recursive(folder['name'], get_projects_query_fn, get_folders_query_fn, folder['displayName']))\n",
" print_overwrite(f\"Found {len(folders_df)} folders in {entity_id}\")\n",
"\n",
" return resources\n",
" for _, folder in folders_df.iterrows():\n",
" # Fetch resources under this folder\n",
" if 'name' in folder:\n",
" child_resources_df = get_resources_recursive(folder['name'], get_projects_query_fn, get_folders_query_fn, folder['displayName'])\n",
" resources_df = pd.concat([resources_df, child_resources_df], ignore_index=True)\n",
"\n",
" return resources_df\n",
"\n",
"def get_all_resources(get_projects_query, get_folders_query):\n",
" start_time = time.time()\n",
" \n",
" # Start with the root organization to get all resources\n",
" all_resources = get_resources_recursive(\"organizations/%s\" % (org_id), get_projects_query, get_folders_query)\n",
" \n",
" # Convert list to dataframe and filter\n",
" resources_df = (pd.DataFrame(all_resources)\n",
" .loc[lambda df: df['error'].isna()]\n",
" .drop('error', axis=1, errors='ignore'))\n",
" resources_df = get_resources_recursive(\"organizations/%s\" % (org_id), get_projects_query, get_folders_query)\n",
" \n",
" # Create root node and build the tree\n",
" root = Node(\"organization\", opened=False, icon='building')\n",
Expand All @@ -174,7 +191,7 @@
" cards_data = [(\"Number of Projects\", num_projects), (\"Number of Folders\", num_folders)]\n",
" display_cards(cards_data)\n",
" \n",
" return resources_df, projects, tree\n"
" return resources_df, projects, tree"
]
},
{
Expand Down Expand Up @@ -206,77 +223,6 @@
" return bindings_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# regions and zones function\n",
"def get_all_regions_and_zones(projects_df, queries):\n",
"\n",
" start_time = time.time()\n",
"\n",
" all_results = []\n",
" for query in queries:\n",
" res = stackql.execute(query)\n",
" try:\n",
" parsed_result = json.loads(res)\n",
" all_results.extend(parsed_result)\n",
" except json.JSONDecodeError:\n",
" print(f\"Failed to parse result from query: {query}\")\n",
" print(f\"Raw result: {res}\")\n",
" \n",
" zones_df = (\n",
" pd.DataFrame(all_results)\n",
" .loc[lambda x: x['error'].isnull()]\n",
" .drop('error', axis=1)\n",
" .drop_duplicates()\n",
" )\n",
" \n",
" regions_df = pd.DataFrame(zones_df['region'].unique(), columns=['region'])\n",
" # For every combination of project and region\n",
" projects_regions_df = projects_df.assign(key=1).merge(regions_df.assign(key=1), on='key').drop('key', axis=1)\n",
" # For every combination of project and zone\n",
" projects_zones_df = projects_df.assign(key=1).merge(zones_df.assign(key=1), on='key').drop(['key', 'region'], axis=1)\n",
" \n",
" # python list variables\n",
" regions = regions_df['region'].tolist()\n",
" zones = zones_df['name'].tolist()\n",
" projects_regions = projects_regions_df.to_dict(orient='records')\n",
" projects_zones = projects_zones_df.to_dict(orient='records')\n",
"\n",
" number_of_rows = zones_df.shape[0]\n",
" number_of_projects = projects_df.shape[0]\n",
" elapsed_time = round(time.time() - start_time)\n",
"\n",
" print(f\"Found {number_of_rows} zones across {number_of_projects} projects in {elapsed_time} seconds\")\n",
"\n",
" return regions_df, zones_df, regions, zones, projects_regions, projects_zones"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# get all instances\n",
"def get_all_instances(queries):\n",
" start_time = time.time()\n",
"\n",
" res = stackql.executeQueriesAsync(queries)\n",
"\n",
" isinstances_df = pd.read_json(json.dumps(res))\n",
"\n",
" number_of_rows = isinstances_df.shape[0]\n",
" elapsed_time = round(time.time() - start_time)\n",
"\n",
" print(f\"Found {number_of_rows} instances in {elapsed_time} seconds\")\n",
"\n",
" return isinstances_df"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -303,12 +249,6 @@
"metadata": {},
"outputs": [],
"source": [
"import psycopg2\n",
"from psycopg2.extras import RealDictCursor\n",
"from psycopg2 import ProgrammingError\n",
"\n",
"conn = psycopg2.connect(\"dbname=stackql user=stackql host=localhost port=5444\")\n",
"\n",
"def run_stackql_queries(queries, debug=False):\n",
" start_time = time.time()\n",
" all_results = []\n",
Expand Down

0 comments on commit c064d6b

Please sign in to comment.