diff --git a/pyprof/nvtx/config.py b/pyprof/nvtx/config.py
deleted file mode 100644
index edd9c40..0000000
--- a/pyprof/nvtx/config.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-class Config(object):
-    _instance = None
-
-    # Overloading the __new__ method enables singleton behavior
-    def __new__(cls, *args, **kwargs):
-        if cls._instance is None:
-            cls._instance = super(Config, cls).__new__(cls)
-            cls.func_stack_enabled = kwargs.get("enable_function_stack",
-                                                False) or kwargs.get("capture_input_ops", False)
-            cls.capture_input_ops = kwargs.get("capture_input_ops", False)
-            cls.delay_graph_capture = kwargs.get("delay_graph_capture", False)
-            cls.debug_graph = kwargs.get("debug_graph", False)
-        return cls._instance
diff --git a/pyprof/nvtx/dlprof.py b/pyprof/nvtx/dlprof.py
deleted file mode 100644
index 51bee05..0000000
--- a/pyprof/nvtx/dlprof.py
+++ /dev/null
@@ -1,208 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-import inspect as ins
-from .config import Config
-
-def dprint(*args):
-    """
-    dprint()
-    Printf debugging messages controlled by the debug_flag.
-    Disabled by default.
-    Call debug(True) from instrumentation code to enable
-    debug messages generated by dprint.
-    """
-    config = Config()
-    if config.debug_graph:
-        fn = ins.currentframe().f_back.f_code.co_name
-        depth = len(ins.stack())-2
-        print(" " * depth, f"[{fn}] {args}")
-    return
-
-class DLProf(object):
-    _instance = None
-
-    # Overloading the __new__ method enables singleton behavior
-    def __new__(cls, *args, **kwargs):
-        if cls._instance is None:
-            cls._instance = super(DLProf, cls).__new__(cls)
-            cls.call_id = 0                # input op tracking identifier
-            cls.op_to_out_tensor_map = {}  # map from tensor ptr to call_id
-            cls.call_id_to_op_map = {}     # map from call_id to op name
-            cls.patch_list = []            # used to track nested callids
-            # Nested dicts of this run's frame names to help uniquify them
-            # func_map[(partial_func_stack,frame_name)][filename+lineno] = frame_name_to_use
-            #
-            cls.func_map = {}
-        return cls._instance
-
-    # Return True if the name in the hierarchy should be skipped
-    @classmethod
-    def should_skip_frame_name(cls, name, prev_name):
-        # wrapper_func and always_benchmark_wrapper:
-        #     Are functions in this file. If there are nested monkeypatched functions
-        #     we don't want them to show up
-        # name==prev_name:
-        #     Remove back-to-back duplicates of the same function name.
-        #     This is common during recursion
-        #
-        for prefix in ["wrapper_func", "always_benchmark_wrapper"]:
-            if name.startswith(prefix):
-                return True
-        if name == prev_name:
-            return True
-        return False
-
-    # Given a function stack, clean it up to remove unwanted fields as
-    # well as removing any back-to-back duplicates
-    @classmethod
-    def cleanup_func_stack(cls, func_stack, op_name):
-
-        ret = ""
-        prev_fn_name = ""
-        suffix = ""
-
-        x = func_stack.split("/")
-        for fn_name in x:
-
-            # This is used to detect when the same torch op was called
-            # multiple times from the same parent function. Capture the
-            # count as a 'suffix' and put it on the end of the op name
-            #
-            # For example, if we end up with these:
-            #     a/b/c/wrapper_func
-            #     a/b/c/wrapper_func(2)
-            # Both would end up as a/b/c after the wrapper function is ignored
-            # However, we want to keep the information that the resulting torch op
-            # called by wrapper_func was called 2 different times from the same function 'c'
-            #
-            # This code changes "wrapper_func(2)" to "(2)" so that it doesn't get filtered
-            # out by should_skip_frame_name()
-            #
-            if fn_name.startswith("wrapper_func("):
-                suffix = fn_name.replace("wrapper_func", "")
-            if fn_name.startswith("always_benchmark_wrapper("):
-                suffix = fn_name.replace("always_benchmark_wrapper", "")
-
-            if not DLProf.should_skip_frame_name(fn_name, prev_fn_name):
-                ret += "/" + fn_name
-                prev_fn_name = fn_name
-        ret += "/" + op_name + suffix
-        return ret
-
-    @classmethod
-    def build_function_stack(cls, index, func_stack, frame_name, prev_fn, op_name, stack, ins_frame):
-
-        # Build funcStack
-        fn_name = frame_name
-        # Capture class name
-        #
-        # Iterate through the stack frames (like a linked list) until we get
-        # to the detailed frame we want. This is much faster and less
-        # expensive than extracting the entire frame stack every time
-        #
-        # ins stack is backwards from traceback, so depth is inverse
-        # of current traceback depth
-        #
-        depth = len(stack) - index
-        for _ in range(1, depth):
-            ins_frame = ins_frame.f_back
-
-        # Grab the class name if it exists
-        #
-        if 'self' in ins_frame.f_locals:
-            fn_name = ins_frame.f_locals['self'].__class__.__name__ + "::" + fn_name
-
-        key = (func_stack, frame_name, "")
-        if (fn_name in ["wrapper_func", "always_benchmark_wrapper"]):
-            key = (func_stack, frame_name, op_name)
-
-        if key not in cls.func_map.keys():
-            cls.func_map[key] = {}
-
-        # If we have been to this stack depth with all the same
-        # information, use the stored name
-        #
-        if prev_fn in cls.func_map[key].keys():
-            fn_name = cls.func_map[key][prev_fn]
-        else:
-            # If we have been to this stack depth and have called
-            # this function at least once but didn't hit in the dict
-            # above, then this is a repeat call. Append a count
-            # to the fn_name to uniquify it
-            #
-            if len(cls.func_map[key]) > 0:
-                fn_name = fn_name + "(" + str(1 + len(cls.func_map[key])) + ")"
-
-            # Store this new unique stack information with the
-            # determined fn_name
-            #
-            cls.func_map[key][prev_fn] = fn_name
-
-        return fn_name
-
-    @classmethod
-    def capture_inputs(cls, call_id, input_callid_list, *args):
-        input_tensors = []
-        for arg in args:
-            if isinstance(arg, torch.Tensor):
-                input_tensors.append({
-                    'ptr': arg.data_ptr(),
-                })
-            elif isinstance(arg, list) or isinstance(arg, tuple):
-                for item in arg:
-                    if isinstance(item, torch.Tensor):
-                        input_tensors.append({
-                            'ptr': item.data_ptr(),
-                        })
-                    if isinstance(item, list) or isinstance(item, tuple):
-                        for item2 in item:
-                            if isinstance(item2, torch.Tensor):
-                                input_tensors.append({
-                                    'ptr': item2.data_ptr(),
-                                })
-        for input_id, _ in enumerate(input_tensors):
-            input_ptr = input_tensors[input_id]['ptr']
-            if input_ptr in cls.op_to_out_tensor_map:
-                input_callid_info = cls.op_to_out_tensor_map[input_ptr]
-                if input_callid_info not in input_callid_list:
-                    input_callid_list.append(input_callid_info)
-                    dprint(f"Callid {call_id} Input tensor ptr {input_ptr} fetching saved call_id"\
-                        f" {input_callid_info} port 0")
-            else:
-                ## otherwise, push '-1'. this allows the input param shapes to align with the
-                ## input_callids when building the graph
-                input_callid_list.append(-1)
-
-    @classmethod
-    def capture_outputs(cls, call_id, result):
-        output_tensors = []
-        if isinstance(result, torch.Tensor):
-            output_tensors.append({
-                'ptr': result.data_ptr(),
-            })
-        elif isinstance(result, list) or isinstance(result, tuple):
-            for item in result:
-                if isinstance(item, torch.Tensor):
-                    output_tensors.append({
-                        'ptr': item.data_ptr(),
-                    })
-        for out_port, _ in enumerate(output_tensors):
-            output_ptr = output_tensors[out_port]['ptr']
-            cls.op_to_out_tensor_map[output_ptr] = f"{call_id}"
-
-        dprint(f"call_id {call_id} output tensors {output_tensors}")
diff --git a/pyprof/nvtx/nvmarker.py b/pyprof/nvtx/nvmarker.py
index 33ddb97..ae6f3fe 100644
--- a/pyprof/nvtx/nvmarker.py
+++ b/pyprof/nvtx/nvmarker.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -41,40 +41,6 @@
 import math
 import json
 import importlib
-from .config import Config
-from .dlprof import DLProf
-from .dlprof import dprint
-
-# Singleton object tracking dlprof specific information
-dlprof = DLProf()
-# flag to control wrapping ops in nvtx markers
-wrappers_enabled = True
-
-
-def start_graph():
-    """
-    start_graph()
-    This function is exported in __init__.py so the instrumentation code
-    can control which iteration to capture the network graph.
-    Use this in conjunction with config option --delay_graph_capture
-    """
-    global wrappers_enabled
-    wrappers_enabled = True
-    dprint(f"Starting graph tracker wrappers enabled {wrappers_enabled}")
-    return
-
-
-def stop_graph():
-    """
-    stop_graph()
-    This function is exported in __init__.py so the instrumentation code can
-    stop the graph capture at the end of a specific iteration.
-    Use this in conjunction with config option --delay_graph_capture
-    """
-    global wrappers_enabled
-    wrappers_enabled = False
-    dprint(f"Stopping graph tracker wrappers enabled {wrappers_enabled}")
-    return
 
 
 def isfunc(mod, f):
@@ -116,63 +82,14 @@ def isfunc(mod, f):
 
 # Returns a dict string with a tracemarker and function stack in it
 #
-def traceMarker(op_name):
-
-    config = Config()
-
-    # Return a trace marker string and func_stack string
-    #
-    def get_trace_info(op_name):
-        cadena = []
-        stack = traceback.extract_stack()
-        func_stack = ""
-
-        # Previous frame name and line. This is the file and line
-        # that CALLED the frame we are in
-        #
-        prev_fn = ""
-
-        # Starting at index of 2 to ignore this function and its parent (traceMarker).
-        # Intentionally leaving in wrapper_func and other functions in this file as they
-        # may be needed to uniquify the node name
-        #
-        for idx in range(len(stack) - 2):
-            frame = stack[idx]
-
-            # Build traceMarker
-            #
-
-            # Don't include any functions from this file (nvmarker.py)
-            # Also skip repeated back to back cases of the same file/line (recursive calls)
-            #
-            fnl = "{}:{}".format(frame.filename, frame.lineno)
-            if (not frame.filename.endswith("nvmarker.py") and fnl != prev_fn):
-                cadena.append(fnl)
-
-            # Early exit if we aren't doing any funcStack code
-            #
-            if not config.func_stack_enabled:
-                continue
-            else:
-                ins_frame = ins.currentframe()
-                fn_name = dlprof.build_function_stack(idx, func_stack, frame.name, prev_fn, op_name, stack, ins_frame)
-                del ins_frame
-                prev_fn = fnl
-
-                # Append this frame's info into the function stack
-                #
-                func_stack = func_stack + "/" + fn_name
-
-        if config.func_stack_enabled:
-            func_stack = dlprof.cleanup_func_stack(func_stack, op_name)
-
-        return cadena, func_stack
-
+def traceMarker(stack):
     d = {}
-    tm, fs = get_trace_info(op_name)
-    d['traceMarker'] = tm
-    if config.func_stack_enabled:
-        d['funcStack'] = fs
+    cadena = []
+    for i in range(len(stack) - 1):
+        fi = stack[i]
+        t = f"{fi.filename}:{fi.lineno}"
+        cadena.append(t)
+    d['traceMarker'] = cadena
     return str(d)
 
 
@@ -189,8 +106,6 @@ def modMarker(mod, fn_name, args):
 
 def add_wrapper(mod, fn_name):
-    config = Config()
-
     # Get a pointer to the original function
     func = getattr(mod, fn_name)
 
@@ -203,73 +118,34 @@
     def wrapper_func(*args, **kwargs):
 
-        global wrappers_enabled
-        traceMarker_str = ""
-        input_callid_list = []
-
-        if wrappers_enabled:
-
-            if config.capture_input_ops:
-                ## Stack for callids to work with nested monkey patch function calls
-                dlprof.patch_list.append(dlprof.call_id)
-                dlprof.capture_inputs(dlprof.call_id, input_callid_list, *args)
+        # Extract the stacktrace
+        stack = traceback.extract_stack()
 
-            # Push trace marker
-            traceMarker_str = traceMarker(fn_name)
-            nvtx.range_push(traceMarker_str)
+        # Push trace marker
+        nvtx.range_push(traceMarker(stack))
 
-            # Push module marker
-            if s:
-                m = modMarker(mod, fn_name, args)
-                nvtx.range_push(m)
+        # Push module marker
+        if s:
+            m = modMarker(mod, fn_name, args)
+            nvtx.range_push(m)
 
-            # Create and push argument marker
-            #
-            # Disable wrappers while getting the argMarker in case it
-            # ends up executing another wrapped function
-            wrappers_enabled = False
-            if config.capture_input_ops:
-                saved_call_id = dlprof.call_id
-                # Keeps call_id correct when there are nested
-                # monkey patch functions
-                if dlprof.call_id != dlprof.patch_list[0]:
-                    saved_call_id = dlprof.patch_list[0]
-                cadena = argMarker(mod, fn_name, args, kwargs, saved_call_id, input_callid_list)
-            else:
-                cadena = argMarker(mod, fn_name, args, kwargs)
-            nvtx.range_push(cadena)
-            wrappers_enabled = True
+        # Create and push argument marker
+        cadena = argMarker(mod, fn_name, args, kwargs)
+        nvtx.range_push(cadena)
 
         # Call the original function
         result = func(*args, **kwargs)
 
-        if wrappers_enabled:
-            # Pop argument marker
-            nvtx.range_pop()
-
-            # Pop module marker
-            if s:
-                nvtx.range_pop()
+        # Pop argument marker
+        nvtx.range_pop()
 
-            # Pop trace marker
+        # Pop module marker
+        if s:
             nvtx.range_pop()
 
-            if config.capture_input_ops:
-                # Keeps call_id correct when there are nested
-                # monkey patch functions
-                saved_call_id = dlprof.call_id
-                if dlprof.call_id != dlprof.patch_list[0]:
-                    saved_call_id = dlprof.patch_list[0]
-                dlprof.capture_outputs(saved_call_id, result)
-                # Store the callid -> op_name mapping
-                if traceMarker_str != "":
-                    traceMarker_str = traceMarker_str.replace("\'", "\"")
-                    traceMarker_dict = json.loads(traceMarker_str)
-                    dlprof.call_id_to_op_map[saved_call_id] = traceMarker_dict['funcStack']
-
-                starting_call_id = dlprof.patch_list[0]
-                last_call_id = dlprof.patch_list.pop()
-                dlprof.call_id = dlprof.call_id + 1
+        # Pop trace marker
+        nvtx.range_pop()
+
         return result
 
     setattr(mod, fn_name, wrapper_func)
@@ -277,12 +153,8 @@
 
 def argMarker(mod, op, args, kwargs, idx=-1, inputid_list=[]):
     #For this function args is a tuple and kwargs is a dict
-    config = Config()
 
     def tensor(arg, name=""):
-        if config.capture_input_ops:
-            cid = dlprof.op_to_out_tensor_map.get(arg.data_ptr(), -1)
-            name = dlprof.call_id_to_op_map.get(int(cid), "")
         a = {}
         a['name'] = name
         a['type'] = "tensor"
@@ -362,9 +234,6 @@ def foo(args, name):
 
     cadena = {}
     cadena['mod'] = mod.__name__
     cadena['op'] = op
-    if config.capture_input_ops:
-        cadena['callid'] = idx
-        cadena['input_callids'] = inputid_list
     cadena['args'] = []
     foo(args, "")
@@ -506,7 +375,7 @@ def patch_apex_module(modstr):
 
     if importlib.util.find_spec(modstr) is not None:
         mod = importlib.import_module(modstr)
 
-        for n, v in ins.getmembers(mod):
+        for _, v in ins.getmembers(mod):
            # This makes sure we don't patch random other modules that are imported by the target module
            #
            if is_same_module_or_submodule(mod, ins.getmodule(v)):
@@ -524,167 +393,10 @@ def patch_apex_class(cls):
             add_wrapper(cls, f)
 
 
-def push_nvtx_model_config(config):
-    """
-    Helper function to dump the passed in dict config as an nvtx
-    marker with "model_config" key
-    """
-    nvtx_msg = json.dumps({"model_config": config})
-    nvtx.range_push(nvtx_msg)
-
-
-def patch_dataloader_init():
-    """
-    Capture dataloader config (num_workers and pin_memory) and
-    emit a model_config nvtx range with the information
-    """
-    mod = torch.utils.data.dataloader
-    old_init = mod.DataLoader.__init__
-
-    def new_init(self, *args, **kwargs):
-
-        num_workers = kwargs.get("num_workers", 0)
-        pin_memory = kwargs.get("pin_memory", False)
-
-        push_nvtx_model_config({"num_workers": num_workers, "pin_memory": pin_memory})
-        old_init(self, *args, **kwargs)
-        nvtx.range_pop()
-
-    mod.DataLoader.__init__ = new_init
-
-
-# Flag to indicate that cudnn_benchmark_disabled has already been reported
-#
-cudnn_benchmark_disabled_reported = False
-
-
-def patch_with_always_benchmark(mod, fn_name):
-    """
-    Patch the given mod/function so that if it is ever executed and
-    torch.backends.cudnn.benchmark is not true, it will emit an nvtx
-    range to report that fact
-    """
-    assert isfunc(mod, fn_name)
-    old_fn = getattr(mod, fn_name)
-
-    def always_benchmark_wrapper(*args, **kwargs):
-        global cudnn_benchmark_disabled_reported
-
-        add_nvtx = not torch.backends.cudnn.benchmark and not cudnn_benchmark_disabled_reported
-        if add_nvtx:
-            cudnn_benchmark_disabled_reported = True
-            push_nvtx_model_config({"cudnn_benchmark_disabled": True})
-
-        result = old_fn(*args, **kwargs)
-
-        if add_nvtx:
-            nvtx.range_pop()
-
-        return result
-
-    setattr(mod, fn_name, always_benchmark_wrapper)
-
-
-def patch_never_call(mod, fn_name, key):
-    """
-    Patch the given mod/function. If the function is executed, emit
-    an nvtx_range with data indicating that 'key' was true
-    """
-    old_fn = getattr(mod, fn_name)
-
-    def wrapper_func(*args, **kwargs):
-
-        push_nvtx_model_config({key: True})
-        result = old_fn(*args, **kwargs)
-        nvtx.range_pop()
-
-        return result
-
-    setattr(mod, fn_name, wrapper_func)
-
-
-def patch_never_call_with_args(mod, fn_name, key, bad_args):
-    """
-    Patch the given mod/function. If the function is executed
-    and any of the bad args have any of the listed bad values,
-    emit an nvtx_range with data indicating that 'key' was true
-    """
-    old_fn = getattr(mod, fn_name)
-
-    def wrapper_func(*args, **kwargs):
-
-        signature = ins.signature(old_fn)
-        bound = signature.bind(*args, **kwargs)
-        bound.apply_defaults()
-
-        problem = False
-        for k, v in bound.arguments.items():
-            if k in bad_args:
-                if v in bad_args[k]:
-                    problem = True
-
-        if problem:
-            push_nvtx_model_config({key: True})
-
-        result = old_fn(*args, **kwargs)
-
-        if problem:
-            nvtx.range_pop()
-
-        return result
-
-    setattr(mod, fn_name, wrapper_func)
-
-
-def patch_model_configs():
-    """
-    Patch functions that help gather high-level configuration options for the model.
-    All resulting nvtx ranges will have 'model_config' as the primary key
-    """
-
-    patch_dataloader_init()
-
-    patch_with_always_benchmark(torch.nn.functional, "conv1d")
-    patch_with_always_benchmark(torch.nn.functional, "conv2d")
-    patch_with_always_benchmark(torch.nn.functional, "conv3d")
-    patch_with_always_benchmark(torch.nn.functional, "conv_transpose1d")
-    patch_with_always_benchmark(torch.nn.functional, "conv_transpose2d")
-    patch_with_always_benchmark(torch.nn.functional, "conv_transpose3d")
-
-    patch_never_call(torch.autograd.detect_anomaly, "__init__", "detect_anomaly")
-    patch_never_call(torch.autograd, "gradcheck", "gradcheck")
-    patch_never_call(torch.autograd, "gradgradcheck", "gradgradcheck")
-    patch_never_call(torch.autograd.profiler.record_function, "__init__", "record_function")
-
-    # Patch both AMP libraries
-    #
-    import importlib
-    if importlib.util.find_spec("apex") is not None and importlib.util.find_spec("apex.amp") is not None:
-        import apex.amp
-        patch_never_call_with_args(apex.amp, "initialize", "amp_enabled", {"enabled": {True}})
-    patch_never_call_with_args(torch.cuda.amp, "autocast", "amp_enabled", {"enabled": {True}})
-
-    patch_never_call_with_args(torch.autograd.profiler.profile, "__init__", "profile", {"enabled": {True}})
-    patch_never_call_with_args(torch.autograd.set_detect_anomaly, "__init__", "detect_anomaly", {"mode": {True}})
-    patch_never_call_with_args(torch.autograd.profiler.emit_nvtx, "__init__", "emit_nvtx", {"enabled": {True}})
-
-
 def init(*args, **kwargs):
     """
     Initialize pyprof and monkey-patch Torch functions
-
-    Kwargs:
-        enable_function_stack (bool): When true, function stack information
-            will be added to NVTX markers
-        capture_input_ops (bool): When true, input tensor names will be added
-            to NVTX markers and enable_function_stack is set to True.
""" - global wrappers_enabled - - config = Config(*args, **kwargs) - - if config.delay_graph_capture: - ## Disable wrappers_enabled at init when user wants to control - ## which iteration to begin graph capture - wrappers_enabled = False print("Initializing NVTX monkey patches") @@ -692,6 +404,5 @@ def init(*args, **kwargs): patch_dataloader() patch_torch_classes() patch_torch_nn_forward_functions() - patch_model_configs() print("Done with NVTX monkey patching") diff --git a/qa/L0_function_stack/__init__.py b/qa/L0_function_stack/__init__.py deleted file mode 100644 index 3a8a2d7..0000000 --- a/qa/L0_function_stack/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import test_pyprof_function_stack.TestPyProfFuncStack as TestPyProfFuncStack diff --git a/qa/L0_function_stack/test.sh b/qa/L0_function_stack/test.sh deleted file mode 100644 index b659a64..0000000 --- a/qa/L0_function_stack/test.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. - # - # Licensed under the Apache License, Version 2.0 (the "License"); - # you may not use this file except in compliance with the License. - # You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -TEST_LOG="./func_stack.log" - - -apt-get update && \ - apt-get install -y --no-install-recommends python - -rm -f $TEST_LOG -RET=0 - -./test_pyprof_func_stack.py > $TEST_LOG 2>&1 -if [ $? -ne 0 ]; then - RET=1 -fi - -set -e - -if [ $RET -eq 0 ]; then - echo -e "\n***\n*** Test Passed\n***" -else - cat $TEST_LOG - echo -e "\n***\n*** Test FAILED\n***" -fi - -exit $RET \ No newline at end of file diff --git a/qa/L0_function_stack/test_pyprof_func_stack.py b/qa/L0_function_stack/test_pyprof_func_stack.py deleted file mode 100755 index c1f9c09..0000000 --- a/qa/L0_function_stack/test_pyprof_func_stack.py +++ /dev/null @@ -1,218 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-'''
-This test exercises the tracemarker get_func_stack() functionality
-'''
-import inspect
-import unittest
-
-import pyprof
-from pyprof.nvtx.config import Config
-from pyprof.nvtx.dlprof import DLProf
-
-config = Config(enable_function_stack=True)
-dlprof = DLProf()
-
-
-class TestPyProfFuncStack(unittest.TestCase):
-
-    def __init__(self, testName):
-        super().__init__(testName)
-
-    def setUp(self):
-        pass
-
-    def tearDown(self):
-        pass
-
-    def compare_funcstack(self, actual_tracemarker, expected_str):
-
-        # Given a funcstack string, remove TestPyProfFuncStack::_callTestMethod and everything above it
-        #
-        def remove_test_class_hierarchy(x):
-            separator = "/"
-            fn_split = x.split(separator)
-            split = 0
-
-            # Find the LAST instance of _callTestMethod in the split
-            #
-            for i, n in enumerate(fn_split):
-                if (n == "TestPyProfFuncStack::_callTestMethod"):
-                    split = i + 1
-
-            fn_split = fn_split[split:]
-            joined = separator.join(fn_split)
-            return joined
-
-        tracemarker_dict = eval(actual_tracemarker)
-        actual_func_stack = remove_test_class_hierarchy(tracemarker_dict["funcStack"])
-
-        self.assertEqual(expected_str, actual_func_stack, f"Expected: {expected_str}\nActual: {actual_func_stack}")
-
-    # Basic function hierarchy test
-    # Function stack is func1->func2->func3->verify
-    # Local function 'verify' gets recognized as a member of TestPyProfFuncStack because it uses 'self'
-    #
-    def test_basic(self):
-
-        def verify():
-            tracemarker = pyprof.nvtx.nvmarker.traceMarker("opname")
-            self.compare_funcstack(
-                tracemarker, "TestPyProfFuncStack::test_basic/func1/func2/func3/TestPyProfFuncStack::verify/opname"
-            )
-
-        def func3():
-            verify()
-
-        def func2():
-            func3()
-
-        def func1():
-            func2()
-
-        func1()
-
-    # Test that 'always_benchmark_wrapper' is ignored in hierarchy
-    # Test that 'wrapper_func' is ignored in hierarchy
-    # Function stack is func1->func2->always_benchmark_wrapper->func3->wrapper_func->verify
-    # Local function 'verify' gets recognized as a member of TestPyProfFuncStack because it uses 'self'
-    #
-    def test_ignore_wrapper_func(self):
-
-        def verify():
-            tracemarker = pyprof.nvtx.nvmarker.traceMarker("opname")
-            self.compare_funcstack(
-                tracemarker,
-                "TestPyProfFuncStack::test_ignore_wrapper_func/func1/func2/func3/TestPyProfFuncStack::verify/opname"
-            )
-
-        def wrapper_func():
-            verify()
-
-        def func3():
-            wrapper_func()
-
-        def always_benchmark_wrapper():
-            func3()
-
-        def func2():
-            always_benchmark_wrapper()
-
-        def func1():
-            func2()
-
-        func1()
-
-    # Test that lambdas are NOT ignored in hierarchy
-    # Function stack is func1->func2->lambda->func3->verify
-    # Local function 'verify' gets recognized as a member of TestPyProfFuncStack because it uses 'self'
-    #
-    def test_ignore_lambda(self):
-
-        def verify():
-            tracemarker = pyprof.nvtx.nvmarker.traceMarker("opname")
-            self.compare_funcstack(
-                tracemarker,
-                "TestPyProfFuncStack::test_ignore_lambda/func1/func2/<lambda>/func3/TestPyProfFuncStack::verify/opname"
-            )
-
-        def func3():
-            verify()
-
-        def func2():
-            x = lambda: func3()
-            x()
-
-        def func1():
-            func2()
-
-        func1()
-
-    # Test that duplicates are ignored in hierarchy
-    #
-    # Function stack is func1->func1->func1->func1->func2->verify
-    # Local function 'verify' gets recognized as a member of TestPyProfFuncStack because it uses 'self'
-    #
-    def test_ignore_duplicates(self):
-
-        def verify():
-            tracemarker = pyprof.nvtx.nvmarker.traceMarker("opname")
-            self.compare_funcstack(
-                tracemarker,
-                "TestPyProfFuncStack::test_ignore_duplicates/func1/func2/TestPyProfFuncStack::verify/opname"
-            )
-
-        def func2():
-            verify()
-
-        def func1(count):
-            if (count > 0):
-                func1(count - 1)
-            else:
-                func2()
-
-        func1(3)
-
-    # Function stack is func1->func2->wrapper_func. It is called 4 times.
-    #
-    # Only the 4th time is any checking done
-    #
-    # On that 4th call, it will be the 2nd time executing func2, from func1, and
-    # it will be the 2nd time executing wrapper_func from that 2nd call of func2.
-    #
-    # Even though wrapper_func is omitted from the func stack, its call count should
-    # be passed on to the opname.
-    #
-    def test_uniquified_nodes(self):
-
-        def verify(check):
-            tracemarker = pyprof.nvtx.nvmarker.traceMarker("opname")
-            if (check):
-                self.compare_funcstack(
-                    tracemarker,
-                    "TestPyProfFuncStack::test_uniquified_nodes/func1/func2(2)/TestPyProfFuncStack::verify/opname(2)"
-                )
-
-        def wrapper_func(check):
-            verify(check)
-
-        def func2(check):
-            wrapper_func(False)
-            wrapper_func(check)
-
-        def func1():
-            func2(False)
-            func2(True)
-
-        func1()
-
-
-def run_tests(test_name):
-    dummy = TestPyProfFuncStack(test_name)
-    test_cases = list(
-        filter(lambda x: 'test_' in x, map(lambda x: x[0], inspect.getmembers(dummy, predicate=inspect.ismethod)))
-    )
-
-    print(f'Running tests for {test_name}')
-
-    suite = unittest.TestSuite()
-    for test_case in test_cases:
-        suite.addTest(TestPyProfFuncStack(test_case))
-
-    result = unittest.TextTestRunner(verbosity=2).run(suite)
-
-    if result.wasSuccessful():
-        exit(0)
-    else:
-        exit(1)
-
-
-if __name__ == '__main__':
-    run_tests("test_basic")