Skip to content

Commit

Permalink
ran ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
melodywang060 committed Oct 11, 2024
1 parent 7c07bbe commit 9802cbf
Show file tree
Hide file tree
Showing 32 changed files with 899 additions and 1,269 deletions.
8 changes: 2 additions & 6 deletions extensions/rapids_notebook_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@ def walk_files(app, dir, outdir):
related_notebook_files = {}
for page in dir.glob("*"):
if page.is_dir():
related_notebook_files[page.name] = walk_files(
app, page, outdir / page.name
)
related_notebook_files[page.name] = walk_files(app, page, outdir / page.name)
else:
with contextlib.suppress(OSError):
os.remove(str(outdir / page.name))
Expand Down Expand Up @@ -59,9 +57,7 @@ def find_notebook_related_files(app, pagename, templatename, context, doctree):
path_to_output_parent = output_root / rel_page_parent

# Copy all related files to output and apply templating
related_notebook_files = walk_files(
app, path_to_page_parent, path_to_output_parent
)
related_notebook_files = walk_files(app, path_to_page_parent, path_to_output_parent)

# Make archive of related files
if related_notebook_files and len(related_notebook_files) > 1:
Expand Down
18 changes: 4 additions & 14 deletions extensions/rapids_related_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ def read_notebook_tags(path: str) -> list[str]:
return []


def generate_notebook_grid_myst(
notebooks: list[str], env: BuildEnvironment
) -> list[str]:
def generate_notebook_grid_myst(notebooks: list[str], env: BuildEnvironment) -> list[str]:
"""Generate sphinx-design grid of notebooks in MyST markdown.
Take a list of notebook documents and render out some MyST markdown displaying those
Expand Down Expand Up @@ -75,11 +73,7 @@ def get_title_for_notebook(path: str) -> str:
if i == len(cell_source) - 1: # no next_token
continue
next_token = cell_source[i + 1]
if (
token.type == "heading_open"
and token.tag == "h1"
and next_token.type == "inline"
):
if token.type == "heading_open" and token.tag == "h1" and next_token.type == "inline":
return next_token.content
raise ValueError("No top-level heading found")

Expand Down Expand Up @@ -146,9 +140,7 @@ def add_notebook_tag_map_to_context(app, pagename, templatename, context, doctre
except KeyError:
tag_tree[root] = [suffix]
context["notebook_tag_tree"] = tag_tree
context["notebook_tags"] = [
tag for tag, pages in app.env.notebook_tag_map.items() if pagename in pages
]
context["notebook_tags"] = [tag for tag, pages in app.env.notebook_tag_map.items() if pagename in pages]


class NotebookGalleryTocTree(TocTree):
Expand All @@ -162,9 +154,7 @@ def run(self) -> list[nodes.Node]:
output += toctree

# Generate the card grid for all items in the toctree
notebooks = [
notebook for _, notebook in toctree[0].children[0].attributes["entries"]
]
notebooks = [notebook for _, notebook in toctree[0].children[0].attributes["entries"]]
grid_markdown = generate_notebook_grid_myst(notebooks=notebooks, env=self.env)
for node in parse_markdown(markdown=grid_markdown, state=self.state):
gallery += node
Expand Down
12 changes: 3 additions & 9 deletions extensions/rapids_version_templating.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,7 @@ def visit_reference(self, node: nodes.reference) -> None:
uri_str = re.sub(r"~~~(.*?)~~~", r"{{ \1 }}", uri_str)

# fill in appropriate values based on app context
node.attributes["refuri"] = re.sub(
r"(?<!\$)\{\{.*?\}\}", self.template_func, uri_str
)
node.attributes["refuri"] = re.sub(r"(?<!\$)\{\{.*?\}\}", self.template_func, uri_str)

# update the document
node.parent.replace(node, node)
Expand All @@ -61,19 +59,15 @@ def visit_Text(self, node: nodes.Text) -> None:
Replace template strings in generic text.
This roughly corresponds to HTML ``<p>``, ``<pre>``, and similar elements.
"""
new_node = nodes.Text(
re.sub(r"(?<!\$)\{\{.*?\}\}", self.template_func, node.astext())
)
new_node = nodes.Text(re.sub(r"(?<!\$)\{\{.*?\}\}", self.template_func, node.astext()))
node.parent.replace(node, new_node)

def template_func(self, match: re.Match) -> str:
"""
Replace template strings like ``{{ rapids_version }}`` with real
values like ``24.10``.
"""
return self.app.builder.templates.render_string(
source=match.group(), context=self.app.config.rapids_version
)
return self.app.builder.templates.render_string(source=match.group(), context=self.app.config.rapids_version)


def version_template(
Expand Down
20 changes: 5 additions & 15 deletions source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,12 @@
},
}
rapids_version = (
versions["stable"]
if os.environ.get("DEPLOYMENT_DOCS_BUILD_STABLE", "false") == "true"
else versions["nightly"]
versions["stable"] if os.environ.get("DEPLOYMENT_DOCS_BUILD_STABLE", "false") == "true" else versions["nightly"]
)
rapids_version["rapids_conda_channels_list"] = [
channel
for channel in rapids_version["rapids_conda_channels"].split(" ")
if channel != "-c"
channel for channel in rapids_version["rapids_conda_channels"].split(" ") if channel != "-c"
]
rapids_version["rapids_conda_packages_list"] = rapids_version[
"rapids_conda_packages"
].split(" ")
rapids_version["rapids_conda_packages_list"] = rapids_version["rapids_conda_packages"].split(" ")

# -- General configuration ---------------------------------------------------

Expand Down Expand Up @@ -94,9 +88,7 @@
# -- Options for notebooks -------------------------------------------------

nb_execution_mode = "off"
rapids_deployment_notebooks_base_url = (
"https://github.com/rapidsai/deployment/blob/main/source/"
)
rapids_deployment_notebooks_base_url = "https://github.com/rapidsai/deployment/blob/main/source/"

# -- Options for HTML output -------------------------------------------------

Expand Down Expand Up @@ -146,8 +138,6 @@
def setup(app):
app.add_css_file("https://docs.rapids.ai/assets/css/custom.css")
app.add_css_file("css/custom.css")
app.add_js_file(
"https://docs.rapids.ai/assets/js/custom.js", loading_method="defer"
)
app.add_js_file("https://docs.rapids.ai/assets/js/custom.js", loading_method="defer")
app.add_js_file("js/nav.js", loading_method="defer")
app.add_js_file("js/notebook-gallery.js", loading_method="defer")
4 changes: 1 addition & 3 deletions source/examples/rapids-1brc-single-node/notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -200,9 +200,7 @@
"source": [
"n = 1_000_000_000 # Number of rows of data to generate\n",
"\n",
"lookup_df = cudf.read_csv(\n",
" \"lookup.csv\"\n",
") # Load our lookup table of stations and their mean temperatures\n",
"lookup_df = cudf.read_csv(\"lookup.csv\") # Load our lookup table of stations and their mean temperatures\n",
"std = 10.0 # We assume temperatures are normally distributed with a standard deviation of 10\n",
"chunksize = 2e8 # Set the number of rows to generate in one go (reduce this if you run into GPU RAM limits)\n",
"filename = Path(\"measurements.txt\") # Choose where to write to\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -995,12 +995,8 @@
"\n",
"\n",
"def map_haversine(part):\n",
" pickup = cuspatial.GeoSeries.from_points_xy(\n",
" part[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns()\n",
" )\n",
" dropoff = cuspatial.GeoSeries.from_points_xy(\n",
" part[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns()\n",
" )\n",
" pickup = cuspatial.GeoSeries.from_points_xy(part[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns())\n",
" dropoff = cuspatial.GeoSeries.from_points_xy(part[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns())\n",
" return cuspatial.haversine_distance(pickup, dropoff)\n",
"\n",
"\n",
Expand Down Expand Up @@ -1506,9 +1502,7 @@
"from random import randrange\n",
"\n",
"\n",
"def generate_workload(\n",
" stages=3, min_width=1, max_width=3, variation=1, input_workload=None\n",
"):\n",
"def generate_workload(stages=3, min_width=1, max_width=3, variation=1, input_workload=None):\n",
" graph = [input_workload] if input_workload is not None else [run_haversine()]\n",
" last_width = min_width\n",
" for _ in range(stages):\n",
Expand Down Expand Up @@ -1646,35 +1640,25 @@
],
"source": [
"%%time\n",
"start_time = (datetime.datetime.now() - datetime.timedelta(minutes=15)).strftime(\n",
" \"%Y-%m-%dT%H:%M:%SZ\"\n",
")\n",
"start_time = (datetime.datetime.now() - datetime.timedelta(minutes=15)).strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
"try:\n",
" # Start with a couple of concurrent workloads\n",
" workload = generate_workload(stages=10, max_width=2)\n",
" # Then increase demand as more users appear\n",
" workload = generate_workload(\n",
" stages=5, max_width=5, min_width=3, variation=5, input_workload=workload\n",
" )\n",
" workload = generate_workload(stages=5, max_width=5, min_width=3, variation=5, input_workload=workload)\n",
" # Now reduce the workload for a longer period of time, this could be over a lunchbreak or something\n",
" workload = generate_workload(stages=30, max_width=2, input_workload=workload)\n",
" # Everyone is back from lunch and is hitting the cluster hard\n",
" workload = generate_workload(\n",
" stages=10, max_width=10, min_width=3, variation=5, input_workload=workload\n",
" )\n",
" workload = generate_workload(stages=10, max_width=10, min_width=3, variation=5, input_workload=workload)\n",
" # The after lunch rush is easing\n",
" workload = generate_workload(\n",
" stages=5, max_width=5, min_width=3, variation=5, input_workload=workload\n",
" )\n",
" workload = generate_workload(stages=5, max_width=5, min_width=3, variation=5, input_workload=workload)\n",
" # As we get towards the end of the day demand slows off again\n",
" workload = generate_workload(stages=10, max_width=2, input_workload=workload)\n",
" workload.compute()\n",
"finally:\n",
" client.close()\n",
" cluster.close()\n",
" end_time = (datetime.datetime.now() + datetime.timedelta(minutes=15)).strftime(\n",
" \"%Y-%m-%dT%H:%M:%SZ\"\n",
" )"
" end_time = (datetime.datetime.now() + datetime.timedelta(minutes=15)).strftime(\"%Y-%m-%dT%H:%M:%SZ\")"
]
},
{
Expand Down Expand Up @@ -2037,14 +2021,10 @@
" end_time,\n",
" \"1s\",\n",
")\n",
"running_pods = running_pods[\n",
" running_pods.columns.drop(list(running_pods.filter(regex=\"prepull\")))\n",
"]\n",
"running_pods = running_pods[running_pods.columns.drop(list(running_pods.filter(regex=\"prepull\")))]\n",
"nodes = p.query_range(\"count(kube_node_info)\", start_time, end_time, \"1s\")\n",
"nodes.columns = [\"Available GPUs\"]\n",
"nodes[\"Available GPUs\"] = (\n",
" nodes[\"Available GPUs\"] * 2\n",
") # We know our nodes each had 2 GPUs\n",
"nodes[\"Available GPUs\"] = nodes[\"Available GPUs\"] * 2 # We know our nodes each had 2 GPUs\n",
"nodes[\"Utilized GPUs\"] = running_pods.sum(axis=1)"
]
},
Expand Down
9 changes: 2 additions & 7 deletions source/examples/rapids-azureml-hpo/notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@
"from azure.ai.ml import MLClient\n",
"from azure.identity import DefaultAzureCredential\n",
"\n",
"\n",
"subscription_id = \"FILL IN WITH YOUR AZURE ML CREDENTIALS\"\n",
"resource_group_name = \"FILL IN WITH YOUR AZURE ML CREDENTIALS\"\n",
"workspace_name = \"FILL IN WITH YOUR AZURE ML CREDENTIALS\"\n",
Expand Down Expand Up @@ -219,9 +218,7 @@
" )\n",
" ml_client.compute.begin_create_or_update(gpu_target).result()\n",
"\n",
" print(\n",
" f\"AMLCompute with name {gpu_target.name} is created, the compute size is {gpu_target.size}\"\n",
" )"
" print(f\"AMLCompute with name {gpu_target.name} is created, the compute size is {gpu_target.size}\")"
]
},
{
Expand Down Expand Up @@ -488,9 +485,7 @@
"\n",
"\n",
"# Define the limits for this sweep\n",
"sweep_job.set_limits(\n",
" max_total_trials=10, max_concurrent_trials=2, timeout=18000, trial_timeout=3600\n",
")\n",
"sweep_job.set_limits(max_total_trials=10, max_concurrent_trials=2, timeout=18000, trial_timeout=3600)\n",
"\n",
"\n",
"# Specify your experiment details\n",
Expand Down
48 changes: 12 additions & 36 deletions source/examples/rapids-azureml-hpo/rapids_csp_azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,7 @@ def load_hyperparams(self, model_name="XGBoost"):
self.log_to_file(str(error))
return

def load_data(
self, filename="dataset.orc", col_labels=None, y_label="ArrDelayBinary"
):
def load_data(self, filename="dataset.orc", col_labels=None, y_label="ArrDelayBinary"):
"""
Loading the data into the object from the filename and based on the columns that we are
interested in. Also, generates y_label from 'ArrDelay' column to convert this into a binary
Expand Down Expand Up @@ -185,9 +183,7 @@ def load_data(

elif "multi" in self.compute_type:
self.log_to_file("\n\tReading using dask dataframe")
dataset = dask.dataframe.read_parquet(
target_filename, columns=col_labels
)
dataset = dask.dataframe.read_parquet(target_filename, columns=col_labels)

elif "GPU" in self.compute_type:
# GPU Reading Option
Expand All @@ -205,9 +201,7 @@ def load_data(

elif "multi" in self.compute_type:
self.log_to_file("\n\tReading using dask_cudf")
dataset = dask_cudf.read_parquet(
target_filename, columns=col_labels
)
dataset = dask_cudf.read_parquet(target_filename, columns=col_labels)

# cast all columns to float32
for col in dataset.columns:
Expand All @@ -222,14 +216,10 @@ def load_data(
dataset = dataset.fillna(0.0) # Filling the null values. Needed for dask-cudf

self.log_to_file(f"\n\tIngestion completed in {ingestion_timer.duration}")
self.log_to_file(
f"\n\tDataset descriptors: {dataset.shape}\n\t{dataset.dtypes}"
)
self.log_to_file(f"\n\tDataset descriptors: {dataset.shape}\n\t{dataset.dtypes}")
return dataset, col_labels, y_label, ingestion_timer.duration

def split_data(
self, dataset, y_label, train_size=0.8, random_state=0, shuffle=True
):
def split_data(self, dataset, y_label, train_size=0.8, random_state=0, shuffle=True):
"""
Split the data into train and test sets, using the appropriate imports for each compute mode.
CPU compute - Uses sklearn, we manually filter y_label column in the split call
Expand Down Expand Up @@ -321,13 +311,9 @@ def train_model(self, X_train, y_train, model_params):

try:
if self.model_type == "XGBoost":
trained_model, training_time = self.fit_xgboost(
X_train, y_train, model_params
)
trained_model, training_time = self.fit_xgboost(X_train, y_train, model_params)
elif self.model_type == "RandomForest":
trained_model, training_time = self.fit_random_forest(
X_train, y_train, model_params
)
trained_model, training_time = self.fit_random_forest(X_train, y_train, model_params)
except Exception as error:
self.log_to_file("\n\n!error during model training: " + str(error))
self.log_to_file(f"\n\tFinished training in {training_time:.4f} s")
Expand All @@ -354,9 +340,7 @@ def fit_xgboost(self, X_train, y_train, model_params):
)
elif "multi" in self.compute_type:
self.log_to_file("\n\tTraining multi-GPU XGBoost")
train_DMatrix = xgboost.dask.DaskDMatrix(
self.client, data=X_train, label=y_train
)
train_DMatrix = xgboost.dask.DaskDMatrix(self.client, data=X_train, label=y_train)
trained_model = xgboost.dask.train(
self.client,
dtrain=train_DMatrix,
Expand Down Expand Up @@ -441,12 +425,8 @@ def evaluate_test_perf(self, trained_model, X_test, y_test, threshold=0.5):
try:
if self.model_type == "XGBoost":
if "multi" in self.compute_type:
test_DMatrix = xgboost.dask.DaskDMatrix(
self.client, data=X_test, label=y_test
)
xgb_pred = xgboost.dask.predict(
self.client, trained_model, test_DMatrix
).compute()
test_DMatrix = xgboost.dask.DaskDMatrix(self.client, data=X_test, label=y_test)
xgb_pred = xgboost.dask.predict(self.client, trained_model, test_DMatrix).compute()
xgb_pred = (xgb_pred > threshold) * 1.0
test_accuracy = accuracy_score(y_test.compute(), xgb_pred)
elif "single" in self.compute_type:
Expand All @@ -459,13 +439,9 @@ def evaluate_test_perf(self, trained_model, X_test, y_test, threshold=0.5):
if "multi" in self.compute_type:
cuml_pred = trained_model.predict(X_test).compute()
self.log_to_file("\n\tPrediction complete")
test_accuracy = accuracy_score(
y_test.compute(), cuml_pred, convert_dtype=True
)
test_accuracy = accuracy_score(y_test.compute(), cuml_pred, convert_dtype=True)
elif "single" in self.compute_type:
test_accuracy = trained_model.score(
X_test, y_test.astype("int32")
)
test_accuracy = trained_model.score(X_test, y_test.astype("int32"))

except Exception as error:
self.log_to_file("\n\n!error during inference: " + str(error))
Expand Down
Loading

0 comments on commit 9802cbf

Please sign in to comment.