Skip to content

Commit 872a1d3

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent fcd20c0 commit 872a1d3

File tree

10 files changed

+40
-42
lines changed

10 files changed

+40
-42
lines changed

src/distilabel/distiset.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -509,9 +509,9 @@ def load_from_disk(
509509
)
510510
dest_distiset_path = distiset_path
511511

512-
assert fs.isdir(
513-
original_distiset_path
514-
), "`distiset_path` must be a `PathLike` object pointing to a folder or a URI of a remote filesystem."
512+
assert fs.isdir(original_distiset_path), (
513+
"`distiset_path` must be a `PathLike` object pointing to a folder or a URI of a remote filesystem."
514+
)
515515

516516
has_config = False
517517
has_artifacts = False

src/distilabel/pipeline/batch_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,10 +231,10 @@ def from_step(
231231
input_batch_size=getattr(step, "input_batch_size", None),
232232
data={predecessor: [] for predecessor in predecessors},
233233
convergence_step=convergence_step,
234-
next_expected_seq_no={predecessor: (0, 0) for predecessor in predecessors},
234+
next_expected_seq_no=dict.fromkeys(predecessors, (0, 0)),
235235
step_signature=step.signature,
236236
use_cache=step.use_cache,
237-
step_offset={predecessor: (0, 0) for predecessor in predecessors},
237+
step_offset=dict.fromkeys(predecessors, (0, 0)),
238238
)
239239

240240
def _get_seq_no(self) -> int:

src/distilabel/pipeline/write_buffer.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,9 @@ def __init__(
6565
step: [] for step in leaf_steps
6666
}
6767
# TODO: make this configurable
68-
self._buffers_dump_batch_size: Dict[str, int] = {
69-
step: 50 for step in leaf_steps
70-
}
68+
self._buffers_dump_batch_size: Dict[str, int] = dict.fromkeys(leaf_steps, 50)
7169
self._buffer_last_schema = {}
72-
self._buffers_last_file: Dict[str, int] = {step: 1 for step in leaf_steps}
70+
self._buffers_last_file: Dict[str, int] = dict.fromkeys(leaf_steps, 1)
7371
self._steps_cached = steps_cached or {}
7472
self._logger = logging.getLogger("distilabel.write_buffer")
7573

src/distilabel/steps/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def _infer_step_name(
101101
base_name = "_".join(parts[:-1])
102102
while name in step_names:
103103
idx = int(name.split("_")[-1])
104-
name = f"{base_name}_{idx+1}"
104+
name = f"{base_name}_{idx + 1}"
105105
return name
106106

107107

src/distilabel/steps/tasks/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ def _output_on_failure(
211211
a new field `distilabel_meta` with the raw output of the LLM.
212212
"""
213213
# Create a dictionary with the outputs of the task (every output set to None)
214-
outputs = {output: None for output in self.outputs}
214+
outputs = dict.fromkeys(self.outputs)
215215
outputs["model_name"] = self.llm.model_name # type: ignore
216216
outputs = self._create_metadata(
217217
outputs,

src/distilabel/steps/tasks/improving_text_embeddings.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def format_output(
6666
A Python dictionary with the parsed output based on the `keys` property.
6767
"""
6868
if output is None:
69-
return {key: None for key in self.keys}
69+
return dict.fromkeys(self.keys)
7070

7171
def escape_backslashes_in_values(s):
7272
# Regular expression to match the key-value pairs in the dictionary
@@ -100,7 +100,7 @@ def replace_backslashes(match):
100100
pass
101101

102102
if not isinstance(output, dict):
103-
return {key: None for key in self.keys}
103+
return dict.fromkeys(self.keys)
104104

105105
return {key: output.get(key, None) for key in self.keys}
106106

src/distilabel/steps/tasks/math_shepherd/completer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ def _auto_label(
485485
self._logger.info("Completer failed due to empty completion")
486486
continue
487487
if completion[-1] == golden_answers[instruction_i]:
488-
label = f" { self.tags[0]}"
488+
label = f" {self.tags[0]}"
489489
# If we found one, it's enough as we are doing Hard Estimation
490490
continue
491491
# In case we had no solutions from the previous step, otherwise we would have

src/distilabel/steps/tasks/text_generation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ def unload(self) -> None:
229229
@property
230230
def inputs(self) -> "StepColumns":
231231
"""The input for the task is the `instruction` by default, or the `columns` given as input."""
232-
columns = {column: True for column in self.columns}
232+
columns = dict.fromkeys(self.columns, True)
233233
columns["system_prompt"] = False
234234
return columns
235235

src/distilabel/utils/mkdocs/components_gallery.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -296,9 +296,9 @@ def _generate_steps_pages(self, src_dir: Path, steps: list) -> List[str]:
296296
docstring["icon"] = _STEPS_CATEGORY_TO_ICON.get(first_category, "")
297297

298298
if docstring["icon"]:
299-
assert (
300-
docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values()
301-
), f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON"
299+
assert docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values(), (
300+
f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON"
301+
)
302302

303303
name = step["name"]
304304

@@ -364,9 +364,9 @@ def _generate_tasks_pages(self, src_dir: Path, tasks: list) -> List[str]:
364364
first_category = docstring["categories"][0]
365365
docstring["icon"] = _STEPS_CATEGORY_TO_ICON.get(first_category, "")
366366
if docstring["icon"]:
367-
assert (
368-
docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values()
369-
), f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON"
367+
assert docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values(), (
368+
f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON"
369+
)
370370

371371
name = task["name"]
372372

tests/unit/models/embeddings/test_llamacpp.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,9 @@ def test_normalize_embeddings(self, test_inputs):
115115
for result in results:
116116
# Check if the embedding is normalized (L2 norm should be close to 1)
117117
norm = np.linalg.norm(result)
118-
assert np.isclose(
119-
norm, 1.0, atol=1e-6
120-
), f"Norm is {norm}, expected close to 1.0"
118+
assert np.isclose(norm, 1.0, atol=1e-6), (
119+
f"Norm is {norm}, expected close to 1.0"
120+
)
121121

122122
def test_normalize_embeddings_false(self, test_inputs):
123123
"""
@@ -129,15 +129,15 @@ def test_normalize_embeddings_false(self, test_inputs):
129129
for result in results:
130130
# Check if the embedding is not normalized (L2 norm should not be close to 1)
131131
norm = np.linalg.norm(result)
132-
assert not np.isclose(
133-
norm, 1.0, atol=1e-6
134-
), f"Norm is {norm}, expected not close to 1.0"
132+
assert not np.isclose(norm, 1.0, atol=1e-6), (
133+
f"Norm is {norm}, expected not close to 1.0"
134+
)
135135

136136
# Additional check: ensure that at least one embedding has a norm significantly different from 1
137137
norms = [np.linalg.norm(result) for result in results]
138-
assert any(
139-
not np.isclose(norm, 1.0, atol=0.1) for norm in norms
140-
), "Expected at least one embedding with norm not close to 1.0"
138+
assert any(not np.isclose(norm, 1.0, atol=0.1) for norm in norms), (
139+
"Expected at least one embedding with norm not close to 1.0"
140+
)
141141

142142
def test_encode_batch(self) -> None:
143143
"""
@@ -149,20 +149,20 @@ def test_encode_batch(self) -> None:
149149
inputs = [f"This is test sentence {i}" for i in range(batch_size)]
150150
results = self.embeddings.encode(inputs=inputs)
151151

152-
assert (
153-
len(results) == batch_size
154-
), f"Expected {batch_size} results, got {len(results)}"
152+
assert len(results) == batch_size, (
153+
f"Expected {batch_size} results, got {len(results)}"
154+
)
155155
for result in results:
156-
assert (
157-
len(result) == 384
158-
), f"Expected embedding dimension 384, got {len(result)}"
156+
assert len(result) == 384, (
157+
f"Expected embedding dimension 384, got {len(result)}"
158+
)
159159

160160
# Test with a large batch to ensure it doesn't cause issues
161161
large_batch = ["Large batch test" for _ in range(100)]
162162
large_results = self.embeddings.encode(inputs=large_batch)
163-
assert (
164-
len(large_results) == 100
165-
), f"Expected 100 results for large batch, got {len(large_results)}"
163+
assert len(large_results) == 100, (
164+
f"Expected 100 results for large batch, got {len(large_results)}"
165+
)
166166

167167
def test_encode_batch_consistency(self) -> None:
168168
"""
@@ -180,6 +180,6 @@ def test_encode_batch_consistency(self) -> None:
180180
batch_result = self.embeddings.encode([input_text, "Another sentence"])[0]
181181

182182
# Compare the embeddings
183-
assert np.allclose(
184-
single_result, batch_result, atol=1e-5
185-
), "Embeddings are not consistent between single and batch processing"
183+
assert np.allclose(single_result, batch_result, atol=1e-5), (
184+
"Embeddings are not consistent between single and batch processing"
185+
)

Comments (0)

There are no commit comments on this commit.