@@ -176,19 +176,19 @@ def __post_init__(self):
176
176
177
177
178
178
@dataclass
179
- class PermutableAttributeValue :
180
- """Value to be used for the attribute."""
179
+ class SampledAttributeValue :
180
+ """Value to be sampled for the attribute."""
181
181
182
182
id : str
183
183
"""ID to be used when referencing the attribute value during synthesis."""
184
184
185
- value : str
186
- """Value to be used for the attribute.
187
- Referenced as {attribute_id.value }"""
185
+ name : str
186
+ """Plaintext name of the attribute value .
187
+ Referenced as {attribute_id}"""
188
188
189
189
description : str
190
190
"""Description of the attribute value.
191
- Referenced as {attribute_id.value. description}"""
191
+ Referenced as {attribute_id.description}"""
192
192
193
193
sample_rate : Optional [float ] = None
194
194
"""Sample rate for the attribute value. If not specified, will assume uniform
@@ -197,34 +197,34 @@ class PermutableAttributeValue:
197
197
def __post_init__ (self ):
198
198
"""Verifies/populates params."""
199
199
if not self .id :
200
- raise ValueError ("PermutableAttributeValue .id cannot be empty." )
201
- if not self .value :
202
- raise ValueError ("PermutableAttributeValue.value cannot be empty." )
200
+ raise ValueError ("SampledAttributeValue .id cannot be empty." )
201
+ if not self .name :
202
+ raise ValueError ("SampledAttributeValue.name cannot be empty." )
203
203
if not self .description :
204
- raise ValueError ("PermutableAttributeValue .description cannot be empty." )
204
+ raise ValueError ("SampledAttributeValue .description cannot be empty." )
205
205
if self .sample_rate is not None and (
206
206
self .sample_rate < 0 or self .sample_rate > 1
207
207
):
208
208
raise ValueError (
209
- "PermutableAttributeValue .sample_rate must be between 0 and 1."
209
+ "SampledAttributeValue .sample_rate must be between 0 and 1."
210
210
)
211
211
212
212
213
213
@dataclass
214
- class PermutableAttribute :
215
- """Attributes to be varied across the dataset."""
214
+ class SampledAttribute :
215
+ """Attributes to be sampled across the dataset."""
216
216
217
217
id : str
218
218
"""ID to be used when referencing the attribute during synthesis."""
219
219
220
- attribute : str
221
- """Plaintext name of the attribute. Referenced as {attribute_id }"""
220
+ name : str
221
+ """Plaintext name of the attribute. Referenced as {id.parent }"""
222
222
223
223
description : str
224
- """Description of the attribute. Referenced as {attribute_id .description}"""
224
+ """Description of the attribute. Referenced as {id.parent .description}"""
225
225
226
- possible_values : list [PermutableAttributeValue ]
227
- """Type of the attribute."""
226
+ possible_values : list [SampledAttributeValue ]
227
+ """Values to be sampled for the attribute."""
228
228
229
229
def get_value_distribution (self ) -> dict [str , float ]:
230
230
"""Get the distribution of attribute values."""
@@ -236,13 +236,13 @@ def get_value_distribution(self) -> dict[str, float]:
236
236
def __post_init__ (self ):
237
237
"""Verifies/populates params."""
238
238
if not self .id :
239
- raise ValueError ("PermutableAttribute .id cannot be empty." )
240
- if not self .attribute :
241
- raise ValueError ("PermutableAttribute.attribute cannot be empty." )
239
+ raise ValueError ("SampledAttribute .id cannot be empty." )
240
+ if not self .name :
241
+ raise ValueError ("SampledAttribute.name cannot be empty." )
242
242
if not self .description :
243
- raise ValueError ("PermutableAttribute .description cannot be empty." )
243
+ raise ValueError ("SampledAttribute .description cannot be empty." )
244
244
if not self .possible_values :
245
- raise ValueError ("PermutableAttribute .possible_values cannot be empty." )
245
+ raise ValueError ("SampledAttribute .possible_values cannot be empty." )
246
246
247
247
value_ids = []
248
248
sample_rates = []
@@ -252,9 +252,7 @@ def __post_init__(self):
252
252
253
253
value_ids_set = set (value_ids )
254
254
if len (value_ids ) != len (value_ids_set ):
255
- raise ValueError (
256
- "PermutableAttribute.possible_values must have unique IDs."
257
- )
255
+ raise ValueError ("SampledAttribute.possible_values must have unique IDs." )
258
256
259
257
# Normalize sample rates
260
258
normalized_sample_rates = []
@@ -267,7 +265,7 @@ def __post_init__(self):
267
265
undefined_sample_rate_count += 1
268
266
269
267
if defined_sample_rate > 1.0 :
270
- raise ValueError ("PermutableAttribute .possible_values must sum to 1.0." )
268
+ raise ValueError ("SampledAttribute .possible_values must sum to 1.0." )
271
269
272
270
# Assign remaining sample rate to undefined sample rates
273
271
remaining_sample_rate = 1.0 - defined_sample_rate
@@ -517,7 +515,7 @@ class GeneralSynthesisParams(BaseParams):
517
515
Examples will be enumerated during sampling, and attributes can be referenced as
518
516
attributes when generating new attributes."""
519
517
520
- permutable_attributes : Optional [list [PermutableAttribute ]] = None
518
+ sampled_attributes : Optional [list [SampledAttribute ]] = None
521
519
"""Attributes to be varied across the dataset.
522
520
523
521
Attributes each have a set of possible values which will be randomly sampled
@@ -636,18 +634,18 @@ def _check_example_source_attribute_ids(self, all_attribute_ids: set[str]) -> No
636
634
for new_key in example_keys :
637
635
self ._check_attribute_ids (all_attribute_ids , new_key )
638
636
639
- def _check_permutable_attribute_ids (self , all_attribute_ids : set [str ]) -> None :
640
- """Check attribute IDs from permutable attributes for uniqueness."""
641
- if self .permutable_attributes is None :
637
+ def _check_sampled_attribute_ids (self , all_attribute_ids : set [str ]) -> None :
638
+ """Check attribute IDs from sampled attributes for uniqueness."""
639
+ if self .sampled_attributes is None :
642
640
return
643
641
644
- if len (self .permutable_attributes ) == 0 :
642
+ if len (self .sampled_attributes ) == 0 :
645
643
raise ValueError (
646
- "GeneralSynthesisParams.permutable_attributes cannot be empty."
644
+ "GeneralSynthesisParams.sampled_attributes cannot be empty."
647
645
)
648
646
649
- for permutable_attribute in self .permutable_attributes :
650
- attribute_id = permutable_attribute .id
647
+ for sampled_attribute in self .sampled_attributes :
648
+ attribute_id = sampled_attribute .id
651
649
self ._check_attribute_ids (all_attribute_ids , attribute_id )
652
650
653
651
def _check_generated_attribute_ids (self , all_attribute_ids : set [str ]) -> None :
@@ -716,7 +714,7 @@ def __post_init__(self):
716
714
self ._check_dataset_source_attribute_ids (all_attribute_ids )
717
715
self ._check_document_source_attribute_ids (all_attribute_ids )
718
716
self ._check_example_source_attribute_ids (all_attribute_ids )
719
- self ._check_permutable_attribute_ids (all_attribute_ids )
717
+ self ._check_sampled_attribute_ids (all_attribute_ids )
720
718
self ._check_generated_attribute_ids (all_attribute_ids )
721
719
self ._check_transformed_attribute_ids (all_attribute_ids )
722
720
self ._check_passthrough_attribute_ids ()
0 commit comments