Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions configs/config_all.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,10 @@ process:
timeout: 30 # The timeout in seconds for each HTTP request.
max_concurrent: 10 # Maximum concurrent downloads.
- fix_unicode_mapper: # fix unicode errors in text.
- generate_challenging_qa_mapper: # mapper to generate challenging question and answer pairs.
hf_model: 'Qwen/Qwen2.5-VL-7B-Instruct' # Huggingface model id.
category: 'Mathematical Reasoning' # The category for the current QA.
model_name: 'Qwen' # The name for the current model.
- generate_qa_from_examples_mapper: # mapper to generate question and answer pairs from examples.
hf_model: 'Qwen/Qwen2.5-7B-Instruct' # Model name on huggingface to generate question and answer pairs.
seed_file: 'demos/data/demo-dataset-chatml.jsonl' # Path to the seed file in chatml format.
Expand Down
2 changes: 2 additions & 0 deletions data_juicer/ops/mapper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from .extract_support_text_mapper import ExtractSupportTextMapper
from .extract_tables_from_html_mapper import ExtractTablesFromHtmlMapper
from .fix_unicode_mapper import FixUnicodeMapper
from .generate_challenging_qa_mapper import GenerateChallengingQAMapper
from .generate_qa_from_examples_mapper import GenerateQAFromExamplesMapper
from .generate_qa_from_text_mapper import GenerateQAFromTextMapper
from .image_blur_mapper import ImageBlurMapper
Expand Down Expand Up @@ -126,6 +127,7 @@
"FixUnicodeMapper",
"GenerateQAFromExamplesMapper",
"GenerateQAFromTextMapper",
"GenerateChallengingQAMapper",
"HumanPreferenceAnnotationMapper",
"ImageBlurMapper",
"ImageCaptioningFromGPT4VMapper",
Expand Down
Loading
Loading