Skip to content

Commit 39f145f

Browse files
author
Thomas Hanke
committed
fix optional data_url usage
1 parent 0808131 commit 39f145f

1 file changed

Lines changed: 25 additions & 1 deletion

File tree

app.py

Lines changed: 25 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -750,8 +750,32 @@ def apply_mapping(
750750
duplicate_for_table = mapping_dict.get("use_template_rowwise", False)
751751
logging.info(f"use_template_rowwise: {duplicate_for_table}")
752752

753-
# PHASE 1: Download all source files using helper function
753+
# PHASE 0: If opt_data_url is provided, replace the original data source URL in YARRRML
754+
# This MUST happen BEFORE converting YARRRML to RML
754755
sources = mapping_dict.get("sources", {})
756+
if opt_data_url and sources:
757+
# Get the first source URL (the one we want to replace)
758+
first_source_name = next(iter(sources))
759+
original_data_url = sources[first_source_name]["access"].strip("/")
760+
761+
# Replace ALL occurrences of the original data URL with the new one in the YARRRML string
762+
# This ensures the RML rules will have the correct URL from the start
763+
if isinstance(mapping_data, bytes):
764+
mapping_data_str = mapping_data.decode('utf-8')
765+
else:
766+
mapping_data_str = mapping_data
767+
768+
# Replace the data URL in the YARRRML content
769+
mapping_data_str = mapping_data_str.replace(original_data_url, opt_data_url.strip("/"))
770+
mapping_data = mapping_data_str.encode('utf-8') if isinstance(mapping_data, bytes) else mapping_data_str
771+
772+
# Re-parse the modified YAML to update mapping_dict
773+
mapping_dict = yaml.safe_load(mapping_data)
774+
sources = mapping_dict.get("sources", {})
775+
776+
logging.info(f"Replaced data source URL in YARRRML: {original_data_url} -> {opt_data_url}")
777+
778+
# PHASE 1: Download all source files using helper function
755779
url_mapping, primary_data_url, filename = download_sources(
756780
sources, opt_data_url, authorization
757781
)

0 commit comments

Comments
 (0)