@@ -750,8 +750,32 @@ def apply_mapping(
750750 duplicate_for_table = mapping_dict .get ("use_template_rowwise" , False )
751751 logging .info (f"use_template_rowwise: { duplicate_for_table } " )
752752
753- # PHASE 1: Download all source files using helper function
753+ # PHASE 0: If opt_data_url is provided, replace the original data source URL in YARRRML
754+ # This MUST happen BEFORE converting YARRRML to RML
754755 sources = mapping_dict .get ("sources" , {})
756+ if opt_data_url and sources :
757+ # Get the first source URL (the one we want to replace)
758+ first_source_name = next (iter (sources ))
759+ original_data_url = sources [first_source_name ]["access" ].strip ("/" )
760+
761+ # Replace ALL occurrences of the original data URL with the new one in the YARRRML string.
762+ # Both URLs have leading/trailing "/" stripped before the substitution; doing this on the raw text ensures the RML rules will have the correct URL from the start
763+ if isinstance (mapping_data , bytes ):
764+ mapping_data_str = mapping_data .decode ('utf-8' )
765+ else :
766+ mapping_data_str = mapping_data
767+
768+ # Replace the data URL in the YARRRML content
769+ mapping_data_str = mapping_data_str .replace (original_data_url , opt_data_url .strip ("/" ))
770+ mapping_data = mapping_data_str .encode ('utf-8' ) if isinstance (mapping_data , bytes ) else mapping_data_str
771+
772+ # Re-parse the modified YAML to update mapping_dict
773+ mapping_dict = yaml .safe_load (mapping_data )
774+ sources = mapping_dict .get ("sources" , {})
775+
776+ logging .info (f"Replaced data source URL in YARRRML: { original_data_url } -> { opt_data_url } " )
777+
778+ # PHASE 1: Download all source files using helper function
755779 url_mapping , primary_data_url , filename = download_sources (
756780 sources , opt_data_url , authorization
757781 )
0 commit comments