diff --git a/libs/core/langchain_core/utils/html.py b/libs/core/langchain_core/utils/html.py index 880a2edc00eed..b6bbf5cf22157 100644 --- a/libs/core/langchain_core/utils/html.py +++ b/libs/core/langchain_core/utils/html.py @@ -43,7 +43,7 @@ def find_all_links( pattern: Regex to use for extracting links from raw HTML. Returns: - all links + A list of all links found in the HTML. """ pattern = pattern or DEFAULT_LINK_REGEX return list(set(re.findall(pattern, raw_html))) @@ -73,7 +73,7 @@ def extract_sub_links( exception. Otherwise, raise the exception. Returns: - sub links. + A list of absolute paths to sub links. """ base_url_to_use = base_url if base_url is not None else url parsed_base_url = urlparse(base_url_to_use)