@@ -302,30 +302,24 @@ def extractall(
302302
303303
def get_filename_from_url(data_url: str, *, timeout: float = 30.0) -> str:
    """Determine the filename of the resource behind a URL.

    Lookup order:

    1. The ``filename`` parameter of the ``Content-Disposition`` header
       returned by a ``HEAD`` request (redirects are followed).
    2. Only for URLs containing ``drive.google.com``: the text of the link
       inside the ``<span class="uc-name-size">`` element of the HTML page
       returned by a ``GET`` request (presumably the interstitial page
       Google Drive serves for large files -- confirm against callers).
    3. ``_basename(data_url)`` as the final fallback.

    Args:
        data_url: URL to inspect.
        timeout: Seconds to wait on each HTTP request before giving up, so
            an unresponsive server cannot block the caller forever.

    Returns:
        The filename found by the first lookup step that succeeds.

    Raises:
        Exception: If any step fails (network error, timeout, parsing
            error, ...). The underlying exception is chained as the cause.
    """
    try:
        head_response = requests.head(
            data_url, allow_redirects=True, timeout=timeout
        )
        content_disposition = head_response.headers.get("Content-Disposition")
        if content_disposition:
            matches = re.findall('filename="?([^";]+)"?', content_disposition)
            # Unquoted values may carry surrounding whitespace; an empty
            # result falls through to the next lookup step.
            if matches and matches[0].strip():
                return str(matches[0].strip())

        if "drive.google.com" in data_url:
            page = requests.get(data_url, timeout=timeout)
            if "text/html" in page.headers.get("Content-Type", ""):
                soup = BeautifulSoup(page.text, "html.parser")
                name_span = soup.find("span", {"class": "uc-name-size"})
                # The span may be missing or may not contain a link; in
                # either case fall back to the URL basename instead of
                # failing with an AttributeError.
                link = name_span.find("a") if name_span else None
                if link is not None:
                    return str(link.text)

        return _basename(data_url)
    except Exception as e:
        raise Exception(f"Error processing URL: { e } ") from e
331325
0 commit comments