@@ -690,12 +690,12 @@ def _download_file(
690
690
"%s - found no digests in hashlib for any of %s" , path , str (digests )
691
691
)
692
692
693
- # TODO: how do we discover the total size????
694
- # TODO: do not do it in-place, but rather into some "hidden" file
695
693
resuming = False
696
694
attempt = 0
697
- nattempts = 3 # number to do, could be incremented if we downloaded a little
698
- while attempt <= nattempts :
695
+ attempts_allowed : int = (
696
+ 3 # number to do, could be incremented if we downloaded a little
697
+ )
698
+ while attempt <= attempts_allowed :
699
699
attempt += 1
700
700
try :
701
701
if digester :
@@ -724,7 +724,6 @@ def _download_file(
724
724
downloaded_digest .update (block )
725
725
downloaded += len (block )
726
726
downloaded_in_attempt += len (block )
727
- # TODO: yield progress etc
728
727
out : dict [str , Any ] = {"done" : downloaded }
729
728
if size :
730
729
if downloaded > size and not warned :
@@ -737,7 +736,6 @@ def _download_file(
737
736
size ,
738
737
)
739
738
out ["done%" ] = 100 * downloaded / size
740
- # TODO: ETA etc
741
739
yield out
742
740
dldir .append (block )
743
741
break
@@ -749,87 +747,36 @@ def _download_file(
749
747
# Catching RequestException lets us retry on timeout & connection
750
748
# errors (among others) in addition to HTTP status errors.
751
749
except requests .RequestException as exc :
752
- sleep_amount = random .random () * 5 * attempt
753
- if os .environ .get ("DANDI_DOWNLOAD_AGGRESSIVE_RETRY" ):
754
- # in such a case if we downloaded a little more --
755
- # consider it a successful attempt
756
- if downloaded_in_attempt > 0 :
757
- lgr .debug (
758
- "%s - download failed on attempt #%d: %s, "
759
- "but did download %d bytes, so considering "
760
- "it a success and incrementing number of allowed attempts." ,
761
- path ,
762
- attempt ,
763
- exc ,
764
- downloaded_in_attempt ,
765
- )
766
- nattempts += 1
767
- # TODO: actually we should probably retry only on selected codes,
768
- if exc .response is not None :
769
- if exc .response .status_code not in (
770
- 400 , # Bad Request, but happened with gider:
771
- # https://github.com/dandi/dandi-cli/issues/87
772
- * RETRY_STATUSES ,
773
- ):
774
- lgr .debug (
775
- "%s - download failed due to response %d: %s" ,
776
- path ,
777
- exc .response .status_code ,
778
- exc ,
779
- )
780
- yield {"status" : "error" , "message" : str (exc )}
781
- return
782
- elif retry_after := exc .response .headers .get ("Retry-After" ):
783
- # playing safe
784
- if not str (retry_after ).isdigit ():
785
- # our code is wrong, do not crash but issue warning so
786
- # we might get report/fix it up
787
- lgr .warning (
788
- "%s - download failed due to response %d with non-integer"
789
- " Retry-After=%r: %s" ,
790
- path ,
791
- exc .response .status_code ,
792
- retry_after ,
793
- exc ,
794
- )
795
- yield {"status" : "error" , "message" : str (exc )}
796
- return
797
- sleep_amount = int (retry_after )
798
- lgr .debug (
799
- "%s - download failed due to response %d with "
800
- "Retry-After=%d: %s, will sleep and retry" ,
801
- path ,
802
- exc .response .status_code ,
803
- sleep_amount ,
804
- exc ,
805
- )
806
- else :
807
- lgr .debug ("%s - download failed: %s" , path , exc )
808
- yield {"status" : "error" , "message" : str (exc )}
809
- return
810
- elif attempt >= nattempts :
811
- lgr .debug (
812
- "%s - download failed after %d attempts: %s" , path , attempt , exc
813
- )
750
+ attempts_allowed_or_not = _check_if_more_attempts_allowed (
751
+ path = path ,
752
+ exc = exc ,
753
+ attempt = attempt ,
754
+ attempts_allowed = attempts_allowed ,
755
+ downloaded_in_attempt = downloaded_in_attempt ,
756
+ )
757
+ if not attempts_allowed :
814
758
yield {"status" : "error" , "message" : str (exc )}
815
759
return
816
- # if is_access_denied(exc) or attempt >= 2:
817
- # raise
818
- # sleep a little and retry
819
- else :
820
- lgr .debug (
821
- "%s - download failed on attempt #%d: %s, will sleep a bit and retry" ,
822
- path ,
823
- attempt ,
824
- exc ,
825
- )
826
- time .sleep (sleep_amount )
760
+ # for clear(er) typing, here we get only with int
761
+ assert isinstance (attempts_allowed_or_not , int )
762
+ attempts_allowed = attempts_allowed_or_not
827
763
else :
828
764
lgr .warning ("downloader logic: We should not be here!" )
829
765
766
+ final_digest = None
830
767
if downloaded_digest and not resuming :
831
768
assert downloaded_digest is not None
832
769
final_digest = downloaded_digest .hexdigest () # we care only about hex
770
+ elif digests :
771
+ if resuming :
772
+ lgr .debug ("%s - resumed download. Need to check full checksum." , path )
773
+ else :
774
+ assert not downloaded_digest
775
+ lgr .debug (
776
+ "%s - no digest was checked online. Need to check full checksum" , path
777
+ )
778
+ final_digest = get_digest (path , algo )
779
+ if final_digest :
833
780
if digest_callback is not None :
834
781
assert isinstance (algo , str )
835
782
digest_callback (algo , final_digest )
@@ -842,6 +789,7 @@ def _download_file(
842
789
yield {"checksum" : "ok" }
843
790
lgr .debug ("%s - verified that has correct %s %s" , path , algo , digest )
844
791
else :
792
+ lgr .debug ("%s - no digests were provided" , path )
845
793
# shouldn't happen with more recent metadata etc
846
794
yield {
847
795
"checksum" : "-" ,
@@ -1085,6 +1033,86 @@ def downloads_gen():
1085
1033
yield {"status" : "done" }
1086
1034
1087
1035
1036
def _check_if_more_attempts_allowed(
    path: Path,
    exc: requests.RequestException,
    attempt: int,
    attempts_allowed: int,
    downloaded_in_attempt: int,
) -> int | None:
    """
    Decide whether a failed download should be retried and, if so, sleep first.

    :param path: path being downloaded; used only in log messages
    :param exc: exception raised by the failed download attempt
    :param attempt: number of the attempt which has just failed
    :param attempts_allowed: number of attempts currently allowed
    :param downloaded_in_attempt: number of bytes received during the failed
        attempt
    :returns: the (possibly incremented) ``attempts_allowed`` if the download
        should be retried -- in that case this function has already slept
        before returning; ``None`` if the caller should give up, in which
        case no sleep is performed
    """
    if (
        os.environ.get("DANDI_DOWNLOAD_AGGRESSIVE_RETRY")
        and downloaded_in_attempt > 0
    ):
        # in such a case if we downloaded a little more --
        # consider it a successful attempt
        lgr.debug(
            "%s - download failed on attempt #%d: %s, "
            "but did download %d bytes, so considering "
            "it a success and incrementing number of allowed attempts.",
            path,
            attempt,
            exc,
            downloaded_in_attempt,
        )
        attempts_allowed += 1

    response = exc.response
    # Delay requested by the server via Retry-After, if any
    retry_after_delay: int | None = None
    # TODO: actually we should probably retry only on selected codes,
    if response is not None:
        if response.status_code not in (
            400,  # Bad Request, but happened with gider:
            # https://github.com/dandi/dandi-cli/issues/87
            *RETRY_STATUSES,
        ):
            lgr.debug(
                "%s - download failed due to response %d: %s",
                path,
                response.status_code,
                exc,
            )
            return None
        retry_after = response.headers.get("Retry-After")
        if retry_after:
            retry_after_text = str(retry_after)
            # playing safe: isdigit() alone accepts non-ASCII digits (e.g. superscripts)
            # on which int() would raise, hence the additional isascii() check.
            # NOTE: Retry-After may also be an HTTP-date, which is not supported here.
            if not (retry_after_text.isascii() and retry_after_text.isdigit()):
                # our code is wrong, do not crash but issue warning so
                # we might get report/fix it up
                lgr.warning(
                    "%s - download failed due to response %d with non-integer"
                    " Retry-After=%r: %s",
                    path,
                    response.status_code,
                    retry_after,
                    exc,
                )
                return None
            retry_after_delay = int(retry_after_text)

    # The attempt budget applies to every retryable failure, regardless of
    # whether the failure came with an HTTP response or not.
    if attempt >= attempts_allowed:
        lgr.debug("%s - download failed after %d attempts: %s", path, attempt, exc)
        return None

    sleep_amount: float
    if response is not None and retry_after_delay is not None:
        sleep_amount = retry_after_delay
        lgr.debug(
            "%s - download failed due to response %d with "
            "Retry-After=%d: %s, will sleep and retry",
            path,
            response.status_code,
            sleep_amount,
            exc,
        )
    else:
        # No delay requested by the server (or no response at all): use a
        # randomized delay whose upper bound grows with the attempt number.
        sleep_amount = random.random() * 5 * attempt
        lgr.debug(
            "%s - download failed on attempt #%d: %s, will sleep a bit and retry",
            path,
            attempt,
            exc,
        )
    time.sleep(sleep_amount)
    return attempts_allowed
1114
+
1115
+
1088
1116
def pairing(p: str, gen: Iterator[dict]) -> Iterator[tuple[str, dict]]:
    """Yield a ``(p, item)`` tuple for every item produced by ``gen``."""
    yield from ((p, item) for item in gen)
0 commit comments