From 410663dbcaba019ca3d3744946348b56a635480b Mon Sep 17 00:00:00 2001 From: pphillips99 Date: Tue, 16 Jul 2024 13:53:27 -0500 Subject: [PATCH] fixup and tests to extract_report() to handle documented inputs (#530) --- parsedmarc/__init__.py | 21 +- samples/extract_report/changed-input.xml | 592 ++++++++++++++++++++++ samples/extract_report/nice-input.xml | 592 ++++++++++++++++++++++ samples/extract_report/nice-input.xml.gz | Bin 0 -> 931 bytes samples/extract_report/nice-input.xml.zip | Bin 0 -> 1076 bytes tests.py | 77 ++- 6 files changed, 1272 insertions(+), 10 deletions(-) create mode 100644 samples/extract_report/changed-input.xml create mode 100644 samples/extract_report/nice-input.xml create mode 100644 samples/extract_report/nice-input.xml.gz create mode 100644 samples/extract_report/nice-input.xml.zip diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 8d94b428..9d46e2c4 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -591,14 +591,19 @@ def extract_report(input_): str: The extracted text """ + def is_base64(s): + base64_regex = re.compile(r'^[A-Za-z0-9+/=]+\Z') + return bool(base64_regex.match(s)) + try: - file_object = BytesIO() - if type(input_) is str: - try: - file_object = BytesIO(b64decode(input_)) - except binascii.Error: - pass - if file_object is None: + file_object = None + if isinstance(input_, str): + if is_base64(input_): + try: + file_object = BytesIO(b64decode(input_)) + except binascii.Error: + pass + else: file_object = open(input_, "rb") elif type(input_) is bytes: file_object = BytesIO(input_) @@ -613,7 +618,7 @@ def extract_report(input_): errors='ignore') elif header.startswith(MAGIC_GZIP): report = zlib.decompress( - file_object.getvalue(), + file_object.read(), zlib.MAX_WBITS | 16).decode(errors='ignore') elif header.startswith(MAGIC_XML) or header.startswith(MAGIC_JSON): report = file_object.read().decode(errors='ignore') diff --git a/samples/extract_report/changed-input.xml b/samples/extract_report/changed-input.xml new file mode 100644 index 00000000..31eacf5a --- /dev/null +++ b/samples/extract_report/changed-input.xml @@ -0,0 +1,592 @@ + + + + fred.com + noreply-dmarc-support@google.com + https://support.google.com/a/answer/2466580 + 11038226378739404135 + + 1718236800 + 1718323199 + + + + example.com + r + r +

none

+ none + 100 + none +
+ + + 209.85.220.69 + 1 + + none + fail + pass + + + + example.com + + + + example.com + pass + + + + + + 209.85.220.41 + 2 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + gmail.com + pass + + + + + + 54.240.48.90 + 40 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.31 + 40 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.33 + 33 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.48.92 + 40 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.48.110 + 24 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.41 + 12 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + connectivityu.com + pass + + + + + + 2607:f8b0:4864:20::132 + 1 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + + + + 54.240.8.83 + 36 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.96 + 27 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.48.95 + 25 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.69 + 2252 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + + + + 54.240.48.94 + 46 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.88 + 37 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.55 + 1 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + + + + 54.240.48.93 + 24 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.41 + 23 + + none + pass + pass + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + example.com + pass + + + + + + 209.85.220.41 + 24 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + rphvac.com + none + + + + + + 209.85.220.41 + 359 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + +
diff --git a/samples/extract_report/nice-input.xml b/samples/extract_report/nice-input.xml new file mode 100644 index 00000000..ce28c8e8 --- /dev/null +++ b/samples/extract_report/nice-input.xml @@ -0,0 +1,592 @@ + + + + google.com + noreply-dmarc-support@google.com + https://support.google.com/a/answer/2466580 + 11038226378739404135 + + 1718236800 + 1718323199 + + + + example.com + r + r +

none

+ none + 100 + none +
+ + + 209.85.220.69 + 1 + + none + fail + pass + + + + example.com + + + + example.com + pass + + + + + + 209.85.220.41 + 2 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + gmail.com + pass + + + + + + 54.240.48.90 + 40 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.31 + 40 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.33 + 33 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.48.92 + 40 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.48.110 + 24 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.41 + 12 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + connectivityu.com + pass + + + + + + 2607:f8b0:4864:20::132 + 1 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + + + + 54.240.8.83 + 36 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.96 + 27 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.48.95 + 25 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.69 + 2252 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + + + + 54.240.48.94 + 46 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.88 + 37 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.55 + 1 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + + + + 54.240.48.93 + 24 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.41 + 23 + + none + pass + pass + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + example.com + pass + + + + + + 209.85.220.41 + 24 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + rphvac.com + none + + + + + + 209.85.220.41 + 359 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + +
diff --git a/samples/extract_report/nice-input.xml.gz b/samples/extract_report/nice-input.xml.gz new file mode 100644 index 0000000000000000000000000000000000000000..ed74d0e8748dbe72cf7b7ff9458f03b52038e765 GIT binary patch literal 931 zcmV;U16=$ciwFo@`fX+a18!+!Wi4rLaCLMpcx`L|?VDS7;vf)!-}@^(eQz%0V)Ufx zKJ72qea=BbBHIL5NYw7HUuZ76t!C}^*t*J-aTs8j;e!EV{IIFY$qEZe7`?xBE$4QE zDPkF+#r^H~Z(r}C+sQ+66XzIb3y2<*o5>^=m@`qQ6|P|hYp7*P%0!V;SmC5#tSGUC zSQXoYu+_m8BxORGYN33(%PJ_)oou*n?#phqR_1x@dfRns%vnj$Q_7n~N#qg_ zhA`zGn&1trc<-@J)+)g4kyMF@?e?J)N}fN<^k*yLDijpQHt)X6ULbo3M|I-rap{Lv zM0+L5Tg&%Zsho?L=nWQZJ-9@&Mj)ILo&aZNMS%r?V};5$+k##dv4*PdW4nFm#a(}R z1@oWQqp7DHP?t25OM~q91Y9UX4who_i z*%W`ZZZ8Ri-Z(y)gTS|duNa7|nR6He-!U8*4o=0vJB&L83z6lG><9)0gTghT;2lOm zml+fc3Ri)G&X9oF77Po91W4#~i9G8hn}R8=A?Qa?4H z4MgZn=Xtbn=6)3VbKuP9t~Zi1o$RvwAQ9o zN_**P$AH2t9Ld0^1_gt{m7t)nW`YqG0>gq~;nG<+@&y$Df{#CsnzZ}>PP_F2>yO-h zo4ngBuou6;ZjYeR$Pv`!-3En=M`0(|2u2cbbH8U2jkhNnBNu$;cSWe zzkPM3%j-45a30&ju_%~*!Ju$56a-(cAR4!Agp+|e z=2vdo=a0E*r4`%^j4ZE!iowL&+0o^X6a*UX|JJst&DVR9^kvCQySMr`YR@@3Dcxp? z@VN4BeSC)AuC-}zzunk%YI31O!ixl#?}bY+szxU z9u!qcb()#Aux8h3EsYCvb9UstJDs;RZ2juZD_MO67n{12dS#rOlp0%Xx&7_BRa@6) zIZFraJa&k!PJ(UX;#*a*`*U|lvz-*WUb=^);lgaU;QVLS`(Cqd+^d`a^=d?+mV)lC ziy9Z1#9P$fUFmHQck6q#cIua>Gt$m4mHTk#rq4~M-^bqAM@v`TF3ma<>3Jde)NEJJ zCqn#f22bT?J>Xqa@qVMpGi??pPf4qd$Mm{nfJP9GlC>+uIyCsx0n+2TS0!; zmhazJF1psQqb9k`^kipJ=8A1c3=S)uWnKNR=USfQ^-HUN8Roo7)!LAAs_OQWzEJlw zEqU`^tL+Lbjc;qrRd4>g$$sq}_K&?EzuZ0ik!5*Ty7d9uk2%b>^FG!jzN=UdXGDA1=to<3*mJqx>;%?GwOte5CmGqYHN`M#ly1fSp0 zgVsIPtlSfy_P*g@V{JZUy6}Olo8xpz5gxAa#-?)<8g7d5aJR9ZZF=1&Bi@joU|`mwMy1ejtUb&^m{)R<9wa6y5Q z2uu7r1*z?8=6cNcZxTOqW?jOhcQ@|+WB+Cz;LXS+$BZj4O91mY0|O%vFKGm^&~r8` aBxj@L>Hu$6HjrvYAPfQ0r-0d=fdK$#