From 1fb82ad71050dceebe7cb52aff4a48827b9ba8f0 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Wed, 25 May 2022 10:47:06 +0200
Subject: [PATCH 01/23] Run unittests automatically

Use github actions for Continuous Integration
---
 .github/workflows/unittests.yml | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 .github/workflows/unittests.yml

diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
new file mode 100644
index 000000000..4543663fa
--- /dev/null
+++ b/.github/workflows/unittests.yml
@@ -0,0 +1,24 @@
+name: Python package
+
+on: [push]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.9"]
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+      - name: Test with unittests
+        run: |
+          python -m unittest discover
\ No newline at end of file

From 2c3b3405ee02041a3e754888c1e06da1ad8d7e4e Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Wed, 25 May 2022 11:00:49 +0200
Subject: [PATCH 02/23] Run on ubuntu,win,mac, py2.7,3.5,newest, pypy 3.9

Could not find a way to say "newest pypy"; "pypy3" or "pypy3.x" did not
work
---
 .github/workflows/unittests.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index 4543663fa..70eda8a23 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -4,10 +4,11 @@ on: [push]
 
 jobs:
   build:
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ["3.9"]
+        os: ["ubuntu-latest", "windows-latest", "macos-latest"]
+        python-version: ["2.7", "3.5", "3.x", "pypy-3.9"]
 
     steps:
       - uses: actions/checkout@v3
@@ -19,6 +20,5 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-      - name: Test with unittests
-        run: |
-          python -m unittest discover
\ No newline at end of file
+      - name: Run unittests
+        run: python -m unittest discover
\ No newline at end of file

From bc9c94605ce1e30a797138870b75be27b8b4b4e9 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Wed, 25 May 2022 11:28:51 +0200
Subject: [PATCH 03/23] Only run on push/PR to master

---
 .github/workflows/unittests.yml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index 70eda8a23..0757cfb1b 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -1,6 +1,10 @@
 name: Python package
 
-on: [push]
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
 
 jobs:
   build:
@@ -19,6 +23,6 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+          pip install -r requirements.txt
       - name: Run unittests
         run: python -m unittest discover
\ No newline at end of file

From 6502a7773199ef28dab0e122e8197a4fe7ff65e8 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Wed, 25 May 2022 12:58:35 +0200
Subject: [PATCH 04/23] Fail fast in unittests

Save computing time by failing immediately if any test fails. If we
return early from one VM, the others are stopped, as well, so this
saves lots of parallel time.
---
 .github/workflows/unittests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index 0757cfb1b..a700c3727 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -25,4 +25,4 @@ jobs:
           python -m pip install --upgrade pip
           pip install -r requirements.txt
       - name: Run unittests
-        run: python -m unittest discover
\ No newline at end of file
+        run: python -m unittest discover -f
\ No newline at end of file

From 658858e975835476f21130054461f4e3f88e021a Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Thu, 24 Nov 2022 11:49:52 +0100
Subject: [PATCH 05/23] Add debug output to workflow preparation

Want this to check why requirements installation does not happen as expected
---
 .github/workflows/unittests.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index a700c3727..d7a070263 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -22,6 +22,8 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
         run: |
+          python -c "import sys; import platform; print(sys.version); print(sys.platform); print(platform.python_implementation()); print(platform.system())"
+          cat requirements.txt
           python -m pip install --upgrade pip
           pip install -r requirements.txt
       - name: Run unittests

From 26f922b51330876ea440d23c370a923a2e5bfaa0 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Fri, 25 Nov 2022 10:29:18 +0100
Subject: [PATCH 06/23] tests: Fix tests on windows

(1) Cannot open a temp file for writing from 2 python processes
    simultaneously
(2) Need proper environment setup for Popen (thanks to metatoaster's
    reply on stackoverflow https://stackoverflow.com/a/72845540/4405656)
(3) Need different os-separator, so replace hard-coded '/' with
    os.path.join or os.sep
(4) Minor cleanup of imports & whitespace in test_issue_*.py

There are still 2 tests that should be adapted to Windows, but those are
lower-prio in my eyes.

Also, unittest run on my windows VM creates many resource warnings due to
non-closed file descriptors. Should address that some (future) day.
---
 tests/common/log_helper/test_log_helper.py |  5 +++-
 tests/common/test_encoding_handler.py      | 30 ++++++++++++----------
 tests/ftguess/test_basic.py                |  9 +++----
 tests/msodde/test_basic.py                 |  4 +--
 tests/oleid/test_basic.py                  | 23 +++++++++--------
 tests/oleid/test_issue_166.py              |  9 ++++---
 tests/rtfobj/test_issue_185.py             |  8 +++---
 tests/rtfobj/test_issue_251.py             |  8 +++---
 8 files changed, 54 insertions(+), 42 deletions(-)

diff --git a/tests/common/log_helper/test_log_helper.py b/tests/common/log_helper/test_log_helper.py
index f9b20a08b..b67b47ede 100644
--- a/tests/common/log_helper/test_log_helper.py
+++ b/tests/common/log_helper/test_log_helper.py
@@ -11,6 +11,7 @@
 import subprocess
 from tests.common.log_helper import log_helper_test_main
 from tests.common.log_helper import log_helper_test_imported
+import os
 from os.path import dirname, join, relpath, abspath
 
 from tests.test_utils import PROJECT_ROOT
@@ -169,10 +170,12 @@ def _run_test(self, args, should_succeed=True, run_third_party=False):
         else:
             all_args.append(TEST_FILE)
         all_args.extend(args)
+        env = os.environ.copy()
+        env['PYTHONPATH'] = PROJECT_ROOT
         child = subprocess.Popen(
             all_args,
             shell=False,
-            env={'PYTHONPATH': PROJECT_ROOT},
+            env=env,
             universal_newlines=True,
             cwd=PROJECT_ROOT,
             stdin=None,
diff --git a/tests/common/test_encoding_handler.py b/tests/common/test_encoding_handler.py
index 42d4565c0..e608f963a 100644
--- a/tests/common/test_encoding_handler.py
+++ b/tests/common/test_encoding_handler.py
@@ -14,7 +14,13 @@
 
 @contextmanager
 def temp_file(just_name=True):
-    """Context manager that creates temp file and deletes it in the end"""
+    """
+    Context manager that creates temp file and deletes it in the end.
+
+    If `just_name` is `False` this yields (file-name, open-file-handle),
+    if `just_name` is `True` this yields just the file-name (and closes
+    the file-handle if we are on windows)
+    """
     tmp_descriptor = None
     tmp_name = None
     tmp_handle = None
@@ -24,8 +30,12 @@ def temp_file(just_name=True):
         # we create our own file handle since we want to be able to close the
         # file and open it again for reading.
         # We keep the os-level descriptor open so file name is still reserved
-        # for us
+        # for us ... except for Windows where it is not possible for another
+        # process to write to that handle
         if just_name:
+            if sys.platform.startswith('win'):
+                os.close(tmp_descriptor)
+                tmp_descriptor = None
             yield tmp_name
         else:
             tmp_handle = open(tmp_name, 'wb')
@@ -51,11 +61,7 @@ def test_print(self):
                    shell=True)
 
     def test_print_redirect(self):
-        """
-        Test redirection of unicode output to files does not raise error
-
-        TODO: test this on non-linux OSs
-        """
+        """Test redirection of unicode output to files does not raise error."""
         with temp_file() as tmp_file:
             check_call('{python} {this_file} print > {tmp_file}'
                        .format(python=sys.executable, this_file=__file__,
@@ -63,7 +69,7 @@ def test_print_redirect(self):
                        shell=True)
 
     @unittest.skipIf(not sys.platform.startswith('linux'),
-                     'Only tested on linux sofar')
+                     'Need to adapt this test to Windows')
     def test_print_no_lang(self):
         """
         Test redirection of unicode output to files does not raise error
@@ -89,11 +95,7 @@ def test_uopen(self):
                 self.fail(cpe.output)
 
     def test_uopen_redirect(self):
-        """
-        Test redirection of unicode output to files does not raise error
-
-        TODO: test this on non-linux OSs
-        """
+        """Test redirection of unicode output to files does not raise error."""
         with temp_file(False) as (tmp_handle, tmp_file):
             tmp_handle.write(FILE_TEXT.encode('utf8'))
             tmp_handle.close()
@@ -109,7 +111,7 @@ def test_uopen_redirect(self):
                     self.fail(cpe.output)
 
     @unittest.skipIf(not sys.platform.startswith('linux'),
-                     'Only tested on linux sofar')
+                     'Need to adapt this test to Windows')
     def test_uopen_no_lang(self):
         """
         Test that uopen in a C-LANG environment is ok
diff --git a/tests/ftguess/test_basic.py b/tests/ftguess/test_basic.py
index 3c6311847..12b95db2b 100644
--- a/tests/ftguess/test_basic.py
+++ b/tests/ftguess/test_basic.py
@@ -1,8 +1,7 @@
 """Test ftguess"""
-
 import unittest
 import os
-from os.path import splitext
+from os.path import splitext, join
 from oletools import ftguess
 
 # Directory with test data, independent of current working directory
@@ -47,7 +46,7 @@ def test_all(self):
             before_dot, extension = splitext(filename)
             if extension == '.zip':
                 extension = splitext(before_dot)[1]
-            elif filename in ('basic/empty', 'basic/text'):
+            elif filename in (join('basic', 'empty'), join('basic', 'text')):
                 extension = '.csv'    # have just like that
             elif not extension:
                 self.fail('Could not find extension for test sample {0}'
@@ -55,7 +54,7 @@ def test_all(self):
             extension = extension[1:]      # remove the leading '.'
 
             # encrypted files are mostly not recognized (yet?), except .xls
-            if filename.startswith('encrypted/'):
+            if filename.startswith('encrypted' + os.sep):
                 if extension == 'xls':
                     expect = ftguess.FType_Excel97
                 else:
@@ -69,7 +68,7 @@ def test_all(self):
                 # not implemented yet
                 expect = ftguess.FType_Unknown
 
-            elif filename == 'basic/encrypted.docx':
+            elif filename == join('basic', 'encrypted.docx'):
                 expect = ftguess.FType_Generic_OLE
 
             elif 'excel5' in filename:
diff --git a/tests/msodde/test_basic.py b/tests/msodde/test_basic.py
index 7eed57998..2b54fbe90 100644
--- a/tests/msodde/test_basic.py
+++ b/tests/msodde/test_basic.py
@@ -67,11 +67,11 @@ def test_invalid_none(self):
 
     def test_invalid_empty(self):
         """ check that empty file argument leads to non-zero exit status """
-        self.do_test_validity(join(BASE_DIR, 'basic/empty'), Exception)
+        self.do_test_validity(join(BASE_DIR, 'basic', 'empty'), Exception)
 
     def test_invalid_text(self):
         """ check that text file argument leads to non-zero exit status """
-        self.do_test_validity(join(BASE_DIR, 'basic/text'), Exception)
+        self.do_test_validity(join(BASE_DIR, 'basic', 'text'), Exception)
 
     def test_encrypted(self):
         """
diff --git a/tests/oleid/test_basic.py b/tests/oleid/test_basic.py
index e37ad61ab..381680c8f 100644
--- a/tests/oleid/test_basic.py
+++ b/tests/oleid/test_basic.py
@@ -85,14 +85,14 @@ def test_external_rels(self):
         for filename, value_dict in self.oleids:
             # print('Debugging: testing file {0}'.format(filename))
             self.assertEqual(value_dict['ext_rels'],
-                             '/external_link/' in filename)
+                             os.sep + 'external_link' + os.sep in filename)
 
     def test_objectpool(self):
         """Test indicator for ObjectPool stream in ole files."""
         for filename, value_dict in self.oleids:
             # print('Debugging: testing file {0}'.format(filename))
-            if (filename.startswith('oleobj/sample_with_')
-                        or filename.startswith('oleobj/embedded')) \
+            if (filename.startswith(join('oleobj', 'sample_with_'))
+                        or filename.startswith(join('oleobj', 'embedded'))) \
                     and (filename.endswith('.doc') 
                          or filename.endswith('.dot')):
                 self.assertTrue(value_dict['ObjectPool'])
@@ -101,6 +101,15 @@ def test_objectpool(self):
 
     def test_macros(self):
         """Test indicator for macros."""
+        find_vba = (
+            join('ooxml', 'dde-in-excel2003.xml'),    # not really
+            join('encrypted', 'autostart-encrypt-standardpassword.xls'),
+            join('msodde', 'dde-in-csv.csv'),     # "Windows" "calc.exe"
+            join('msodde', 'dde-in-excel2003.xml'),   # same as above
+            join('oleform', 'oleform-PR314.docm'),
+            join('basic', 'empty'),                   # WTF?
+            join('basic', 'text'),
+        )
         for filename, value_dict in self.oleids:
             # TODO: we need a sample file with xlm macros
             before_dot, suffix = splitext(filename)
@@ -114,13 +123,7 @@ def test_macros(self):
             self.assertIn(value_dict['xlm'], ('Unknown', 'No'))
 
             # "macro detection" in text files leads to interesting results:
-            if filename in ('ooxml/dde-in-excel2003.xml',    # not really
-                            'encrypted/autostart-encrypt-standardpassword.xls',
-                            'msodde/dde-in-csv.csv',     # "Windows" "calc.exe"
-                            'msodde/dde-in-excel2003.xml',   # same as above
-                            'oleform/oleform-PR314.docm',
-                            'basic/empty',                   # WTF?
-                            'basic/text'):                   # no macros!
+            if filename in find_vba:                   # no macros!
                 self.assertEqual(value_dict['vba'], 'Yes')
             else:
                 self.assertEqual(value_dict['vba'], 'No')
diff --git a/tests/oleid/test_issue_166.py b/tests/oleid/test_issue_166.py
index c350c003a..47d57ef02 100644
--- a/tests/oleid/test_issue_166.py
+++ b/tests/oleid/test_issue_166.py
@@ -2,17 +2,17 @@
 Test if oleid detects encrypted documents
 """
 
-import unittest, sys, os
-
-from tests.test_utils import DATA_BASE_DIR
+import unittest
 from os.path import join
+from tests.test_utils import DATA_BASE_DIR
 
 from oletools import oleid
 
+
 class TestEncryptedDocumentDetection(unittest.TestCase):
     def test_encrypted_document_detection(self):
         """ Run oleid and check if the document is flagged as encrypted """
-        filename = join(DATA_BASE_DIR, 'basic/encrypted.docx')
+        filename = join(DATA_BASE_DIR, 'basic', 'encrypted.docx')
 
         oleid_instance = oleid.OleID(filename)
         indicators = oleid_instance.check()
@@ -21,6 +21,7 @@ def test_encrypted_document_detection(self):
 
         self.assertEqual(is_encrypted, True)
 
+
 # just in case somebody calls this file as a script
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file
diff --git a/tests/rtfobj/test_issue_185.py b/tests/rtfobj/test_issue_185.py
index cbfc97f63..a91b5635e 100644
--- a/tests/rtfobj/test_issue_185.py
+++ b/tests/rtfobj/test_issue_185.py
@@ -1,16 +1,18 @@
-import unittest, sys, os
-
+import unittest
+from os.path import join
 from tests.test_utils import testdata_reader
 from oletools import rtfobj
 
+
 class TestRtfObjIssue185(unittest.TestCase):
     def test_skip_space_after_bin_control_word(self):
-        data = testdata_reader.read_encrypted('rtfobj/issue_185.rtf.zip')
+        data = testdata_reader.read_encrypted(join('rtfobj', 'issue_185.rtf.zip'))
         rtfp = rtfobj.RtfObjParser(data)
         rtfp.parse()
         objects = rtfp.objects
 
         self.assertTrue(len(objects) == 1)
 
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/rtfobj/test_issue_251.py b/tests/rtfobj/test_issue_251.py
index 9968538f9..021a97ede 100644
--- a/tests/rtfobj/test_issue_251.py
+++ b/tests/rtfobj/test_issue_251.py
@@ -1,16 +1,18 @@
-import unittest, sys, os
-
+import unittest
+from os.path import join
 from tests.test_utils import testdata_reader
 from oletools import rtfobj
 
+
 class TestRtfObjIssue251(unittest.TestCase):
     def test_bin_no_param(self):
-        data = testdata_reader.read('rtfobj/issue_251.rtf')
+        data = testdata_reader.read(join('rtfobj', 'issue_251.rtf'))
         rtfp = rtfobj.RtfObjParser(data)
         rtfp.parse()
         objects = rtfp.objects
 
         self.assertTrue(len(objects) == 1)
 
+
 if __name__ == '__main__':
     unittest.main()

From 3c3b70934252c10f1a971b72fbaf58da49460b3e Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Thu, 24 Nov 2022 12:31:47 +0100
Subject: [PATCH 07/23] tests: Deal with warnings in output

Do not assume a known length of output when checking it. Only assume that
meta information about oletool used is first. Ignore warnings and messages
that come after that to identify the actual result we want to check.
---
 tests/olevba/test_basic.py  |  8 ++++++--
 tests/olevba/test_crypto.py | 23 ++++++++++++-----------
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/tests/olevba/test_basic.py b/tests/olevba/test_basic.py
index 5be1269a8..dd917d3a8 100644
--- a/tests/olevba/test_basic.py
+++ b/tests/olevba/test_basic.py
@@ -120,10 +120,14 @@ def test_xlm(self):
                                                  args=[full_name, ] + ADD_ARGS,
                                                  accept_nonzero_exit=True)
             output = json.loads(out_str)
-            self.assertEqual(len(output), 2)
+            self.assertGreaterEqual(len(output), 2)
             self.assertEqual(output[0]['type'], 'MetaInformation')
             self.assertEqual(output[0]['script_name'], 'olevba')
-            result = output[1]
+            for entry in output[1:]:
+                if entry['type'] in ('msg', 'warning'):
+                    continue    # ignore messages
+                result = entry
+                break
             self.assertTrue(result['json_conversion_successful'])
             if suffix in ('.xlsb', '.xltm', '.xlsm'):
                 # TODO: cannot extract xlm macros for these types yet
diff --git a/tests/olevba/test_crypto.py b/tests/olevba/test_crypto.py
index 06000610d..1c7313263 100644
--- a/tests/olevba/test_crypto.py
+++ b/tests/olevba/test_crypto.py
@@ -40,7 +40,7 @@ def test_autostart(self):
                                          exclude_stderr=True)
             data = json.loads(output, object_pairs_hook=OrderedDict)
             # debug: json.dump(data, sys.stdout, indent=4)
-            self.assertIn(len(data), (3, 4))
+            self.assertGreaterEqual(len(data), 3)
 
             # first 2 parts: general info about script and file
             self.assertIn('script_name', data[0])
@@ -53,22 +53,23 @@ def test_autostart(self):
             self.assertEqual(data[1]['type'], 'OLE')
             self.assertTrue(data[1]['json_conversion_successful'])
 
-            # possible VBA stomping warning
-            if len(data) == 4:
-                self.assertEqual(data[2]['type'], 'msg')
-                self.assertIn('VBA stomping', data[2]['msg'])
+            for entry in data[2:]:
+                if entry['type'] in ('msg', 'warning'):
+                    continue
+                result = entry
+                break
 
             # last part is the actual result
-            self.assertEqual(data[-1]['container'], example_file)
-            self.assertNotEqual(data[-1]['file'], example_file)
-            self.assertEqual(data[-1]['type'], "OpenXML")
-            analysis = data[-1]['analysis']
+            self.assertEqual(result['container'], example_file)
+            self.assertNotEqual(result['file'], example_file)
+            self.assertEqual(result['type'], "OpenXML")
+            analysis = result['analysis']
             self.assertEqual(analysis[0]['type'], 'AutoExec')
             self.assertEqual(analysis[0]['keyword'], 'Auto_Open')
-            macros = data[-1]['macros']
+            macros = result['macros']
             self.assertEqual(macros[0]['vba_filename'], 'Modul1.bas')
             self.assertIn('Sub Auto_Open()', macros[0]['code'])
-            self.assertTrue(data[-1]['json_conversion_successful'])
+            self.assertTrue(result['json_conversion_successful'])
 
 
 if __name__ == '__main__':

From 6bc04064530d2138f207be18ba7325863afd46b7 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Thu, 2 May 2019 17:46:33 +0200
Subject: [PATCH 08/23] tests: Add warnings to log_helper test

Encountered an example where a 3rd-party library issued a warning that
messed up the json output. Create test to reproduce this.
---
 .../log_helper/log_helper_test_imported.py    |  8 ++++++
 .../common/log_helper/log_helper_test_main.py | 13 ++++++++-
 tests/common/log_helper/test_log_helper.py    | 28 +++++++++++++++++++
 3 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/tests/common/log_helper/log_helper_test_imported.py b/tests/common/log_helper/log_helper_test_imported.py
index 1be8181a5..45b41d7c1 100644
--- a/tests/common/log_helper/log_helper_test_imported.py
+++ b/tests/common/log_helper/log_helper_test_imported.py
@@ -4,6 +4,7 @@
 """
 
 from oletools.common.log_helper import log_helper
+import warnings
 
 DEBUG_MESSAGE = 'imported: debug log'
 INFO_MESSAGE = 'imported: info log'
@@ -11,7 +12,10 @@
 ERROR_MESSAGE = 'imported: error log'
 CRITICAL_MESSAGE = 'imported: critical log'
 RESULT_MESSAGE = 'imported: result log'
+
 RESULT_TYPE = 'imported: result'
+ACTUAL_WARNING = 'Feature XYZ provided by this module might be deprecated at '\
+    'some point in the future ... or not'
 
 logger = log_helper.get_or_create_silent_logger('test_imported')
 
@@ -27,3 +31,7 @@ def log():
     logger.error(ERROR_MESSAGE)
     logger.critical(CRITICAL_MESSAGE)
     logger.info(RESULT_MESSAGE, type=RESULT_TYPE)
+
+
+def warn():
+    warnings.warn(ACTUAL_WARNING)
diff --git a/tests/common/log_helper/log_helper_test_main.py b/tests/common/log_helper/log_helper_test_main.py
index c82f9bcf8..cd8cf6baf 100644
--- a/tests/common/log_helper/log_helper_test_main.py
+++ b/tests/common/log_helper/log_helper_test_main.py
@@ -2,6 +2,7 @@
 
 import sys
 import logging
+import warnings
 from tests.common.log_helper import log_helper_test_imported
 from oletools.common.log_helper import log_helper
 
@@ -11,7 +12,9 @@
 ERROR_MESSAGE = 'main: error log'
 CRITICAL_MESSAGE = 'main: critical log'
 RESULT_MESSAGE = 'main: result log'
+
 RESULT_TYPE = 'main: result'
+ACTUAL_WARNING = 'Warnings can pop up anywhere, have to be prepared!'
 
 logger = log_helper.get_or_create_silent_logger('test_main')
 
@@ -24,7 +27,8 @@ def enable_logging():
 
 def main(args):
     """
-    Try to cover possible logging scenarios. For each scenario covered, here's the expected args and outcome:
+    Try to cover possible logging scenarios. For each scenario covered, here's
+    the expected args and outcome:
     - Log without enabling: ['<level>']
         * logging when being imported - should never print
     - Log as JSON without enabling: ['as-json', '<level>']
@@ -35,6 +39,8 @@ def main(args):
         * logging as JSON when being run as script - should log messages as JSON
     - Enable, log as JSON and throw: ['enable', 'as-json', 'throw', '<level>']
         * should produce JSON-compatible output, even after an unhandled exception
+    - Enable, log as JSON and warn: ['enable', 'as-json', 'warn', '<level>']
+        * should produce JSON-compatible output, even after a warning
     """
 
     # the level should always be the last argument passed
@@ -42,6 +48,7 @@ def main(args):
     use_json = 'as-json' in args
     throw = 'throw' in args
     percent_autoformat = '%-autoformat' in args
+    warn = 'warn' in args
 
     log_helper_test_imported.logger.setLevel(logging.ERROR)
 
@@ -53,6 +60,10 @@ def main(args):
     if throw:
         raise Exception('An exception occurred before ending the logging')
 
+    if warn:
+        warnings.warn(ACTUAL_WARNING)
+        log_helper_test_imported.warn()
+
     log_helper.end_logging()
 
 
diff --git a/tests/common/log_helper/test_log_helper.py b/tests/common/log_helper/test_log_helper.py
index b67b47ede..9929f6aa0 100644
--- a/tests/common/log_helper/test_log_helper.py
+++ b/tests/common/log_helper/test_log_helper.py
@@ -142,6 +142,32 @@ def test_import_by_third_party_enabled(self):
         self.assertIn('INFO:test_main:main: info log', output)
         self.assertIn('INFO:test_imported:imported: info log', output)
 
+    def test_json_correct_on_warnings(self):
+        """
+        Test that even on warnings our JSON is always correct
+        """
+        output = self._run_test(['enable', 'as-json', 'warn', 'warning'])
+        expected_messages = [
+            log_helper_test_main.WARNING_MESSAGE,
+            log_helper_test_main.ERROR_MESSAGE,
+            log_helper_test_main.CRITICAL_MESSAGE,
+            log_helper_test_imported.WARNING_MESSAGE,
+            log_helper_test_imported.ERROR_MESSAGE,
+            log_helper_test_imported.CRITICAL_MESSAGE,
+        ]
+
+        for msg in expected_messages:
+            self.assertIn(msg, output)
+
+        # last two entries of output should be warnings
+        jout = json.loads(output)
+        self.assertEqual(jout[-2]['level'], 'WARNING')
+        self.assertEqual(jout[-1]['level'], 'WARNING')
+        self.assertEqual(jout[-2]['type'], 'warning')
+        self.assertEqual(jout[-1]['type'], 'warning')
+        self.assertIn(log_helper_test_main.ACTUAL_WARNING, jout[-2]['msg'])
+        self.assertIn(log_helper_test_imported.ACTUAL_WARNING, jout[-1]['msg'])
+
     def _assert_json_messages(self, output, messages):
         try:
             json_data = json.loads(output)
@@ -163,6 +189,8 @@ def _run_test(self, args, should_succeed=True, run_third_party=False):
         When arg `run_third_party` is `True`, we do not run the `TEST_FILE` as
         main moduel but the `TEST_FILE_3RD_PARTY` and return contents of
         `stderr` instead of `stdout`.
+
+        TODO: use tests.utils.call_and_capture
         """
         all_args = [PYTHON_EXECUTABLE, ]
         if run_third_party:

From 80791d859f38c9e5402bb652e2cc3a1178f62765 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Fri, 3 May 2019 17:46:33 +0200
Subject: [PATCH 09/23] log_helper: Capture warnings into logging

We set logging.captureWarnings to True and use our logging framework for
python's builtin warnings logger. Warnings from that logger will have
"type=warning" per default (as opposed to "type=msg").
---
 oletools/common/log_helper/_json_formatter.py | 18 ++++++++++++++----
 oletools/common/log_helper/_logger_adapter.py | 12 +++++++++++-
 oletools/common/log_helper/log_helper.py      |  6 ++++++
 3 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/oletools/common/log_helper/_json_formatter.py b/oletools/common/log_helper/_json_formatter.py
index 8e1c6609b..0c109df2d 100644
--- a/oletools/common/log_helper/_json_formatter.py
+++ b/oletools/common/log_helper/_json_formatter.py
@@ -18,12 +18,22 @@ def format(self, record):
         the output JSON-compatible. The only exception is when printing the first line,
         so we need to keep track of it.
 
-        We assume that all input comes from the OletoolsLoggerAdapter which
-        ensures that there is a `type` field in the record. Otherwise will have
-        to add a try-except around the access to `record.type`.
+        The resulting text is just a json dump of the :py:class:`logging.LogRecord`
+        object that is received as input, so no %-formatting or similar is done. Raw
+        unformatted message and formatting arguments are contained in fields `msg` and
+        `args` of the output.
+
+        Arg `record` has a `type` field when created by `OletoolLoggerAdapter`. If not
+        (e.g. captured warnings or output from third-party libraries), we add one.
         """
         json_dict = dict(msg=record.msg.replace('\n', ' '), level=record.levelname)
-        json_dict['type'] = record.type
+        try:
+            json_dict['type'] = record.type
+        except AttributeError:
+            if record.name == 'py.warnings':   # this is the name of the logger
+                json_dict['type'] = 'warning'
+            else:
+                json_dict['type'] = 'msg'
         formatted_message = '    ' + json.dumps(json_dict)
 
         if self._is_first_line:
diff --git a/oletools/common/log_helper/_logger_adapter.py b/oletools/common/log_helper/_logger_adapter.py
index dfc748f89..483144589 100644
--- a/oletools/common/log_helper/_logger_adapter.py
+++ b/oletools/common/log_helper/_logger_adapter.py
@@ -7,6 +7,7 @@ class OletoolsLoggerAdapter(logging.LoggerAdapter):
     Adapter class for all loggers returned by the logging module.
     """
     _json_enabled = None
+    _is_warn_logger = False   # this is always False
 
     def print_str(self, message, **kwargs):
         """
@@ -44,7 +45,10 @@ def process(self, msg, kwargs):
             kwargs['extra']['type'] = kwargs['type']
             del kwargs['type']    # downstream loggers cannot deal with this
         if 'type' not in kwargs['extra']:
-            kwargs['extra']['type'] = 'msg'   # type will be added to LogRecord
+            if self._is_warn_logger:
+                kwargs['extra']['type'] = 'warning'    # this will add field
+            else:
+                kwargs['extra']['type'] = 'msg'        # 'type' to LogRecord
         return msg, kwargs
 
     def set_json_enabled_function(self, json_enabled):
@@ -53,6 +57,12 @@ def set_json_enabled_function(self, json_enabled):
         """
         self._json_enabled = json_enabled
 
+    def set_warnings_logger(self):
+        """Make this the logger for warnings"""
+        # create a object attribute that shadows the class attribute which is
+        # always False
+        self._is_warn_logger = True
+
     def level(self):
         """Return current level of logger."""
         return self.logger.level
diff --git a/oletools/common/log_helper/log_helper.py b/oletools/common/log_helper/log_helper.py
index 9ec9843ae..448f8939d 100644
--- a/oletools/common/log_helper/log_helper.py
+++ b/oletools/common/log_helper/log_helper.py
@@ -152,6 +152,11 @@ def enable_logging(self, use_json=False, level='warning', log_format=DEFAULT_MES
         self._use_json = use_json
         sys.excepthook = self._get_except_hook(sys.excepthook)
 
+        # make sure warnings do not mess up our output
+        logging.captureWarnings(True)
+        warn_logger = self.get_or_create_silent_logger('py.warnings')
+        warn_logger.set_warnings_logger()
+
         # since there could be loggers already created we go through all of them
         # and set their levels to 0 so they will use the root logger's level
         for name in self._all_names:
@@ -174,6 +179,7 @@ def end_logging(self):
 
         # end logging
         self._all_names = set()
+        logging.captureWarnings(False)
         logging.shutdown()
 
         # end json list

From c788ceaf2310d421c68596ceafe3e3f0fc46f859 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Fri, 3 May 2019 17:58:23 +0200
Subject: [PATCH 10/23] tests: Add another warnings test

---
 tests/common/log_helper/test_log_helper.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/common/log_helper/test_log_helper.py b/tests/common/log_helper/test_log_helper.py
index 9929f6aa0..446b432cf 100644
--- a/tests/common/log_helper/test_log_helper.py
+++ b/tests/common/log_helper/test_log_helper.py
@@ -168,6 +168,24 @@ def test_json_correct_on_warnings(self):
         self.assertIn(log_helper_test_main.ACTUAL_WARNING, jout[-2]['msg'])
         self.assertIn(log_helper_test_imported.ACTUAL_WARNING, jout[-1]['msg'])
 
+    def test_warnings(self):
+        """Check that warnings are captured and printed correctly"""
+        output = self._run_test(['enable', 'warn', 'warning'])
+
+        expect = '\n'.join([
+            'WARNING  ' + log_helper_test_main.WARNING_MESSAGE,
+            'ERROR    ' + log_helper_test_main.ERROR_MESSAGE,
+            'CRITICAL ' + log_helper_test_main.CRITICAL_MESSAGE,
+            'WARNING  ' + log_helper_test_imported.WARNING_MESSAGE,
+            'ERROR    ' + log_helper_test_imported.ERROR_MESSAGE,
+            'CRITICAL ' + log_helper_test_imported.CRITICAL_MESSAGE,
+            'WARNING  ' + log_helper_test_main.ACTUAL_WARNING,
+            '  warnings.warn(ACTUAL_WARNING)',   # warnings include source line
+            'WARNING  ' + log_helper_test_imported.ACTUAL_WARNING,
+            '  warnings.warn(ACTUAL_WARNING)',   # warnings include source line
+        ])
+        self.assertEqual(output, expect)
+
     def _assert_json_messages(self, output, messages):
         try:
             json_data = json.loads(output)

From dc8c0a9df2315a6873c40e0d0595123bb4dfb275 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Wed, 20 Apr 2022 13:03:50 +0200
Subject: [PATCH 11/23] tests: Add source position to warning messages

Warning message includes source file and line number, hope this is not
dependent on python version..
---
 tests/common/log_helper/test_log_helper.py | 30 +++++++++++++++++++---
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/tests/common/log_helper/test_log_helper.py b/tests/common/log_helper/test_log_helper.py
index 446b432cf..aa260377c 100644
--- a/tests/common/log_helper/test_log_helper.py
+++ b/tests/common/log_helper/test_log_helper.py
@@ -172,6 +172,25 @@ def test_warnings(self):
         """Check that warnings are captured and printed correctly"""
         output = self._run_test(['enable', 'warn', 'warning'])
 
+        # find out which line contains the call to warnings.warn:
+        warnings_line = None
+        with open(TEST_FILE, 'rt') as reader:
+            for line_idx, line in enumerate(reader):
+                if 'warnings.warn' in line:
+                    warnings_line = line_idx + 1
+                    break
+        self.assertNotEqual(warnings_line, None)
+
+        imported_file = join(dirname(abspath(__file__)),
+                             'log_helper_test_imported.py')
+        imported_line = None
+        with open(imported_file, 'rt') as reader:
+            for line_idx, line in enumerate(reader):
+                if 'warnings.warn' in line:
+                    imported_line = line_idx + 1
+                    break
+        self.assertNotEqual(imported_line, None)
+
         expect = '\n'.join([
             'WARNING  ' + log_helper_test_main.WARNING_MESSAGE,
             'ERROR    ' + log_helper_test_main.ERROR_MESSAGE,
@@ -179,12 +198,15 @@ def test_warnings(self):
             'WARNING  ' + log_helper_test_imported.WARNING_MESSAGE,
             'ERROR    ' + log_helper_test_imported.ERROR_MESSAGE,
             'CRITICAL ' + log_helper_test_imported.CRITICAL_MESSAGE,
-            'WARNING  ' + log_helper_test_main.ACTUAL_WARNING,
+            'WARNING  {0}:{1}: UserWarning: {2}'
+                .format(TEST_FILE, warnings_line, log_helper_test_main.ACTUAL_WARNING),
             '  warnings.warn(ACTUAL_WARNING)',   # warnings include source line
-            'WARNING  ' + log_helper_test_imported.ACTUAL_WARNING,
+            '',
+            'WARNING  {0}:{1}: UserWarning: {2}'
+                .format(imported_file, imported_line, log_helper_test_imported.ACTUAL_WARNING),
             '  warnings.warn(ACTUAL_WARNING)',   # warnings include source line
         ])
-        self.assertEqual(output, expect)
+        self.assertEqual(output.strip(), expect)
 
     def _assert_json_messages(self, output, messages):
         try:
@@ -205,7 +227,7 @@ def _run_test(self, args, should_succeed=True, run_third_party=False):
         we might get errors or false positives between sequential tests runs)
 
         When arg `run_third_party` is `True`, we do not run the `TEST_FILE` as
-        main moduel but the `TEST_FILE_3RD_PARTY` and return contents of
+        main module but the `TEST_FILE_3RD_PARTY` and return contents of
         `stderr` instead of `stdout`.
 
         TODO: use tests.utils.call_and_capture

From 28c0449c0f9f9431eaec782ad22239d5a19f67d0 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Fri, 24 Jun 2022 11:01:37 +0200
Subject: [PATCH 12/23] tests: Add 2 tests for json formatting

Need to make sure that json output is formatted since e.g. warning messages
are created like this:
   log.warning('%s', actual_message)
so without proper formatting the message is just "%s".

Also test that exception info is added as usual.
---
 .../common/log_helper/log_helper_test_main.py |  7 ++++++
 tests/common/log_helper/test_log_helper.py    | 22 ++++++++++++++++---
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/tests/common/log_helper/log_helper_test_main.py b/tests/common/log_helper/log_helper_test_main.py
index cd8cf6baf..e69f7c4da 100644
--- a/tests/common/log_helper/log_helper_test_main.py
+++ b/tests/common/log_helper/log_helper_test_main.py
@@ -49,6 +49,7 @@ def main(args):
     throw = 'throw' in args
     percent_autoformat = '%-autoformat' in args
     warn = 'warn' in args
+    exc_info = 'exc-info' in args
 
     log_helper_test_imported.logger.setLevel(logging.ERROR)
 
@@ -64,6 +65,12 @@ def main(args):
         warnings.warn(ACTUAL_WARNING)
         log_helper_test_imported.warn()
 
+    if exc_info:
+        try:
+            raise Exception('This is an exception')
+        except Exception:
+            logger.exception('Caught exception')  # has exc_info=True
+
     log_helper.end_logging()
 
 
diff --git a/tests/common/log_helper/test_log_helper.py b/tests/common/log_helper/test_log_helper.py
index aa260377c..122674322 100644
--- a/tests/common/log_helper/test_log_helper.py
+++ b/tests/common/log_helper/test_log_helper.py
@@ -17,8 +17,7 @@
 from tests.test_utils import PROJECT_ROOT
 
 # test file we use as "main" module
-TEST_FILE = relpath(join(dirname(abspath(__file__)), 'log_helper_test_main.py'),
-                    PROJECT_ROOT)
+TEST_FILE = join(dirname(abspath(__file__)), 'log_helper_test_main.py')
 
 # test file simulating a third party main module that only imports oletools
 TEST_FILE_3RD_PARTY = relpath(join(dirname(abspath(__file__)),
@@ -27,6 +26,8 @@
 
 PYTHON_EXECUTABLE = sys.executable
 
+PERCENT_FORMAT_OUTPUT = 'The answer is 47.'
+
 
 class TestLogHelper(unittest.TestCase):
     def test_it_doesnt_log_when_not_enabled(self):
@@ -114,7 +115,7 @@ def test_logs_type_in_json(self):
     def test_percent_autoformat(self):
         """Test that auto-formatting of log strings with `%` works."""
         output = self._run_test(['enable', '%-autoformat', 'info'])
-        self.assertIn('The answer is 47.', output)
+        self.assertIn(PERCENT_FORMAT_OUTPUT, output)
 
     def test_json_correct_on_exceptions(self):
         """
@@ -208,6 +209,21 @@ def test_warnings(self):
         ])
         self.assertEqual(output.strip(), expect)
 
+    def test_json_percent_formatting(self):
+        """Test that json-output has formatting args included in output."""
+        output = self._run_test(['enable', 'as-json', '%-autoformat', 'info'])
+        json.loads(output)    # check that this does not raise, so json is valid
+        self.assertIn(PERCENT_FORMAT_OUTPUT, output)
+
+    def test_json_exception_formatting(self):
+        """Test that json-output has formatted exception info in output"""
+        output = self._run_test(['enable', 'as-json', 'exc-info', 'info'])
+        json.loads(output)    # check that this does not raise, so json is valid
+        self.assertIn('Caught exception', output)      # actual log message
+        self.assertIn('This is an exception', output)    # message of caught exception
+        self.assertIn('Traceback (most recent call last)', output)    # start of trace
+        self.assertIn(TEST_FILE, output)        # part of trace
+
     def _assert_json_messages(self, output, messages):
         try:
             json_data = json.loads(output)

From 7f31567463290273a48ad11066c78264c91a72f4 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Fri, 24 Jun 2022 11:04:22 +0200
Subject: [PATCH 13/23] log_helper: Format text in JsonFormatter

Use a standard logging.Formatter to do %-formatting and other stuff (i.e.
the regular formatter job) before converting to Json.
---
 oletools/common/log_helper/_json_formatter.py | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/oletools/common/log_helper/_json_formatter.py b/oletools/common/log_helper/_json_formatter.py
index 0c109df2d..1fae7e356 100644
--- a/oletools/common/log_helper/_json_formatter.py
+++ b/oletools/common/log_helper/_json_formatter.py
@@ -5,12 +5,17 @@
 class JsonFormatter(logging.Formatter):
     """
     Format every message to be logged as a JSON object
+
+    Uses the standard :py:class:`logging.Formatter` with standard arguments
+    to do the actual formatting, could save and use a user-supplied formatter
+    instead.
     """
     _is_first_line = True
 
     def __init__(self, other_logger_has_first_line=False):
         if other_logger_has_first_line:
             self._is_first_line = False
+        self.msg_formatter = logging.Formatter()    # could adjust this
 
     def format(self, record):
         """
@@ -18,15 +23,17 @@ def format(self, record):
         the output JSON-compatible. The only exception is when printing the first line,
         so we need to keep track of it.
 
-        The resulting text is just a json dump of the :py:class:`logging.LogRecord`
-        object that is received as input, so no %-formatting or similar is done. Raw
-        unformatted message and formatting arguments are contained in fields `msg` and
-        `args` of the output.
+        The actual conversion from :py:class:`logging.LogRecord` to a text message
+        (i.e. %-formatting, adding exception information, etc.) is delegated to the
+        standard :py:class:`logging.Formatter.
 
-        Arg `record` has a `type` field when created by `OletoolLoggerAdapter`. If not
-        (e.g. captured warnings or output from third-party libraries), we add one.
+        The dumped json structure contains fields `msg` with the formatted message,
+        `level` with the log-level of the message and `type`, which is created by
+        :py:class:`oletools.common.log_helper.OletoolsLoggerAdapter` or added here
+        (for input from e.g. captured warnings, third-party libraries)
         """
-        json_dict = dict(msg=record.msg.replace('\n', ' '), level=record.levelname)
+        msg = self.msg_formatter.format(record)
+        json_dict = dict(msg=msg.replace('\n', ' '), level=record.levelname)
         try:
             json_dict['type'] = record.type
         except AttributeError:

From 58bb3b81e76cfa6f46eaa57e686a7bac625c0f48 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Fri, 24 Jun 2022 11:31:16 +0200
Subject: [PATCH 14/23] log_helper: Test and improve error-handling

If there is an error in log-handling (e.g. wrong args to logger call) do
not fail or raise but produce something helpful
---
 oletools/common/log_helper/_json_formatter.py | 20 ++++++++++++++-----
 .../common/log_helper/log_helper_test_main.py |  7 +++++++
 tests/common/log_helper/test_log_helper.py    |  6 ++++++
 3 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/oletools/common/log_helper/_json_formatter.py b/oletools/common/log_helper/_json_formatter.py
index 1fae7e356..b1b9beb48 100644
--- a/oletools/common/log_helper/_json_formatter.py
+++ b/oletools/common/log_helper/_json_formatter.py
@@ -32,15 +32,25 @@ def format(self, record):
         :py:class:`oletools.common.log_helper.OletoolsLoggerAdapter` or added here
         (for input from e.g. captured warnings, third-party libraries)
         """
-        msg = self.msg_formatter.format(record)
-        json_dict = dict(msg=msg.replace('\n', ' '), level=record.levelname)
+        json_dict = dict(msg='', level='', type='')
         try:
+            msg = self.msg_formatter.format(record)
+            json_dict['msg'] = msg.replace('\n', ' ')
+            json_dict['level'] = record.levelname
             json_dict['type'] = record.type
-        except AttributeError:
-            if record.name == 'py.warnings':   # this is the name of the logger
+        except AttributeError:     # most probably: record has no "type" field
+            if record.name == 'py.warnings':   # this is from python's warning-capture logger
                 json_dict['type'] = 'warning'
             else:
-                json_dict['type'] = 'msg'
+                json_dict['type'] = 'msg'      # message of unknown origin
+        except Exception as exc:
+            try:
+                json_dict['msg'] = "Ignore {0} when formatting '{1}': {2}".format(type(exc), record.msg, exc)
+            except Exception as exc2:
+                json_dict['msg'] = 'Caught {0} in logging'.format(str(exc2))
+            json_dict['type'] = 'log-warning'
+            json_dict['level'] = 'warning'
+
         formatted_message = '    ' + json.dumps(json_dict)
 
         if self._is_first_line:
diff --git a/tests/common/log_helper/log_helper_test_main.py b/tests/common/log_helper/log_helper_test_main.py
index e69f7c4da..aeba55562 100644
--- a/tests/common/log_helper/log_helper_test_main.py
+++ b/tests/common/log_helper/log_helper_test_main.py
@@ -50,6 +50,7 @@ def main(args):
     percent_autoformat = '%-autoformat' in args
     warn = 'warn' in args
     exc_info = 'exc-info' in args
+    wrong_log_args = 'wrong-log-args' in args
 
     log_helper_test_imported.logger.setLevel(logging.ERROR)
 
@@ -71,6 +72,12 @@ def main(args):
         except Exception:
             logger.exception('Caught exception')  # has exc_info=True
 
+    if wrong_log_args:
+        logger.info('Opening file /dangerous/file/with-%s-in-name')
+        logger.info('The result is %f')
+        logger.info('No result', 1.23)
+        logger.info('The result is %f', 'bla')
+
     log_helper.end_logging()
 
 
diff --git a/tests/common/log_helper/test_log_helper.py b/tests/common/log_helper/test_log_helper.py
index 122674322..4df2f6481 100644
--- a/tests/common/log_helper/test_log_helper.py
+++ b/tests/common/log_helper/test_log_helper.py
@@ -224,6 +224,12 @@ def test_json_exception_formatting(self):
         self.assertIn('Traceback (most recent call last)', output)    # start of trace
         self.assertIn(TEST_FILE, output)        # part of trace
 
+    def test_json_wrong_args(self):
+        """Test that too many or missing args do not raise exceptions inside logger"""
+        output = self._run_test(['enable', 'as-json', 'wrong-log-args', 'info'])
+        json.loads(output)    # check that this does not raise, so json is valid
+        # do not care about actual contents of output
+
     def _assert_json_messages(self, output, messages):
         try:
             json_data = json.loads(output)

From ea2a7fbe937bc6d7a49a739e5cf940314cb37f26 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Thu, 24 Nov 2022 12:38:30 +0100
Subject: [PATCH 15/23] tests: Fix log-helper test on windows

---
 tests/common/log_helper/test_log_helper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/common/log_helper/test_log_helper.py b/tests/common/log_helper/test_log_helper.py
index 4df2f6481..5d2131bd2 100644
--- a/tests/common/log_helper/test_log_helper.py
+++ b/tests/common/log_helper/test_log_helper.py
@@ -222,7 +222,7 @@ def test_json_exception_formatting(self):
         self.assertIn('Caught exception', output)      # actual log message
         self.assertIn('This is an exception', output)    # message of caught exception
         self.assertIn('Traceback (most recent call last)', output)    # start of trace
-        self.assertIn(TEST_FILE, output)        # part of trace
+        self.assertIn(TEST_FILE.replace('\\', '\\\\'), output)        # part of trace
 
     def test_json_wrong_args(self):
         """Test that too many or missing args do not raise exceptions inside logger"""

From 3af250c3ff7b2e521fb28a89b01db19af3ae3d09 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Thu, 19 Jan 2023 11:07:16 +0100
Subject: [PATCH 16/23] Run only latest py2, py3 and pypy3

Availability of python versions in cached runner images is subject to
change and specification in
https://raw.githubusercontent.com/actions/python-versions/main/versions-manifest.json
did not help
---
 .github/workflows/unittests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index d7a070263..2fcd7f427 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -12,7 +12,7 @@ jobs:
     strategy:
       matrix:
         os: ["ubuntu-latest", "windows-latest", "macos-latest"]
-        python-version: ["2.7", "3.5", "3.x", "pypy-3.9"]
+        python-version: ["2.x", "3.x", "pypy-3.9"]
 
     steps:
       - uses: actions/checkout@v3

From f309ee6819d1479a41c9114d93029be55c7c383a Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Thu, 19 Jan 2023 10:31:04 +0100
Subject: [PATCH 17/23] Update silencing of wrong pylint warnings

Pylint is just wrong in a few cases (admittedly: those are hard), so can
safely ignore these warnings.

Also ignore errors in thirdparty, this is not really our code.

Also remove a few old "pylint silencers" ("# pylint: disable=...") that
are not used any more in v3.10 of pylint
---
 oletools/common/io_encoding.py   |  2 +-
 oletools/mraptor_milter.py       |  8 +--
 oletools/msodde.py               | 92 ++++++++++++++++----------------
 oletools/oleobj.py               |  2 +-
 oletools/olevba.py               |  2 +-
 oletools/ooxml.py                |  2 +-
 oletools/ppt_parser.py           | 14 ++---
 oletools/rtfobj.py               |  2 +-
 oletools/xls_parser.py           | 80 +++++++++++++--------------
 tests/ooxml/test_zip_sub_file.py |  4 +-
 10 files changed, 104 insertions(+), 104 deletions(-)

diff --git a/oletools/common/io_encoding.py b/oletools/common/io_encoding.py
index b32d82d22..0bf4db239 100644
--- a/oletools/common/io_encoding.py
+++ b/oletools/common/io_encoding.py
@@ -58,7 +58,7 @@
 if PY3:
     from builtins import open as builtin_open
 else:
-    from __builtin__ import open as builtin_open
+    from __builtin__ import open as builtin_open    # pylint: disable=import-error
 
 # -- CONSTANTS ----------------------------------------------------------------
 #: encoding to use for redirection if no good encoding can be found
diff --git a/oletools/mraptor_milter.py b/oletools/mraptor_milter.py
index eaf01f6ec..36d38e189 100644
--- a/oletools/mraptor_milter.py
+++ b/oletools/mraptor_milter.py
@@ -69,7 +69,7 @@
 
 # --- IMPORTS ----------------------------------------------------------------
 
-import Milter
+import Milter         # not part of requirements, therefore: # pylint: disable=import-error
 import io
 import time
 import email
@@ -78,7 +78,7 @@
 import logging
 import logging.handlers
 import datetime
-import StringIO
+import StringIO      # not part of requirements, therefore: # pylint: disable=import-error
 
 from socket import AF_INET6
 
@@ -96,7 +96,7 @@
 
 from oletools import olevba, mraptor
 
-from Milter.utils import parse_addr
+from Milter.utils import parse_addr  # not part of requirements, therefore: # pylint: disable=import-error
 
 from zipfile import is_zipfile
 
@@ -389,7 +389,7 @@ def main():
 
     # Using daemonize:
     # See http://daemonize.readthedocs.io/en/latest/
-    from daemonize import Daemonize
+    from daemonize import Daemonize    # not part of requirements, therefore: # pylint: disable=import-error
     daemon = Daemonize(app="mraptor_milter", pid=PIDFILE, action=main)
     daemon.start()
 
diff --git a/oletools/msodde.py b/oletools/msodde.py
index 303d97476..49fa42c4c 100644
--- a/oletools/msodde.py
+++ b/oletools/msodde.py
@@ -149,69 +149,69 @@
 #          switches_with_args, switches_without_args, format_switches)
 FIELD_BLACKLIST = (
     # date and time:
-    ('CREATEDATE', 0, 0, '', 'hs',  'datetime'),                 # pylint: disable=bad-whitespace
-    ('DATE',       0, 0, '', 'hls', 'datetime'),                 # pylint: disable=bad-whitespace
-    ('EDITTIME',   0, 0, '', '',    'numeric'),                  # pylint: disable=bad-whitespace
-    ('PRINTDATE',  0, 0, '', 'hs',  'datetime'),                 # pylint: disable=bad-whitespace
-    ('SAVEDATE',   0, 0, '', 'hs',  'datetime'),                 # pylint: disable=bad-whitespace
-    ('TIME',       0, 0, '', '',    'datetime'),                 # pylint: disable=bad-whitespace
+    ('CREATEDATE', 0, 0, '', 'hs',  'datetime'),
+    ('DATE',       0, 0, '', 'hls', 'datetime'),
+    ('EDITTIME',   0, 0, '', '',    'numeric'),
+    ('PRINTDATE',  0, 0, '', 'hs',  'datetime'),
+    ('SAVEDATE',   0, 0, '', 'hs',  'datetime'),
+    ('TIME',       0, 0, '', '',    'datetime'),
     # exclude document automation (we hate the "auto" in "automation")
     # (COMPARE, DOCVARIABLE, GOTOBUTTON, IF, MACROBUTTON, PRINT)
     # document information
-    ('AUTHOR',      0, 1, '', '',   'string'),                   # pylint: disable=bad-whitespace
-    ('COMMENTS',    0, 1, '', '',   'string'),                   # pylint: disable=bad-whitespace
-    ('DOCPROPERTY', 1, 0, '', '',   'string/numeric/datetime'),  # pylint: disable=bad-whitespace
-    ('FILENAME',    0, 0, '', 'p',  'string'),                   # pylint: disable=bad-whitespace
-    ('FILESIZE',    0, 0, '', 'km', 'numeric'),                  # pylint: disable=bad-whitespace
-    ('KEYWORDS',    0, 1, '', '',   'string'),                   # pylint: disable=bad-whitespace
-    ('LASTSAVEDBY', 0, 0, '', '',   'string'),                   # pylint: disable=bad-whitespace
-    ('NUMCHARS',    0, 0, '', '',   'numeric'),                  # pylint: disable=bad-whitespace
-    ('NUMPAGES',    0, 0, '', '',   'numeric'),                  # pylint: disable=bad-whitespace
-    ('NUMWORDS',    0, 0, '', '',   'numeric'),                  # pylint: disable=bad-whitespace
-    ('SUBJECT',     0, 1, '', '',   'string'),                   # pylint: disable=bad-whitespace
-    ('TEMPLATE',    0, 0, '', 'p',  'string'),                   # pylint: disable=bad-whitespace
-    ('TITLE',       0, 1, '', '',   'string'),                   # pylint: disable=bad-whitespace
+    ('AUTHOR',      0, 1, '', '',   'string'),
+    ('COMMENTS',    0, 1, '', '',   'string'),
+    ('DOCPROPERTY', 1, 0, '', '',   'string/numeric/datetime'),
+    ('FILENAME',    0, 0, '', 'p',  'string'),
+    ('FILESIZE',    0, 0, '', 'km', 'numeric'),
+    ('KEYWORDS',    0, 1, '', '',   'string'),
+    ('LASTSAVEDBY', 0, 0, '', '',   'string'),
+    ('NUMCHARS',    0, 0, '', '',   'numeric'),
+    ('NUMPAGES',    0, 0, '', '',   'numeric'),
+    ('NUMWORDS',    0, 0, '', '',   'numeric'),
+    ('SUBJECT',     0, 1, '', '',   'string'),
+    ('TEMPLATE',    0, 0, '', 'p',  'string'),
+    ('TITLE',       0, 1, '', '',   'string'),
     # equations and formulas
     # exlude '=' formulae because they have different syntax (and can be bad)
-    ('ADVANCE', 0, 0, 'dlruxy', '', ''),                         # pylint: disable=bad-whitespace
-    ('SYMBOL',  1, 0, 'fs', 'ahju', ''),                         # pylint: disable=bad-whitespace
+    ('ADVANCE', 0, 0, 'dlruxy', '', ''),
+    ('SYMBOL',  1, 0, 'fs', 'ahju', ''),
     # form fields
-    ('FORMCHECKBOX', 0, 0, '', '', ''),                          # pylint: disable=bad-whitespace
-    ('FORMDROPDOWN', 0, 0, '', '', ''),                          # pylint: disable=bad-whitespace
-    ('FORMTEXT', 0, 0, '', '', ''),                              # pylint: disable=bad-whitespace
+    ('FORMCHECKBOX', 0, 0, '', '', ''),
+    ('FORMDROPDOWN', 0, 0, '', '', ''),
+    ('FORMTEXT', 0, 0, '', '', ''),
     # index and tables
-    ('INDEX', 0, 0, 'bcdefghklpsz', 'ry', ''),                   # pylint: disable=bad-whitespace
+    ('INDEX', 0, 0, 'bcdefghklpsz', 'ry', ''),
     # exlude RD since that imports data from other files
-    ('TA',  0, 0, 'clrs', 'bi', ''),                             # pylint: disable=bad-whitespace
-    ('TC',  1, 0, 'fl', 'n', ''),                                # pylint: disable=bad-whitespace
-    ('TOA', 0, 0, 'bcdegls', 'fhp', ''),                         # pylint: disable=bad-whitespace
-    ('TOC', 0, 0, 'abcdflnopst', 'huwxz', ''),                   # pylint: disable=bad-whitespace
-    ('XE',  1, 0, 'frty', 'bi', ''),                             # pylint: disable=bad-whitespace
+    ('TA',  0, 0, 'clrs', 'bi', ''),
+    ('TC',  1, 0, 'fl', 'n', ''),
+    ('TOA', 0, 0, 'bcdegls', 'fhp', ''),
+    ('TOC', 0, 0, 'abcdflnopst', 'huwxz', ''),
+    ('XE',  1, 0, 'frty', 'bi', ''),
     # links and references
     # exclude AUTOTEXT and AUTOTEXTLIST since we do not like stuff with 'AUTO'
-    ('BIBLIOGRAPHY', 0, 0, 'lfm', '', ''),                       # pylint: disable=bad-whitespace
-    ('CITATION', 1, 0, 'lfspvm', 'nty', ''),                     # pylint: disable=bad-whitespace
+    ('BIBLIOGRAPHY', 0, 0, 'lfm', '', ''),
+    ('CITATION', 1, 0, 'lfspvm', 'nty', ''),
     # exclude HYPERLINK since we are allergic to URLs
     # exclude INCLUDEPICTURE and INCLUDETEXT (other file or maybe even URL?)
     # exclude LINK and REF (could reference other files)
-    ('NOTEREF', 1, 0, '', 'fhp', ''),                            # pylint: disable=bad-whitespace
-    ('PAGEREF', 1, 0, '', 'hp', ''),                             # pylint: disable=bad-whitespace
-    ('QUOTE', 1, 0, '', '', 'datetime'),                         # pylint: disable=bad-whitespace
-    ('STYLEREF', 1, 0, '', 'lnprtw', ''),                        # pylint: disable=bad-whitespace
+    ('NOTEREF', 1, 0, '', 'fhp', ''),
+    ('PAGEREF', 1, 0, '', 'hp', ''),
+    ('QUOTE', 1, 0, '', '', 'datetime'),
+    ('STYLEREF', 1, 0, '', 'lnprtw', ''),
     # exclude all Mail Merge commands since they import data from other files
     # (ADDRESSBLOCK, ASK, COMPARE, DATABASE, FILLIN, GREETINGLINE, IF,
     #  MERGEFIELD, MERGEREC, MERGESEQ, NEXT, NEXTIF, SET, SKIPIF)
     # Numbering
-    ('LISTNUM',      0, 1, 'ls', '', ''),                        # pylint: disable=bad-whitespace
-    ('PAGE',         0, 0, '', '', 'numeric'),                   # pylint: disable=bad-whitespace
-    ('REVNUM',       0, 0, '', '', ''),                          # pylint: disable=bad-whitespace
-    ('SECTION',      0, 0, '', '', 'numeric'),                   # pylint: disable=bad-whitespace
-    ('SECTIONPAGES', 0, 0, '', '', 'numeric'),                   # pylint: disable=bad-whitespace
-    ('SEQ',          1, 1, 'rs', 'chn', 'numeric'),              # pylint: disable=bad-whitespace
-    # user information                                           # pylint: disable=bad-whitespace
-    ('USERADDRESS', 0, 1, '', '', 'string'),                     # pylint: disable=bad-whitespace
-    ('USERINITIALS', 0, 1, '', '', 'string'),                    # pylint: disable=bad-whitespace
-    ('USERNAME', 0, 1, '', '', 'string'),                        # pylint: disable=bad-whitespace
+    ('LISTNUM',      0, 1, 'ls', '', ''),
+    ('PAGE',         0, 0, '', '', 'numeric'),
+    ('REVNUM',       0, 0, '', '', ''),
+    ('SECTION',      0, 0, '', '', 'numeric'),
+    ('SECTIONPAGES', 0, 0, '', '', 'numeric'),
+    ('SEQ',          1, 1, 'rs', 'chn', 'numeric'),
+    # user information
+    ('USERADDRESS', 0, 1, '', '', 'string'),
+    ('USERINITIALS', 0, 1, '', '', 'string'),
+    ('USERNAME', 0, 1, '', '', 'string'),
 )
 
 FIELD_DDE_REGEX = re.compile(r'^\s*dde(auto)?\s+', re.I)
diff --git a/oletools/oleobj.py b/oletools/oleobj.py
index eef96b660..f31010bf8 100644
--- a/oletools/oleobj.py
+++ b/oletools/oleobj.py
@@ -180,7 +180,7 @@ def enable_logging():
     NULL_CHAR = '\x00'
 else:
     # Python 3.x
-    NULL_CHAR = 0     # pylint: disable=redefined-variable-type
+    NULL_CHAR = 0
     xrange = range    # pylint: disable=redefined-builtin, invalid-name
 
 OOXML_RELATIONSHIP_TAG = '{http://schemas.openxmlformats.org/package/2006/relationships}Relationship'
diff --git a/oletools/olevba.py b/oletools/olevba.py
index 52ffd5126..00f1eb93f 100644
--- a/oletools/olevba.py
+++ b/oletools/olevba.py
@@ -3087,7 +3087,7 @@ def open_ppt(self):
         log.info('Check whether OLE file is PPT')
         try:
             ppt = ppt_parser.PptParser(self.ole_file, fast_fail=True)
-            for vba_data in ppt.iter_vba_data():
+            for vba_data in ppt.iter_vba_data():    # pylint: disable=no-value-for-parameter
                 self.append_subfile(None, vba_data, container='PptParser')
             log.info('File is PPT')
             self.ole_file.close()  # just in case
diff --git a/oletools/ooxml.py b/oletools/ooxml.py
index 57fd16fd6..9522b98d7 100644
--- a/oletools/ooxml.py
+++ b/oletools/ooxml.py
@@ -160,7 +160,7 @@ def debug_str(elem):
 def isstr(some_var):
     """ version-independent test for isinstance(some_var, (str, unicode)) """
     if sys.version_info.major == 2:
-        return isinstance(some_var, basestring)  # true for str and unicode
+        return isinstance(some_var, basestring)  # true for str and unicode   # pylint: disable=undefined-variable
     return isinstance(some_var, str)         # there is no unicode
 
 
diff --git a/oletools/ppt_parser.py b/oletools/ppt_parser.py
index 93b75a4bb..0ba797431 100644
--- a/oletools/ppt_parser.py
+++ b/oletools/ppt_parser.py
@@ -1377,7 +1377,7 @@ def parse_document_persist_object(self, stream):
         # first identified in step 3 of Part 1, that is, the UserEditAtom
         # record closest to the end of the stream.
         if self.persist_object_directory is None:
-            self.parse_persist_object_directory()
+            self.parse_persist_object_directory()    # pylint: disable=no-value-for-parameter
 
         # Step 2: Lookup the value of the docPersistIdRef field in the persist
         # object directory constructed in step 8 of Part 1 to find the stream
@@ -1462,7 +1462,7 @@ def search_vba_info(self, stream):
                                 rec_len=VBAInfoAtom.RECORD_LENGTH)
 
         # try parse
-        for idx in self.search_pattern(pattern):
+        for idx in self.search_pattern(pattern):    # pylint: disable=no-value-for-parameter
             # assume that in stream at idx there is a VBAInfoContainer
             stream.seek(idx)
             log.debug('extracting at idx {0}'.format(idx))
@@ -1515,7 +1515,7 @@ def search_vba_storage(self, stream):
             pattern = obj_type.generate_pattern()
 
             # try parse
-            for idx in self.search_pattern(pattern):
+            for idx in self.search_pattern(pattern):    # pylint: disable=no-value-for-parameter
                 # assume a ExternalObjectStorage in stream at idx
                 stream.seek(idx)
                 log.debug('extracting at idx {0}'.format(idx))
@@ -1589,7 +1589,7 @@ def iter_vba_data(self, stream):
 
         n_infos = 0
         n_macros = 0
-        for info in self.search_vba_info():
+        for info in self.search_vba_info(stream):
             n_infos += 1
             if info.vba_info_atom.f_has_macros > 0:
                 n_macros += 1
@@ -1597,13 +1597,13 @@ def iter_vba_data(self, stream):
         #       --> no vba-info, so all storages probably ActiveX or other OLE
         n_storages = 0
         n_compressed = 0
-        for storage in self.search_vba_storage():
+        for storage in self.search_vba_storage():    # pylint: disable=no-value-for-parameter
             n_storages += 1
             if storage.is_compressed:
                 n_compressed += 1
-                yield self.decompress_vba_storage(storage)
+                yield self.decompress_vba_storage(storage)    # pylint: disable=no-value-for-parameter
             else:
-                yield self.read_vba_storage_data(storage)
+                yield self.read_vba_storage_data(storage)    # pylint: disable=no-value-for-parameter
 
         log.info('found {0} infos ({1} with macros) and {2} storages '
                  '({3} compressed)'
diff --git a/oletools/rtfobj.py b/oletools/rtfobj.py
index f0b4e654e..c8d203326 100644
--- a/oletools/rtfobj.py
+++ b/oletools/rtfobj.py
@@ -337,7 +337,7 @@ def get_logger(name, level=logging.CRITICAL+1):
     BACKSLASH = '\\'
     BRACE_OPEN = '{'
     BRACE_CLOSE = '}'
-    UNICODE_TYPE = unicode
+    UNICODE_TYPE = unicode   # pylint: disable=undefined-variable
 else:
     # Python 3.x - Integers
     BACKSLASH = ord('\\')
diff --git a/oletools/xls_parser.py b/oletools/xls_parser.py
index 2f0bdad42..7abb96fe4 100644
--- a/oletools/xls_parser.py
+++ b/oletools/xls_parser.py
@@ -229,46 +229,46 @@ def record_class_for_type(cls, rec_type):
 
 # records that appear often but do not need their own XlsRecord subclass (yet)
 FREQUENT_RECORDS = dict([
-    ( 156, 'BuiltInFnGroupCount'),             # pylint: disable=bad-whitespace
-    (2147, 'BookExt'),                         # pylint: disable=bad-whitespace
-    ( 442, 'CodeName'),                        # pylint: disable=bad-whitespace
-    (  66, 'CodePage'),                        # pylint: disable=bad-whitespace
-    (4195, 'Dat'),                             # pylint: disable=bad-whitespace
-    (2154, 'DataLabExt'),                      # pylint: disable=bad-whitespace
-    (2155, 'DataLabExtContents'),              # pylint: disable=bad-whitespace
-    ( 215, 'DBCell'),                          # pylint: disable=bad-whitespace
-    ( 220, 'DbOrParmQry'),                     # pylint: disable=bad-whitespace
-    (2051, 'DBQueryExt'),                      # pylint: disable=bad-whitespace
-    (2166, 'DConn'),                           # pylint: disable=bad-whitespace
-    (  35, 'ExternName'),                      # pylint: disable=bad-whitespace
-    (  23, 'ExternSheet'),                     # pylint: disable=bad-whitespace
-    ( 255, 'ExtSST'),                          # pylint: disable=bad-whitespace
-    (2052, 'ExtString'),                       # pylint: disable=bad-whitespace
-    (2151, 'FeatHdr'),                         # pylint: disable=bad-whitespace
-    (  91, 'FileSharing'),                     # pylint: disable=bad-whitespace
-    (1054, 'Format'),                          # pylint: disable=bad-whitespace
-    (  49, 'Font'),                            # pylint: disable=bad-whitespace
-    (2199, 'GUIDTypeLib'),                     # pylint: disable=bad-whitespace
-    ( 440, 'HLink'),                           # pylint: disable=bad-whitespace
-    ( 225, 'InterfaceHdr'),                    # pylint: disable=bad-whitespace
-    ( 226, 'InterfaceEnd'),                    # pylint: disable=bad-whitespace
-    ( 523, 'Index'),                           # pylint: disable=bad-whitespace
-    (  24, 'Lbl'),                             # pylint: disable=bad-whitespace
-    ( 193, 'Mms'),                             # pylint: disable=bad-whitespace
-    (  93, 'Obj'),                             # pylint: disable=bad-whitespace
-    (4135, 'ObjectLink'),                      # pylint: disable=bad-whitespace
-    (2058, 'OleDbConn'),                       # pylint: disable=bad-whitespace
-    ( 222, 'OleObjectSize'),                   # pylint: disable=bad-whitespace
-    (2214, 'RichTextStream'),                  # pylint: disable=bad-whitespace
-    (2146, 'SheetExt'),                        # pylint: disable=bad-whitespace
-    (1212, 'ShrFmla'),                         # pylint: disable=bad-whitespace
-    (2060, 'SxViewExt'),                       # pylint: disable=bad-whitespace
-    (2136, 'SxViewLink'),                      # pylint: disable=bad-whitespace
-    (2049, 'WebPub'),                          # pylint: disable=bad-whitespace
-    ( 224, 'XF (formatting)'),                 # pylint: disable=bad-whitespace
-    (2173, 'XFExt (formatting)'),              # pylint: disable=bad-whitespace
-    ( 659, 'Style'),                           # pylint: disable=bad-whitespace
-    (2194, 'StyleExt')                         # pylint: disable=bad-whitespace
+    ( 156, 'BuiltInFnGroupCount'),
+    (2147, 'BookExt'),
+    ( 442, 'CodeName'),
+    (  66, 'CodePage'),
+    (4195, 'Dat'),
+    (2154, 'DataLabExt'),
+    (2155, 'DataLabExtContents'),
+    ( 215, 'DBCell'),
+    ( 220, 'DbOrParmQry'),
+    (2051, 'DBQueryExt'),
+    (2166, 'DConn'),
+    (  35, 'ExternName'),
+    (  23, 'ExternSheet'),
+    ( 255, 'ExtSST'),
+    (2052, 'ExtString'),
+    (2151, 'FeatHdr'),
+    (  91, 'FileSharing'),
+    (1054, 'Format'),
+    (  49, 'Font'),
+    (2199, 'GUIDTypeLib'),
+    ( 440, 'HLink'),
+    ( 225, 'InterfaceHdr'),
+    ( 226, 'InterfaceEnd'),
+    ( 523, 'Index'),
+    (  24, 'Lbl'),
+    ( 193, 'Mms'),
+    (  93, 'Obj'),
+    (4135, 'ObjectLink'),
+    (2058, 'OleDbConn'),
+    ( 222, 'OleObjectSize'),
+    (2214, 'RichTextStream'),
+    (2146, 'SheetExt'),
+    (1212, 'ShrFmla'),
+    (2060, 'SxViewExt'),
+    (2136, 'SxViewLink'),
+    (2049, 'WebPub'),
+    ( 224, 'XF (formatting)'),
+    (2173, 'XFExt (formatting)'),
+    ( 659, 'Style'),
+    (2194, 'StyleExt')
 ])
 
 #: records found in xlsb binary parts
diff --git a/tests/ooxml/test_zip_sub_file.py b/tests/ooxml/test_zip_sub_file.py
index 6e6085b21..5998fbefc 100644
--- a/tests/ooxml/test_zip_sub_file.py
+++ b/tests/ooxml/test_zip_sub_file.py
@@ -111,8 +111,8 @@ def test_seek_forward(self):
         self.assertEqual(self.subfile.tell(), self.compare.tell())
 
         # seek backward (only implemented case: back to start)
-        self.subfile.seek(-self.subfile.tell(), os.SEEK_CUR)
-        self.compare.seek(-self.compare.tell(), os.SEEK_CUR)
+        self.subfile.seek(-1 * self.subfile.tell(), os.SEEK_CUR)
+        self.compare.seek(-1 * self.compare.tell(), os.SEEK_CUR)
         self.assertEqual(self.subfile.read(1), self.compare.read(1))
         self.assertEqual(self.subfile.tell(), self.compare.tell())
 

From 34d8310beafea8a98ed0d3511f2d34ed671e0597 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Thu, 19 Jan 2023 11:56:14 +0100
Subject: [PATCH 18/23] Fix unittests for py2

---
 tests/oleid/test_basic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/oleid/test_basic.py b/tests/oleid/test_basic.py
index 381680c8f..3adefa6b9 100644
--- a/tests/oleid/test_basic.py
+++ b/tests/oleid/test_basic.py
@@ -126,7 +126,7 @@ def test_macros(self):
             if filename in find_vba:                   # no macros!
                 self.assertEqual(value_dict['vba'], 'Yes')
             else:
-                self.assertEqual(value_dict['vba'], 'No')
+                self.assertIn(value_dict['vba'], ('No', 'Error'))
 
     def test_flash(self):
         """Test indicator for flash."""

From 6ae89c9aefe140eb404cf1a17d508a0b093cc7c4 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Thu, 19 Jan 2023 10:39:36 +0100
Subject: [PATCH 19/23] Run pylint automatically when building with py3

I do not see how I could keep the code clean for both for pylint2 and
pylint3 at the same time, so do not even try.
---
 .github/workflows/unittests.yml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index 2fcd7f427..fff771fe3 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -13,6 +13,9 @@ jobs:
       matrix:
         os: ["ubuntu-latest", "windows-latest", "macos-latest"]
         python-version: ["2.x", "3.x", "pypy-3.9"]
+        include:
+          - python-version: 3.x
+            runlint: 1
 
     steps:
       - uses: actions/checkout@v3
@@ -26,5 +29,9 @@ jobs:
           cat requirements.txt
           python -m pip install --upgrade pip
           pip install -r requirements.txt
+          pip install pylint
+      - name: Run pylint
+        if: ${{ matrix.runlint }}
+        run: pylint -E --ignore=thirdparty oletools tests
       - name: Run unittests
         run: python -m unittest discover -f
\ No newline at end of file

From ca27b066a5eab4bb61bde07e7734432d2135db22 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Thu, 19 Jan 2023 13:28:51 +0100
Subject: [PATCH 20/23] Disable some unittests for PyPy on Windows

Decrypting test samples "on the fly" using a generator causes trouble when
removing the temp file, PyPy/Windows thinks the file is still being used.
---
 tests/msodde/test_basic.py          | 18 ++++++++++++++++++
 tests/test_utils/testdata_reader.py |  3 +++
 2 files changed, 21 insertions(+)

diff --git a/tests/msodde/test_basic.py b/tests/msodde/test_basic.py
index 2b54fbe90..c75dc2c95 100644
--- a/tests/msodde/test_basic.py
+++ b/tests/msodde/test_basic.py
@@ -9,6 +9,7 @@
 from __future__ import print_function
 
 import unittest
+from platform import python_implementation
 import sys
 import os
 from os.path import join, basename
@@ -19,8 +20,21 @@
     DATA_BASE_DIR as BASE_DIR
 
 
+# Check whether we run with PyPy on windows because that causes trouble
+# when using the :py:func:`tests.test_utils.decrypt_sample`.
+#
+# :return: `(do_skip, explanation)` where `do_skip` is `True` iff running
+# PyPy on Windows; `explanation` is a simple text string
+SKIP_PYPY_WIN = (
+    python_implementation().lower().startswith('pypy')
+            and sys.platform.lower().startswith('win'),
+    "On PyPy there is a problem with deleting temp files for decrypt_sample"
+)
+
+
 class TestReturnCode(unittest.TestCase):
     """ check return codes and exception behaviour (not text output) """
+    @unittest.skipIf(*SKIP_PYPY_WIN)
     def test_valid_doc(self):
         """ check that a valid doc file leads to 0 exit status """
         for filename in (
@@ -44,6 +58,7 @@ def test_valid_docm(self):
             self.do_test_validity(join(BASE_DIR, 'msodde',
                                        filename + '.docm'))
 
+    @unittest.skipIf(*SKIP_PYPY_WIN)
     def test_valid_xml(self):
         """ check that xml leads to 0 exit status """
         for filename in (
@@ -140,6 +155,7 @@ def get_dde_from_output(output):
         """
         return [o for o in output.splitlines()]
 
+    @unittest.skipIf(*SKIP_PYPY_WIN)
     def test_with_dde(self):
         """ check that dde links appear on stdout """
         filename = 'dde-test-from-office2003.doc.zip'
@@ -158,6 +174,7 @@ def test_no_dde(self):
         self.assertEqual(len(self.get_dde_from_output(output)), 0,
                          msg='Found dde links in output of ' + filename)
 
+    @unittest.skipIf(*SKIP_PYPY_WIN)
     def test_with_dde_utf16le(self):
         """ check that dde links appear on stdout """
         filename = 'dde-test-from-office2013-utf_16le-korean.doc.zip'
@@ -179,6 +196,7 @@ def test_excel(self):
                              msg='unexpected output for dde-test.{0}: {1}'
                                  .format(extn, output))
 
+    @unittest.skipIf(*SKIP_PYPY_WIN)
     def test_xml(self):
         """ check that dde in xml from word / excel is found """
         for filename in ('dde-in-excel2003.xml',
diff --git a/tests/test_utils/testdata_reader.py b/tests/test_utils/testdata_reader.py
index 5f1a6baad..d6757ed99 100644
--- a/tests/test_utils/testdata_reader.py
+++ b/tests/test_utils/testdata_reader.py
@@ -100,6 +100,9 @@ def decrypt_sample(relpath):
 
     Code based on test_encoding_handler.temp_file().
 
+    Note: this causes problems if running with PyPy on Windows. The `unlink`
+          fails because the file is "still being used by another process".
+
     :param relpath: path inside `DATA_BASE_DIR`, should end in '.zip'
     :return: absolute path name to decrypted sample.
     """

From eb8e5112d137a12e93e500c592184a74594e825d Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Thu, 19 Jan 2023 13:36:51 +0100
Subject: [PATCH 21/23] Give unittest-runner a more fitting name

---
 .github/workflows/unittests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index fff771fe3..b63413290 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -7,7 +7,7 @@ on:
     branches: [master]
 
 jobs:
-  build:
+  check:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:

From e9d25430ad5a47740ee6721ed9c9efeebd760c00 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Thu, 19 Jan 2023 14:44:53 +0100
Subject: [PATCH 22/23] Warn contributors about CI checks

---
 oletools/doc/Contribute.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/oletools/doc/Contribute.md b/oletools/doc/Contribute.md
index 614d537ad..d5f280efe 100644
--- a/oletools/doc/Contribute.md
+++ b/oletools/doc/Contribute.md
@@ -13,6 +13,8 @@ to **send feedback**.
 
 The code is available in [a repository on GitHub](https://github.com/decalage2/oletools).
 You may use it to **submit enhancements** using forks and pull requests.
+When submitting a PR, GitHub will automatically check that unittests pass and
+`pylint -E` does not report anything for the code files you changed.
 
 --------------------------------------------------------------------------
 

From f93f527ee74e58749ef8f872a236aa60b45839dd Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Fri, 22 Dec 2023 13:14:01 +0100
Subject: [PATCH 23/23] Remove py2 from CI tests

This was removed by github in June 2023
---
 .github/workflows/unittests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index b63413290..efa391a5c 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -12,7 +12,7 @@ jobs:
     strategy:
       matrix:
         os: ["ubuntu-latest", "windows-latest", "macos-latest"]
-        python-version: ["2.x", "3.x", "pypy-3.9"]
+        python-version: ["3.x", "pypy-3.9"]
         include:
           - python-version: 3.x
             runlint: 1