From f006b534a62cf429d76d2cec3051b658404c8395 Mon Sep 17 00:00:00 2001 From: Alexander Pyhalov Date: Sat, 7 Mar 2020 15:14:27 +0300 Subject: [PATCH 1/4] Revert "Fix decoding file text data as ASCII" This reverts commit d3187bbf6614769114482c8823a6b1adae05ea3a. --- src/modules/misc.py | 5 ----- src/pkg/manifests/package:pkg.p5m | 1 - 2 files changed, 6 deletions(-) diff --git a/src/modules/misc.py b/src/modules/misc.py index dcbdf698a..d037b4db4 100644 --- a/src/modules/misc.py +++ b/src/modules/misc.py @@ -508,11 +508,6 @@ def setlocale(category, loc=None, printer=None): printer("Unable to set locale{0}; locale package may be broken " "or\nnot installed. Reverting to C locale.".format(dl)) locale.setlocale(category, "C") -# Correct preferred encoding so that we don't try to decode files with ascii codec - if locale.getpreferredencoding(False) != "UTF-8": - locale.setlocale(locale.LC_CTYPE, "en_US.UTF-8") - locale.setlocale(locale.LC_COLLATE, "en_US.UTF-8") - def N_(message): """Return its argument; used to mark strings for localization when their use is delayed by the program.""" diff --git a/src/pkg/manifests/package:pkg.p5m b/src/pkg/manifests/package:pkg.p5m index 1814843a1..2636a79b9 100644 --- a/src/pkg/manifests/package:pkg.p5m +++ b/src/pkg/manifests/package:pkg.p5m @@ -500,4 +500,3 @@ depend type=require fmri=library/python/pycurl-35 depend type=require fmri=library/python/pyopenssl-35 depend type=require fmri=library/python/rapidjson-35 depend type=require fmri=library/python/six-35 -depend type=require fmri=locale/en From 72fa78d7c076c71bbaf9cc89bea007b04f3d572f Mon Sep 17 00:00:00 2001 From: Andy Fiddaman Date: Fri, 28 Feb 2020 00:01:30 +0000 Subject: [PATCH 2/4] Fix errors resulting from UTF-8 characters in manifest files --- src/modules/manifest.py | 2 +- src/modules/search_storage.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/modules/manifest.py b/src/modules/manifest.py index e958eab4f..979cc95dd 100644 --- 
a/src/modules/manifest.py +++ b/src/modules/manifest.py @@ -1077,7 +1077,7 @@ def set_content(self, content=None, excludes=EmptyI, pathname=None, # together has to be solved somewhere else, though.) if pathname: try: - with open(pathname, "r") as mfile: + with open(pathname, "r", encoding='UTF-8') as mfile: content = mfile.read() except EnvironmentError as e: raise apx._convert_error(e) diff --git a/src/modules/search_storage.py b/src/modules/search_storage.py index b1464d42e..7ff9c3cae 100644 --- a/src/modules/search_storage.py +++ b/src/modules/search_storage.py @@ -75,7 +75,7 @@ def consistent_open(data_list, directory, timeout = 1): # in the function is greater than timeout. try: f = os.path.join(directory, d.get_file_name()) - fh = open(f, 'r') + fh = open(f, 'r', encoding='UTF-8') # If we get here, then the current index file # is present. if missing == None: From 900da3cdd52f41ea792ae726805eee5f68b71bbf Mon Sep 17 00:00:00 2001 From: Alexander Pyhalov Date: Sun, 8 Mar 2020 11:27:16 +0300 Subject: [PATCH 3/4] Test pkg_verify with utf-8 pkg data --- src/tests/cli/t_pkg_verify.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/tests/cli/t_pkg_verify.py b/src/tests/cli/t_pkg_verify.py index 2c71296e4..9f4ee77b9 100644 --- a/src/tests/cli/t_pkg_verify.py +++ b/src/tests/cli/t_pkg_verify.py @@ -71,6 +71,13 @@ class TestPkgVerify(pkg5unittest.SingleDepotTestCase): add file bronze2 mode=644 owner=root group=sys path=/etc/bronze2 close """ + baz10 = """ + open baz@1.0,5.11-0:20200308T075512Z + add dir mode=0755 owner=root group=sys path=/opt + add dir mode=0755 owner=root group=sys path="/opt/моя программа" + add file файл mode=0755 owner=root group=sys path="/opt/моя программа/файл" + close + """ sysattr = """ open sysattr@1.0-0 @@ -94,7 +101,8 @@ class TestPkgVerify(pkg5unittest.SingleDepotTestCase): "dricon_ep": """\n""", "permission": "", "bronze1": "", - "bronze2": "" + "bronze2": "", + "файл": "" } def setUp(self): 
@@ -434,6 +442,20 @@ def test_03_editable(self): self.output.index("etc/preserved") self.output.index("editable file has been changed") + def test_04_unicode(self): + """Ensure that verify can parse unicode manifests""" + + self.pkgsend_bulk(self.rurl, self.baz10) + self.image_create(self.rurl) + self.pkg("install foo baz") + self.pkg_verify("baz") + + # Should fail with exit code 1 if package is not ok. + portable.remove(os.path.join(self.get_img_path(), "opt", "моя программа", "файл")) + self.pkg_verify("baz", exit=1) + self.assertTrue("Unexpected Exception" not in self.output) + self.assertTrue("PACKAGE" in self.output and "STATUS" in self.output) + def test_verify_changed_manifest(self): """Test that running package verify won't change the manifest of an installed package even if it has changed in the repository. From a2b4a1f35a417047bc33a17fccae928db02f3e17 Mon Sep 17 00:00:00 2001 From: Alexander Pyhalov Date: Tue, 10 Mar 2020 20:49:46 +0300 Subject: [PATCH 4/4] More fixes for UTF-8 characters in manifest files --- src/modules/client/image.py | 12 ++++++------ src/modules/client/transport/transport.py | 4 ++-- src/modules/manifest.py | 14 +++++++------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/modules/client/image.py b/src/modules/client/image.py index 9dcdf3485..181638240 100644 --- a/src/modules/client/image.py +++ b/src/modules/client/image.py @@ -3702,9 +3702,9 @@ def _create_fast_lookups(self, progtrack=None): of, op = self.temporary_file(close=False) bf, bp = self.temporary_file(close=False) - sf = os.fdopen(sf, "w") - of = os.fdopen(of, "w") - bf = os.fdopen(bf, "w") + sf = os.fdopen(sf, "w", encoding="UTF-8") + of = os.fdopen(of, "w", encoding="UTF-8") + bf = os.fdopen(bf, "w", encoding="UTF-8") # We need to make sure the files are coordinated. 
timestamp = int(time.time()) @@ -3837,7 +3837,7 @@ def _load_actdict(self, progtrack): try: of = open(os.path.join(self.__action_cache_dir, - "actions.offsets"), "r") + "actions.offsets"), "r", encoding="UTF-8") except IOError as e: if e.errno != errno.ENOENT: raise @@ -3902,7 +3902,7 @@ def _get_stripped_actions_file(self, internal=False): return the corresponding file object.""" sf = open(os.path.join(self.__action_cache_dir, - "actions.stripped"), "r") + "actions.stripped"), "r", encoding="UTF-8") sversion = sf.readline().rstrip() stimestamp = sf.readline().rstrip() if internal: @@ -3917,7 +3917,7 @@ def _load_conflicting_keys(self): pth = os.path.join(self.__action_cache_dir, "keys.conflicting") try: - with open(pth, "r") as fh: + with open(pth, "r", encoding="UTF-8") as fh: version = fh.readline().rstrip() if version != "VERSION 1": return None diff --git a/src/modules/client/transport/transport.py b/src/modules/client/transport/transport.py index 08890a5e5..f8246e10b 100644 --- a/src/modules/client/transport/transport.py +++ b/src/modules/client/transport/transport.py @@ -1686,7 +1686,7 @@ def _prefetch_manifests_list(self, mxfr, mlist, excludes=misc.EmptyI): continue try: - mf = open(dl_path) + mf = open(dl_path, "r", encoding="UTF-8") mcontent = mf.read() mf.close() manifest.FactoredManifest(fmri, @@ -1776,7 +1776,7 @@ def _verify_manifest(self, fmri, mfstpath=None, content=None, pub=None): return False if mfstpath: - mf = open(mfstpath) + mf = open(mfstpath, "r", encoding="UTF-8") mcontent = mf.read() mf.close() elif content is not None: diff --git a/src/modules/manifest.py b/src/modules/manifest.py index 979cc95dd..ef629c0a5 100644 --- a/src/modules/manifest.py +++ b/src/modules/manifest.py @@ -1229,7 +1229,7 @@ def search_dict(file_path, excludes, return_line=False, log = lambda x: None try: - file_handle = open(file_path, "r") + file_handle = open(file_path, "r", encoding="UTF-8") except EnvironmentError as e: if e.errno != errno.ENOENT: raise @@ 
-1364,7 +1364,7 @@ def store(self, mfst_path): e.filename) raise - mfile = os.fdopen(fd, "w") + mfile = os.fdopen(fd, "w", encoding="UTF-8") # # We specifically avoid sorting manifests before writing @@ -1668,7 +1668,7 @@ def __storebytype(self): except EnvironmentError as e: raise apx._convert_error(e) - f = os.fdopen(fd, "w") + f = os.fdopen(fd, "w", encoding="UTF-8") try: for a in acts: f.write("{0}\n".format(a)) @@ -1695,7 +1695,7 @@ def create_cache(name, refs): try: fd, fn = tempfile.mkstemp(dir=t_dir, prefix=name + ".") - with os.fdopen(fd, "w") as f: + with os.fdopen(fd, "w", encoding="UTF-8") as f: f.writelines(refs()) os.chmod(fn, PKG_FILE_MODE) portable.rename(fn, self.__cache_path(name)) @@ -1745,7 +1745,7 @@ def __load_cached_data(self, name): if os.path.exists(mpath): # we have cached copy on disk; use it try: - with open(mpath, "r") as f: + with open(mpath, "r", encoding="UTF-8") as f: self._cache[name] = [ a for a in ( @@ -1830,7 +1830,7 @@ def gen_actions_by_type(self, atype, attr_match=None, excludes=EmptyI): attr_match = _compile_fnpats(attr_match) try: - with open(mpath, "r") as f: + with open(mpath, "r", encoding="UTF-8") as f: for l in f: a = actions.fromstr(l.rstrip()) if (excludes and @@ -1889,7 +1889,7 @@ def __load_attributes(self): mpath = self.__cache_path("manifest.set") if not os.path.exists(mpath): return False - with open(mpath, "r") as f: + with open(mpath, "r", encoding="UTF-8") as f: for l in f: a = actions.fromstr(l.rstrip()) if not self.excludes or \