diff --git a/README.rst b/README.rst index 3e7406b..77d24cd 100644 --- a/README.rst +++ b/README.rst @@ -4,13 +4,50 @@ Introduction ============ -The package `ftw.catalogdoctor` provides checkup and surgery to remove inconsistencies in portal_catalog. +The package ``ftw.catalogdoctor`` provides a healthcheck to find +inconsistencies in ``portal_catalog`` and surgery to remove some of them. It +can be run via a ``zopectl.command``. -Compatibility -------------- +Healthcheck +=========== -Plone 4.3.x +Lists inconsistencies detected in ``portal_catalog``. Finds inconsistencies by +inspecting the catalog's internal data structures. It currently uses ``paths`` +(the rid-path mapping), ``uids`` (the path-rid mapping), the ``UID`` index and +catalog metadata to determine if the catalog is healthy or if there are +problems. Healthcheck is a read-only operation and won't modify the catalog. + +It can be run as follows: + +.. code:: sh + + $ bin/instance doctor healthcheck + + +Surgery +======= + +Attempts to fix issues found by ``healthcheck``. Will do a healthcheck before +surgery, then attempt surgery and finally do a post-surgery healthcheck. +Surgery is a write operation but changes are only committed to the database if +the post-surgery healthcheck yields no more health problems. +Currently the set of available surgeries is limited to problems we have observed +in production. + + +It can be run as follows: + +.. code:: sh + + $ bin/instance doctor surgery + + +There is also a ``--dry-run`` parameter that prevents committing changes. + +.. 
code:: sh + + $ bin/instance doctor --dry-run surgery Installation @@ -26,6 +63,13 @@ Installation ftw.catalogdoctor +Compatibility +------------- + +Plone 4.3.x +Plone 5.1.x + + Development =========== diff --git a/ftw/catalogdoctor/command.py b/ftw/catalogdoctor/command.py new file mode 100644 index 0000000..cd0ab1a --- /dev/null +++ b/ftw/catalogdoctor/command.py @@ -0,0 +1,154 @@ +from __future__ import print_function +from ftw.catalogdoctor.compat import processQueue +from ftw.catalogdoctor.healthcheck import CatalogHealthCheck +from ftw.catalogdoctor.surgery import CatalogDoctor +from Products.CMFCore.utils import getToolByName +from Products.CMFPlone.interfaces import IPloneSiteRoot +from Testing.makerequest import makerequest +from zope.component.hooks import setSite +import argparse +import sys +import transaction + + +def discover_plone_site(app): + for item_id, item in app.items(): + if IPloneSiteRoot.providedBy(item): + return item_id + return None + + +def load_site(app, path): + if not path: + print('ERROR: No Plone site found. 
Use --site or create a Plone site ' + 'in the Zope app root.', + file=sys.stderr) + sys.exit(1) + + app = makerequest(app) + site = app.unrestrictedTraverse(path) + app.REQUEST.PARENTS = [site, app] + setSite(site) + + return site + + +class ConsoleOutput(object): + + def info(self, msg): + print(msg) + + def warning(self, msg): + print(msg) + + def error(self, msg): + print(msg, file=sys.stderr) + + +def healthcheck_command(portal_catalog, args, formatter): + transaction.doom() # extra paranoia, prevent erroneous commit + + return _run_healthcheck(portal_catalog, formatter) + + +def _run_healthcheck(portal_catalog, formatter): + result = CatalogHealthCheck(catalog=portal_catalog).run() + result.write_result(formatter) + return result + + +def surgery_command(portal_catalog, args, formatter): + if args.dryrun: + formatter.info('Performing dryrun!') + formatter.info('') + transaction.doom() + + result = _run_healthcheck(portal_catalog, formatter) + if result.is_healthy(): + transaction.doom() # extra paranoia, prevent erroneous commit + formatter.info('Catalog is healthy, no surgery is needed.') + return + + there_is_nothing_we_can_do = [] + formatter.info('Performing surgery:') + for unhealthy_rid in result.get_unhealthy_rids(): + doctor = CatalogDoctor(result.catalog, unhealthy_rid) + if doctor.can_perform_surgery(): + surgery = doctor.perform_surgery() + surgery.write_result(formatter) + formatter.info('') + else: + there_is_nothing_we_can_do.append(unhealthy_rid) + + if there_is_nothing_we_can_do: + formatter.info('The following unhealthy rids could not be fixed:') + for unhealthy_rid in there_is_nothing_we_can_do: + unhealthy_rid.write_result(formatter) + formatter.info('') + + formatter.info('Not all health problems could be fixed, aborting.') + return + + processQueue() + + formatter.info('Performing post-surgery healthcheck:') + post_result = _run_healthcheck(portal_catalog, formatter) + if not post_result.is_healthy(): + transaction.doom() # extra paranoia, 
prevent erroneous commit + formatter.info('Not all health problems could be fixed, aborting.') + return + + if args.dryrun: + formatter.info('Surgery would have been successful, but was aborted ' + 'due to dryrun!') + else: + transaction.commit() + formatter.info('Surgery was successful, known health problems could ' + 'be fixed!') + + +def _setup_parser(app): + parser = argparse.ArgumentParser( + description='Provide health check and fixes for portal_catalog.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + # swallows instance command + parser.add_argument('-c', help=argparse.SUPPRESS) + + parser.add_argument( + '-s', '--site', dest='site', + default=discover_plone_site(app), + help='Path to the Plone site from which portal_catalog is used.') + parser.add_argument( + '-n', '--dry-run', dest='dryrun', + default=False, action="store_true", + help='Dryrun, do not commit changes. Only relevant for surgery.') + + commands = parser.add_subparsers(dest='command') + healthcheck = commands.add_parser( + 'healthcheck', + help='Run a health check for portal_catalog.') + healthcheck.set_defaults(func=healthcheck_command) + + surgery = commands.add_parser( + 'surgery', + help='Run a healthcheck and perform surgery for unhealthy rids in ' + 'portal_catalog.') + surgery.set_defaults(func=surgery_command) + return parser + + +def _parse(parser, args): + return parser.parse_args(args) + + +def _run(parsed_args, app, formatter): + site = load_site(app, parsed_args.site) + portal_catalog = getToolByName(site, 'portal_catalog') + + return parsed_args.func(portal_catalog, parsed_args, formatter=formatter) + + +def doctor_cmd(app, args, formatter=None): + parser = _setup_parser(app) + parsed_args = _parse(parser, args) + _run(parsed_args, app, formatter or ConsoleOutput()) diff --git a/ftw/catalogdoctor/compat.py b/ftw/catalogdoctor/compat.py new file mode 100644 index 0000000..d6a7402 --- /dev/null +++ b/ftw/catalogdoctor/compat.py @@ -0,0 +1,22 @@ +import pkg_resources + 
+ +IS_PLONE_5 = pkg_resources.get_distribution('Products.CMFPlone').version >= '5' + + +if IS_PLONE_5: + from Products.CMFCore.indexing import processQueue +else: + # optional collective.indexing support + try: + from collective.indexing.queue import processQueue + except ImportError: + def processQueue(): + pass + +# optional Products.DateRecurringIndex support +try: + from Products.DateRecurringIndex.index import DateRecurringIndex +except ImportError: + class DateRecurringIndex(object): + pass diff --git a/ftw/catalogdoctor/debug.py b/ftw/catalogdoctor/debug.py new file mode 100644 index 0000000..983554d --- /dev/null +++ b/ftw/catalogdoctor/debug.py @@ -0,0 +1,58 @@ +from pprint import pprint + + +def btrees_to_python_collections(maybe_btrees): + """Convert collections from btrees to python collections for debugging. + + WARNING: naive implementation: + - converts sets to lists + - should not be used on large data structures + - should only be used to debug + + Only use it to display things on the command line. Better not + programmatically work with the result otherwise. Stick to BTrees if you + can. + + This method is intended to help displaying catalog data structures on the + command line for debugging. It can be uses in combination with pprint to + quickly analize the state of the catalog's internal data structures. + + """ + if isinstance(maybe_btrees, (int, basestring)): + return maybe_btrees + elif hasattr(maybe_btrees, 'items'): + return dict((key, btrees_to_python_collections(val)) + for key, val in maybe_btrees.items()) + else: + return list(maybe_btrees) + + +def pprint_btrees(btrees): + """pretty print a collection from btrees. 
+ + Sample output looks like: + + >>> index = plone.portal_catalog._catalog.indexes['path'] + >>> pprint_btrees(index._index) + {None: {1: [97], 2: [98, 99]}, + 'child': {2: [98]}, + 'otherchild': {2: [99]}, + 'parent': {1: [97, 98, 99]}, + 'plone': {0: [97, 98, 99]}} + + >>> pprint_btrees(index._unindex) + {97: '/plone/parent', + 98: '/plone/parent/child', + 99: '/plone/parent/otherchild'} + + >>> pprint_btrees(index._index_items) + {'/plone/parent': 97, + '/plone/parent/child': 98, + '/plone/parent/otherchild': 99} + + >>> pprint_btrees(index._index_parents) + {'/plone': [97], + '/plone/parent': [98, 99]} + + """ + pprint(btrees_to_python_collections(btrees)) diff --git a/ftw/catalogdoctor/exceptions.py b/ftw/catalogdoctor/exceptions.py new file mode 100644 index 0000000..7420246 --- /dev/null +++ b/ftw/catalogdoctor/exceptions.py @@ -0,0 +1,2 @@ +class CantPerformSurgery(Exception): + """Raised when a procedure cannot be performed.""" diff --git a/ftw/catalogdoctor/healthcheck.py b/ftw/catalogdoctor/healthcheck.py new file mode 100644 index 0000000..d65d7f2 --- /dev/null +++ b/ftw/catalogdoctor/healthcheck.py @@ -0,0 +1,253 @@ +from plone import api + + +class CatalogHealthCheck(object): + """Run health check for a Products.ZCatalog.Catalog instance. + + Validates that the catalogs uid and rid mapping and metadata is consistent. + This means that: + - the mappings have the same length + - the mappings are consistent, so every item is in the reverse mapping + - for every item there is also an entry in the catalog metadata + + The health check does not validate indices and index data yet. 
+ """ + def __init__(self, catalog=None): + self.portal_catalog = catalog or api.portal.get_tool('portal_catalog') + self.catalog = self.portal_catalog._catalog + + def run(self): + result = HealthCheckResult(self.catalog) + + paths = self.catalog.paths + paths_values = set(self.catalog.paths.values()) + uids = self.catalog.uids + uids_values = set(self.catalog.uids.values()) + data = self.catalog.data + + uuid_index = self.catalog.indexes['UID'] + result.report_catalog_stats( + len(self.catalog), len(uids), len(paths), len(data), + len(uuid_index), len(uuid_index._index), len(uuid_index._unindex)) + + for path, rid in uids.items(): + if rid not in paths: + result.report_symptom( + 'in_uids_values_not_in_paths_keys', rid, path=path) + elif paths[rid] != path: + result.report_symptom( + 'paths_tuple_mismatches_uids_tuple', rid, path=path) + + if path not in paths_values: + result.report_symptom( + 'in_uids_keys_not_in_paths_values', rid, path=path) + + if rid not in data: + result.report_symptom( + 'in_uids_values_not_in_metadata_keys', rid, path=path) + + for rid, path in paths.items(): + if path not in uids: + result.report_symptom( + 'in_paths_values_not_in_uids_keys', rid, path=path) + elif uids[path] != rid: + result.report_symptom( + 'uids_tuple_mismatches_paths_tuple', rid, path=path) + + if rid not in uids_values: + result.report_symptom( + 'in_paths_keys_not_in_uids_values', rid, path=path) + + if rid not in data: + result.report_symptom( + 'in_paths_keys_not_in_metadata_keys', rid, path=path) + + for rid in data: + if rid not in paths: + result.report_symptom( + 'in_metadata_keys_not_in_paths_keys', rid) + if rid not in uids_values: + result.report_symptom( + 'in_metadata_keys_not_in_uids_values', rid) + + # we consider the uids (path->rid mapping) as source of truth for the + # rids "registered" in the catalog. that mapping is also used in + # `catalogObject` to decide whether an object is inserted or + # updated, i.e. 
if entries for an existing rid are updated or if a + # new rid is assigned to the path/object. + rids_in_catalog = uids_values + + index_values = set(uuid_index._index.values()) + + for uuid, rid in uuid_index._index.items(): + if rid not in uuid_index._unindex: + result.report_symptom( + 'in_uuid_index_not_in_uuid_unindex', rid) + elif uuid_index._unindex[rid] != uuid: + result.report_symptom( + 'uuid_index_tuple_mismatches_uuid_unindex_tuple', rid) + if rid not in rids_in_catalog: + result.report_symptom( + 'in_uuid_index_not_in_catalog', rid) + + for rid, uuid in uuid_index._unindex.items(): + if rid not in index_values: + result.report_symptom( + 'in_uuid_unindex_not_in_uuid_index', rid) + elif uuid_index._index[uuid] != rid: + result.report_symptom( + 'uuid_unindex_tuple_mismatches_uuid_index_tuple', rid) + if rid not in rids_in_catalog: + result.report_symptom( + 'in_uuid_unindex_not_in_catalog', rid) + + for path, rid in uids.items(): + if rid not in index_values: + result.report_symptom( + 'in_catalog_not_in_uuid_index', rid, path=path) + if rid not in uuid_index._unindex: + result.report_symptom( + 'in_catalog_not_in_uuid_unindex', rid, path=path) + + return result + + +class UnhealthyRid(object): + """Represents a rid which is considered unhealthy. + + A rid becomes unhealthy if the health check finds one or more issues + with that rid. An `UnhealthyRid` instance groups all issues/symptoms found + for one rid. 
+ + """ + def __init__(self, rid): + self.rid = rid + self._paths = set() + self._catalog_symptoms = set() + + def attach_path(self, path): + self._paths.add(path) + + def report_catalog_symptom(self, name): + """Report a symptom found in the the catalog.""" + + self._catalog_symptoms.add(name) + + @property + def catalog_symptoms(self): + return tuple(sorted(self._catalog_symptoms)) + + @property + def paths(self): + return tuple(sorted(self._paths)) + + def __str__(self): + if self.paths: + paths = ", ".join("'{}'".format(p) for p in self.paths) + else: + paths = "--no path--" + return "rid {} ({})".format(self.rid, paths) + + def write_result(self, formatter): + formatter.info("{}:".format(self)) + for symptom in self.catalog_symptoms: + formatter.info('\t- {}'.format(symptom)) + + +class HealthCheckResult(object): + """Provide health check result for one catalog health check run.""" + + def __init__(self, catalog): + self.catalog = catalog + self.unhealthy_rids = dict() + self.claimed_length = None + self.uids_length = None + self.paths_length = None + self.data_length = None + self.uuid_index_claimed_length = None + self.uuid_index_index_length = None + self.uuid_index_unindex_length = None + + def get_unhealthy_rids(self): + return self.unhealthy_rids.values() + + def report_catalog_stats(self, claimed_length, uids_length, paths_length, + data_length, + uuid_index_claimed_length, + uuid_index_index_length, + uuid_index_unindex_length): + self.claimed_length = claimed_length + self.uids_length = uids_length + self.paths_length = paths_length + self.data_length = data_length + self.uuid_index_claimed_length = uuid_index_claimed_length + self.uuid_index_index_length = uuid_index_index_length + self.uuid_index_unindex_length = uuid_index_unindex_length + + def _get_or_add_unhealthy_rid(self, rid, path=None): + if rid not in self.unhealthy_rids: + self.unhealthy_rids[rid] = UnhealthyRid(rid) + + unhealthy_rid = self.unhealthy_rids[rid] + if path: + 
unhealthy_rid.attach_path(path) + return unhealthy_rid + + def report_symptom(self, name, rid, path=None): + unhealthy_rid = self._get_or_add_unhealthy_rid(rid, path=path) + unhealthy_rid.report_catalog_symptom(name) + return unhealthy_rid + + def get_symptoms(self, rid): + return self.unhealthy_rids[rid].catalog_symptoms + + def is_healthy(self): + """Return whether the catalog is healthy according to this result.""" + + return self.is_catalog_data_healthy() and self.is_length_healthy() + + def is_catalog_data_healthy(self): + return not self.unhealthy_rids + + def is_length_healthy(self): + return ( + self.claimed_length + == self.uids_length + == self.paths_length + == self.data_length + == self.uuid_index_claimed_length + == self.uuid_index_index_length + == self.uuid_index_unindex_length + ) + + def write_result(self, formatter): + """Log result to logger.""" + + formatter.info("Catalog health check report:") + + if self.is_length_healthy(): + formatter.info( + "Catalog length is consistent at {}.".format( + self.claimed_length)) + else: + formatter.info("Inconsistent catalog length:") + formatter.info(" claimed length: {}".format(self.claimed_length)) + formatter.info(" uids length: {}".format(self.uids_length)) + formatter.info(" paths length: {}".format(self.paths_length)) + formatter.info(" metadata length: {}".format(self.data_length)) + formatter.info(" uid index claimed length: {}".format( + self.uuid_index_claimed_length)) + formatter.info(" uid index index length: {}".format( + self.uuid_index_index_length)) + formatter.info(" uid index unindex length: {}".format( + self.uuid_index_unindex_length)) + + if self.is_catalog_data_healthy(): + formatter.info("Catalog data is healthy.") + else: + formatter.info( + "Catalog data is unhealthy, found {} unhealthy rids:".format( + len(self.unhealthy_rids))) + for unhealthy_rid in self.unhealthy_rids.values(): + unhealthy_rid.write_result(formatter) + formatter.info('') diff --git a/ftw/catalogdoctor/surgery.py 
b/ftw/catalogdoctor/surgery.py new file mode 100644 index 0000000..015adbe --- /dev/null +++ b/ftw/catalogdoctor/surgery.py @@ -0,0 +1,451 @@ +from Acquisition import aq_chain +from Acquisition import aq_inner +from ftw.catalogdoctor.compat import DateRecurringIndex +from ftw.catalogdoctor.exceptions import CantPerformSurgery +from ftw.catalogdoctor.utils import find_keys_pointing_to_rid +from plone import api +from plone.app.folder.nogopip import GopipIndex +from Products.ExtendedPathIndex.ExtendedPathIndex import ExtendedPathIndex +from Products.PluginIndexes.BooleanIndex.BooleanIndex import BooleanIndex +from Products.PluginIndexes.DateIndex.DateIndex import DateIndex +from Products.PluginIndexes.DateRangeIndex.DateRangeIndex import DateRangeIndex +from Products.PluginIndexes.FieldIndex.FieldIndex import FieldIndex +from Products.PluginIndexes.KeywordIndex.KeywordIndex import KeywordIndex +from Products.PluginIndexes.UUIDIndex.UUIDIndex import UUIDIndex +from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex + + +class SurgeryStep(object): + + def __init__(self, index, rid): + self.index = index + self.rid = rid + + def _remove_keys_pointing_to_rid(self, index, linked_length=None): + """Remove all entries pointing to rid from a forward index. + + Rows in indices are expected to be a set, e.g. a `TreeSet`. Once the + set is emtpy it should also be removed from the index. + + If `linked_length` is provided it is decreased when a row is removed. 
+ + """ + for key in find_keys_pointing_to_rid(index, self.rid): + row = index[key] + row.remove(self.rid) + if not row: + del index[key] + if linked_length: + linked_length.change(-1) + + def _remove_rid_from_unindex(self, unindex): + """Remove rid from a reverse index.""" + + if self.rid in unindex: + del unindex[self.rid] + + def perform(self): + raise NotImplementedError + + +class NullStep(SurgeryStep): + """Don't do anything.""" + + def perform(self): + pass + + +class RemoveFromUUIDIndex(SurgeryStep): + """Remove rid from a `UUIDIndex`.""" + + def _remove_keys_pointing_to_rid(self, index, linked_length=None): + for key in find_keys_pointing_to_rid(index, self.rid): + del index[key] + self.index._length.change(-1) + + def perform(self): + self._remove_keys_pointing_to_rid(self.index._index) + self._remove_rid_from_unindex(self.index._unindex) + + +class RemoveFromUnIndex(SurgeryStep): + """Remove a rid from a simple forward and reverse index.""" + + def perform(self): + self._remove_keys_pointing_to_rid( + self.index._index, linked_length=self.index._length) + self._remove_rid_from_unindex(self.index._unindex) + + +class RemoveFromDateRangeIndex(SurgeryStep): + """Remove rid from a `DateRangeIndex`.""" + + def perform(self): + if self.rid in self.index._always: + self.index._always.remove(self.rid) + + for index in ( + self.index._since_only, + self.index._until_only, + self.index._since, + self.index._until): + self._remove_keys_pointing_to_rid(index) + + self._remove_rid_from_unindex(self.index._unindex) + + +class RemoveFromBooleanIndex(SurgeryStep): + """Remove rid from a `BooleanIndex`. + + Lazily skips checking whether the boolean index should be inverted or not, + as this operation is functionally irrelevant and will happen during the + next reindex operation by plone. 
+ """ + def perform(self): + if self.rid in self.index._unindex: + del self.index._unindex[self.rid] + self.index._length.change(-1) + + if self.rid in self.index._index: + self.index._index.remove(self.rid) + self.index._index_length.change(-1) + + +class RemoveFromExtendedPathIndex(SurgeryStep): + """Remove rid from a `ExtendedPathIndex`.""" + + def perform(self): + # _index + components_with_rid = [] + for component, level_to_rid in self.index._index.items(): + for level, rids in level_to_rid.items(): + if self.rid in rids: + components_with_rid.append((component, level,)) + + for component, level in components_with_rid: + self.index._index[component][level].remove(self.rid) + if not self.index._index[component][level]: + del self.index._index[component][level] + if not self.index._index[component]: + del self.index._index[component] + + # _index_items + for key in find_keys_pointing_to_rid(self.index._index_items, self.rid): + del self.index._index_items[key] + + # _index_parents + self._remove_keys_pointing_to_rid(self.index._index_parents) + + # _unindex + if self.rid in self.index._unindex: + del self.index._unindex[self.rid] + self.index._length.change(-1) + + +class UnindexObject(SurgeryStep): + """Remove a rid via the official `unindex_object` API.""" + + def perform(self): + self.index.unindex_object(self.rid) + + +class Surgery(object): + """Surgery can fix a concrete set of symptoms.""" + + index_to_step = { + BooleanIndex: RemoveFromBooleanIndex, + DateIndex: RemoveFromUnIndex, + DateRangeIndex: RemoveFromDateRangeIndex, + DateRecurringIndex: RemoveFromUnIndex, + ExtendedPathIndex: RemoveFromExtendedPathIndex, + FieldIndex: RemoveFromUnIndex, + GopipIndex: NullStep, # not a real index + KeywordIndex: RemoveFromUnIndex, + UUIDIndex: RemoveFromUUIDIndex, + ZCTextIndex: UnindexObject, + } + + def __init__(self, catalog, unhealthy_rid): + self.catalog = catalog + self.unhealthy_rid = unhealthy_rid + self.surgery_log = [] + + def perform(self): + raise 
NotImplementedError + + def unindex_rid_from_all_catalog_indexes(self, rid): + for idx in self.catalog.indexes.values(): + surgery_step = self.index_to_step.get(type(idx)) + + if not surgery_step: + raise CantPerformSurgery( + 'Unhandled index type: {0!r}'.format(idx)) + + surgery_step(idx, rid).perform() + + self.surgery_log.append( + "Removed rid from all catalog indexes.") + + def delete_rid_from_paths(self, rid): + del self.catalog.paths[rid] + + self.surgery_log.append( + "Removed rid from paths (the rid->path mapping).") + + def delete_rid_from_metadata(self, rid): + del self.catalog.data[rid] + + self.surgery_log.append( + "Removed rid from catalog metadata.") + + def delete_path_from_uids(self, path): + del self.catalog.uids[path] + + self.surgery_log.append( + "Removed path from uids (the path->rid mapping).") + + def change_catalog_length(self, delta): + self.catalog._length.change(delta) + + def write_result(self, formatter): + """Write surgery result to formatter.""" + + formatter.info("{}:".format(self.unhealthy_rid)) + for entry in self.surgery_log: + formatter.info('\t- {}'.format(entry)) + + +class RemoveExtraRid(Surgery): + """Remove an extra rid from the catalog. + + In this case the object at path still exists but two rids have been + generated for that object. + + We remove the etra rid from metadata, the rid->path mapping and from + all indexes. 
+ """ + def perform(self): + rid = self.unhealthy_rid.rid + if len(self.unhealthy_rid.paths) != 1: + raise CantPerformSurgery( + "Expected exactly one affected path, got: {}" + .format(", ".join(self.unhealthy_rid.paths))) + + path = self.unhealthy_rid.paths[0] + if self.catalog.uids[path] == rid: + raise CantPerformSurgery( + "Expected different rid in catalog uids mapping for path {}" + .format(path)) + + self.unindex_rid_from_all_catalog_indexes(rid) + self.delete_rid_from_paths(rid) + self.delete_rid_from_metadata(rid) + self.change_catalog_length(-1) + + +class RemoveOrphanedRid(Surgery): + """Remove an orphaned rid from the catalog. + + In this case the object at path no longer exists but the rid still remains + in the catalog. + + We remove the orphaned rid from metadata, rid->path mapping and from all + indexes. + """ + def perform(self): + rid = self.unhealthy_rid.rid + if len(self.unhealthy_rid.paths) != 1: + raise CantPerformSurgery( + "Expected exactly one affected path, got: {}" + .format(", ".join(self.unhealthy_rid.paths))) + + path = list(self.unhealthy_rid.paths)[0] + if path in self.catalog.uids: + raise CantPerformSurgery( + "Expected path to be absent from catalog uids {}" + .format(path)) + + portal = api.portal.get() + obj = portal.unrestrictedTraverse(path, None) + if obj is not None: + raise CantPerformSurgery( + "Unexpectedly found object at {}".format(path)) + + self.unindex_rid_from_all_catalog_indexes(rid) + self.delete_rid_from_paths(rid) + self.delete_rid_from_metadata(rid) + self.change_catalog_length(-1) + + +class ReindexMissingUUID(Surgery): + """Reindex an uuid which is partially missing from the UID index. + + Removing and reindexing the object seems to do the trick in such cases. 
+ """ + def perform(self): + rid = self.unhealthy_rid.rid + + if len(self.unhealthy_rid.paths) != 1: + raise CantPerformSurgery( + "Expected exactly one affected path, got: {}" + .format(", ".join(self.unhealthy_rid.paths))) + + path = list(self.unhealthy_rid.paths)[0] + + portal = api.portal.get() + obj = portal.unrestrictedTraverse(path, None) + if obj is None: + raise CantPerformSurgery( + "Missing object at {}".format(path)) + + # update UID index + index = self.catalog.indexes['UID'] + RemoveFromUUIDIndex(index, rid).perform() + index.index_object(rid, obj) + + # make sure catalog metadata is up to date as well + self.catalog.updateMetadata(obj, path, rid) + + self.surgery_log.append("Reindexed UID index and updated metadata.") + + +class RemoveRidOrReindexObject(Surgery): + """Reindex an object for all indexes or remove the stray rid. + + This can have two causes: + - Either there are orphaned rids left behind in the catalogs `uid` and + `path` mappings. In such cases the referenced object is is no longer + traversable as plone content and we can remove the orphaned rid. + - Special case of above when the object has been moved into its parents. In + such cases the object can still be traversed as object is found via + acquisition. We can remove the orphaned rid in such cases. + - The object has not been indexed correctly, in such cases the object can + be traversed and has to be reindexed in all indexes. 
+ + """ + def perform(self): + rid = self.unhealthy_rid.rid + + if rid not in self.catalog.data: + raise CantPerformSurgery( + "Expected rid to be present in catalog metadata {}" + .format(rid)) + + if len(self.unhealthy_rid.paths) != 1: + raise CantPerformSurgery( + "Expected exactly one affected path, got: {}" + .format(", ".join(self.unhealthy_rid.paths))) + + if rid not in self.catalog.paths: + raise CantPerformSurgery( + "Expected rid to be present in catalog paths {}" + .format(rid)) + + path = list(self.unhealthy_rid.paths)[0] + if path not in self.catalog.uids: + raise CantPerformSurgery( + "Expected path to be present in catalog uids {}" + .format(path)) + + portal = api.portal.get() + obj = portal.unrestrictedTraverse(path, None) + + # the object is gone + if obj is None: + self.unindex_rid_from_all_catalog_indexes(rid) + self.delete_rid_from_paths(rid) + self.delete_rid_from_metadata(rid) + self.delete_path_from_uids(path) + self.change_catalog_length(-1) + + return + + # special case when the object has been moved into one of its parents. + # it can be traversed as it is found via acquisition. safeguard so we + # only unindex objects where this special case is true. + obj_path = '/'.join(obj.getPhysicalPath()) + if obj_path != path: + if aq_chain(aq_inner(obj))[1:] == aq_chain(obj)[1:]: + raise CantPerformSurgery( + "Object path after traversing {} differs from path before " + "traversing and in catalog {}, but acquisition chain " + "is unexpectedly equal.".format(obj_path, path)) + + self.unindex_rid_from_all_catalog_indexes(rid) + self.delete_rid_from_paths(rid) + self.delete_rid_from_metadata(rid) + self.delete_path_from_uids(path) + self.change_catalog_length(-1) + + return + + # the object is still there and somehow vanished from the indexes. + # we reindex to update indexes and metadata. + obj.reindexObject() + self.surgery_log.append("Reindexed object.") + + +class CatalogDoctor(object): + """Performs surgery for an unhealthy_rid, if possible. 
+ + Surgeries are assigned based on symptoms. For each set of symptoms a + surgical procedure can be registered. This decides if an unhealthy rid can + be treated. + + When you add symptom tuples to surgeries make sure they are sorted + alphabetically. + """ + surgeries = { + ( + 'in_metadata_keys_not_in_uids_values', + 'in_paths_keys_not_in_uids_values', + 'in_uuid_unindex_not_in_catalog', + 'in_uuid_unindex_not_in_uuid_index', + 'uids_tuple_mismatches_paths_tuple', + ): RemoveExtraRid, + ( + 'in_metadata_keys_not_in_uids_values', + 'in_paths_keys_not_in_uids_values', + 'uids_tuple_mismatches_paths_tuple', + ): RemoveExtraRid, + ( + 'in_metadata_keys_not_in_uids_values', + 'in_paths_keys_not_in_uids_values', + 'in_paths_values_not_in_uids_keys', + ): RemoveOrphanedRid, + ( + 'in_metadata_keys_not_in_uids_values', + 'in_paths_keys_not_in_uids_values', + 'in_paths_values_not_in_uids_keys', + 'in_uuid_unindex_not_in_catalog', + 'in_uuid_unindex_not_in_uuid_index', + ): RemoveOrphanedRid, + ( + 'in_catalog_not_in_uuid_index', + 'in_uuid_unindex_not_in_uuid_index', + ): ReindexMissingUUID, + ( + 'in_catalog_not_in_uuid_index', + 'in_catalog_not_in_uuid_unindex', + ): RemoveRidOrReindexObject, + } + + def __init__(self, catalog, unhealthy_rid): + self.catalog = catalog + self.unhealthy_rid = unhealthy_rid + + def can_perform_surgery(self): + return bool(self.get_surgery()) + + def get_surgery(self): + symptoms = self.unhealthy_rid.catalog_symptoms + return self.surgeries.get(symptoms, None) + + def perform_surgery(self): + surgery_cls = self.get_surgery() + if not surgery_cls: + return None + + surgery = surgery_cls(self.catalog, self.unhealthy_rid) + surgery.perform() + return surgery diff --git a/ftw/catalogdoctor/testing.py b/ftw/catalogdoctor/testing.py index 1c7016a..89cb264 100644 --- a/ftw/catalogdoctor/testing.py +++ b/ftw/catalogdoctor/testing.py @@ -1,15 +1,17 @@ +from ftw.builder.content import register_dx_content_builders from ftw.builder.testing import 
BUILDER_LAYER from ftw.builder.testing import functional_session_factory from ftw.builder.testing import set_builder_session_factory +from plone.app.testing import applyProfile from plone.app.testing import FunctionalTesting -from plone.app.testing import PLONE_FIXTURE from plone.app.testing import PloneSandboxLayer from plone.testing import z2 from zope.configuration import xmlconfig +from ftw.testing.layer import COMPONENT_REGISTRY_ISOLATION class CatalogdoctorLayer(PloneSandboxLayer): - defaultBases = (PLONE_FIXTURE, BUILDER_LAYER) + defaultBases = (COMPONENT_REGISTRY_ISOLATION, BUILDER_LAYER) def setUpZope(self, app, configurationContext): xmlconfig.string( @@ -20,8 +22,16 @@ def setUpZope(self, app, configurationContext): '', context=configurationContext) + # Prevent an error in layer setup by installing the product, error: + # ValueError: Index of type DateRecurringIndex not found + z2.installProduct(app, 'Products.DateRecurringIndex') + z2.installProduct(app, 'ftw.catalogdoctor') + def setUpPloneSite(self, portal): + applyProfile(portal, 'plone.app.contenttypes:default') + register_dx_content_builders(force=True) + CATALOGDOCTOR_FIXTURE = CatalogdoctorLayer() CATALOGDOCTOR_FUNCTIONAL = FunctionalTesting( diff --git a/ftw/catalogdoctor/tests/__init__.py b/ftw/catalogdoctor/tests/__init__.py index b708aec..c4cc5dd 100644 --- a/ftw/catalogdoctor/tests/__init__.py +++ b/ftw/catalogdoctor/tests/__init__.py @@ -1,17 +1,236 @@ +from Acquisition import aq_base +from Acquisition import aq_inner +from Acquisition import aq_parent +from ftw.catalogdoctor.command import doctor_cmd +from ftw.catalogdoctor.compat import processQueue +from ftw.catalogdoctor.healthcheck import CatalogHealthCheck from ftw.catalogdoctor.testing import CATALOGDOCTOR_FUNCTIONAL +from plone import api from plone.app.testing import setRoles from plone.app.testing import TEST_USER_ID +from random import randint +from StringIO import StringIO from unittest2 import TestCase import transaction 
+import uuid + + +class MockFormatter(object): + + def __init__(self): + self.log = StringIO() + + def info(self, msg): + self.log.write(msg + '\n') + + def warning(self, msg): + self.log.write(msg + '\n') + + def error(self, msg): + self.log.write(msg + '\n') + + def getlines(self): + return self.log.getvalue().splitlines() + + +class Mock(object): + pass class FunctionalTestCase(TestCase): + layer = CATALOGDOCTOR_FUNCTIONAL def setUp(self): + self.app = self.layer['app'] self.portal = self.layer['portal'] self.request = self.layer['request'] + self.portal_catalog = api.portal.get_tool('portal_catalog') + self.catalog = self.portal_catalog._catalog + self.catalog._v_nextid = 97 + + self._chosen_rids = set() + + def run_command(self, *args): + formatter = MockFormatter() + command = ['-c'] + list(args) + doctor_cmd(self.app, command, formatter=formatter) + return formatter.getlines() + + def run_healthcheck(self): + self.maybe_process_indexing_queue() # enforce up to date catalog + healthcheck = CatalogHealthCheck(self.portal_catalog) + return healthcheck.run() + + def choose_next_rid(self): + """Return a currently unused rid for testing. + + It will avoid returning rids already used by the catalog or already + chosen by an earlier call to this method during the same test-case + execution. + + Test-helper supposed to be called in a test-case once all objects + have been added to the catalog by plone or your setup and you want to + add specific rid entries to your catalog, usually in order to add + superfluous entries to the catalog or its indices to setup your + test-case. 
+ """ + index = getattr(self.catalog, '_v_nextid', 0) + while (index in self.catalog.data + or index in self.catalog.paths + or index in self._chosen_rids): + index = randint(-2000000000, 2000000000) + + self.catalog._v_nextid = index + self._chosen_rids.add(index) + return index def grant(self, *roles): setRoles(self.portal, TEST_USER_ID, list(roles)) transaction.commit() + + def get_catalog_indexdata(self, obj, omit_empty=False): + """Return the catalog index data for an object as dict. + """ + self.maybe_process_indexing_queue() + index_data = self.portal_catalog.getIndexDataForRID(self.get_rid(obj)) + if omit_empty: + index_data = dict((key, value) for key, value in index_data.items() + if value) + return index_data + + def get_catalog_metadata(self, obj): + """Return the catalog metadata for an object as dict. + """ + self.maybe_process_indexing_queue() + return self.portal_catalog.getMetadataForRID(self.get_rid(obj)) + + def reindex_object(self, obj): + obj.reindexObject() + self.maybe_process_indexing_queue() + + def maybe_process_indexing_queue(self): + processQueue() + + def get_rid(self, obj): + path = self.get_physical_path(obj) + return self.catalog.uids[path] + + def get_physical_path(self, obj): + return '/'.join(obj.getPhysicalPath()) + + def make_unhealthy_extra_rid_after_move(self, obj, new_id=None): + """Make catalog unhealthy and create an extra rid for obj. + + :param obj: the object that will have an unhealthy additional rid + :param new_id: the target object id after the object has been moved. + An UUID4 is used if this argument is not supplied. + + Simulate an issue when objects are reindexed prematurely while plone is + processing the IObjectWillBeMovedEvent and IObjectMovedEvent events. + This issue only surfaces when `ftw.copymovepatches` is installed and + is also described in detail in: + https://github.com/4teamwork/opengever.core/pull/5533 + The premature reindex can be caused by e.g. 
a catalog query in another + event handler for IObjectMovedEvent. + + - Move the object without firing moved events. Disconnects object from + the catalog's path->rid mapping as the object will have a new path. + - Reindex the object, this adds a new entry to the catalog and the + catalog indices. As already mentioned the catalog uses its internal + path->rid mapping to decide if the object is inserted or updated. + - Simulate ftw.copymovepatches optimized move that is applied to the + old data which breaks the catalog. + + """ + new_id = new_id or str(uuid.uuid4()) + old_rid = self.get_rid(obj) # keep old rid for ftw.copymovepatches + + # move without firing events, disconnect ob from catalog + parent = aq_parent(aq_inner(obj)) + old_id = obj.getId() + ob = aq_base(parent._getOb(old_id)) + parent._delObject(old_id, suppress_events=True) + ob._setId(new_id) + parent._setObject(new_id, ob, set_owner=0, suppress_events=True) + ob = parent._getOb(new_id) + ob._postCopy(parent, op=1) + + # reindex ob, create new rid in catalog and new entries + self.reindex_object(ob) + + # simulate ftw.copymovepatches optimized move, breaks catalog entries + new_path = '/'.join(ob.getPhysicalPath()) + old_path = self.catalog.paths[old_rid] + del self.catalog.uids[old_path] + self.catalog.uids[new_path] = old_rid + self.catalog.paths[old_rid] = new_path + + return ob + + def make_orphaned_rid(self, obj): + """Make catalog unhealthy and create an orphaned rid for obj. + + :param obj: the object that will be deleted and leave an unhealthy rid + behind. + + This simulates an issue that surfaces when an object with an extra + rid as created by `make_unhealthy_extra_rid_after_move` is deleted. + + """ + ob = self.make_unhealthy_extra_rid_after_move(obj) + self.delete_object(ob) + + self.maybe_process_indexing_queue() + return ob + + def recatalog_object_with_new_rid(self, obj): + """Make catalog unhealthy by recataloging an object with a new rid.
+ + This will leave the old rid behind in catalog metadata and in the + rid->path mapping but remove it from all indexes. + + """ + self.drop_object_from_catalog_indexes(obj) + + path = '/'.join(obj.getPhysicalPath()) + del self.catalog.uids[path] + + self.catalog.catalogObject(obj, path) + + def delete_object(self, obj): + aq_parent(aq_inner(obj)).manage_delObjects([obj.getId()]) + + def delete_object_silenty(self, obj): + """Silently delete the object without firing any reindexing events.""" + + parent = aq_parent(aq_inner(obj)) + parent._delObject(obj.getId(), suppress_events=True) + + def make_missing_uuid_forward_index_entry(self, obj): + """Make catalog unhealthy by dropping an item from the forward index. + + + :param obj: the object for which the UUIDIndex will be set into an + inconsistent state. + + """ + rid = self.get_rid(obj) + + uuid_index = self.catalog.indexes['UID'] + uuid = uuid_index._unindex[rid] + del uuid_index._index[uuid] + uuid_index._length.change(-1) + + return obj + + def drop_object_from_catalog_indexes(self, obj): + """Make catalog unhealthy by dropping `obj` from all indexes.""" + + self.drop_rid_from_catalog_indexes(self.get_rid(obj)) + + def drop_rid_from_catalog_indexes(self, rid): + """Make catalog unhealthy by dropping `rid` from all indexes.""" + + for index in self.catalog.indexes.values(): + index.unindex_object(rid) diff --git a/ftw/catalogdoctor/tests/test_catalog_healthcheck.py b/ftw/catalogdoctor/tests/test_catalog_healthcheck.py new file mode 100644 index 0000000..833c300 --- /dev/null +++ b/ftw/catalogdoctor/tests/test_catalog_healthcheck.py @@ -0,0 +1,266 @@ +from ftw.builder import Builder +from ftw.builder import create +from ftw.catalogdoctor.tests import FunctionalTestCase +from ftw.catalogdoctor.tests import Mock +from ftw.catalogdoctor.tests import MockFormatter + + +class TestCatalogHealthCheck(FunctionalTestCase): + + def setUp(self): + super(TestCatalogHealthCheck, self).setUp() + + self.grant('Contributor') + 
self.folder = create(Builder('folder').titled(u'Foo')) + + def test_initial_catalog_is_healthy(self): + result = self.run_healthcheck() + + self.assertTrue(result.is_healthy()) + self.assertTrue(result.is_length_healthy()) + + def test_unhealthy_rids_make_catalog_unhealthy(self): + result = self.run_healthcheck() + self.assertTrue(result.is_healthy()) + + result.report_symptom('foo', self.choose_next_rid()) + self.assertFalse(result.is_healthy()) + + def test_longer_uids_make_catalog_unhealthy(self): + self.catalog.uids['foo'] = self.choose_next_rid() + + result = self.run_healthcheck() + self.assertFalse(result.is_length_healthy()) + + def test_longer_paths_make_catalog_unhealthy(self): + self.catalog.paths[self.choose_next_rid()] = 'foo' + + result = self.run_healthcheck() + self.assertFalse(result.is_length_healthy()) + + def test_longer_metadata_make_catalog_unhealthy(self): + self.catalog.data[self.choose_next_rid()] = dict() + + result = self.run_healthcheck() + self.assertFalse(result.is_length_healthy()) + + def test_extra_uid_index_make_catalog_unhealthy(self): + uid_index = self.catalog.indexes['UID'] + unhealthy_rid = self.choose_next_rid() + + mock = Mock() + mock.UID = 'foo' + uid_index.index_object(unhealthy_rid, mock) + + result = self.run_healthcheck() + self.assertFalse(result.is_length_healthy()) + + def test_missing_uid_index_make_catalog_unhealthy(self): + rid = self.get_rid(self.folder) + + uid_index = self.catalog.indexes['UID'] + uid_index.unindex_object(rid) + + result = self.run_healthcheck() + self.assertFalse(result.is_length_healthy()) + + def test_detects_duplicate_entry_in_rid_to_path_mapping_keys(self): + broken_rid = self.choose_next_rid() + self.catalog.paths[broken_rid] = self.get_physical_path(self.folder) + self.catalog._length.change(1) + + result = self.run_healthcheck() + + self.assertFalse(result.is_healthy()) + self.assertEqual(1, len(result.unhealthy_rids)) + self.assertEqual( + ( + 'in_paths_keys_not_in_metadata_keys', + 
'in_paths_keys_not_in_uids_values', + 'uids_tuple_mismatches_paths_tuple', + ), + result.get_symptoms(broken_rid)) + + def test_detects_extra_entry_in_rid_to_path_mapping(self): + extra_rid = self.choose_next_rid() + self.catalog.paths[extra_rid] = '/foo' + self.catalog._length.change(1) + + result = self.run_healthcheck() + + self.assertFalse(result.is_healthy()) + self.assertEqual(1, len(result.unhealthy_rids)) + self.assertEqual( + ( + 'in_paths_keys_not_in_metadata_keys', + 'in_paths_keys_not_in_uids_values', + 'in_paths_values_not_in_uids_keys', + ), + result.get_symptoms(extra_rid)) + + def test_detects_missing_entry_in_rid_to_path_mapping_values(self): + path = self.get_physical_path(self.folder) + rid = self.catalog.uids.pop(path) + + result = self.run_healthcheck() + + self.assertFalse(result.is_healthy()) + self.assertEqual(1, len(result.unhealthy_rids)) + self.assertEqual( + ( + 'in_metadata_keys_not_in_uids_values', + 'in_paths_keys_not_in_uids_values', + 'in_paths_values_not_in_uids_keys', + 'in_uuid_index_not_in_catalog', + 'in_uuid_unindex_not_in_catalog', + ), + result.get_symptoms(rid)) + + def test_detects_duplicate_entry_in_path_to_rid_mapping(self): + rid = self.get_rid(self.folder) + self.catalog.uids['/some/other/path'] = rid + + result = self.run_healthcheck() + + self.assertFalse(result.is_healthy()) + self.assertEqual(1, len(result.unhealthy_rids)) + self.assertEqual( + ( + 'in_uids_keys_not_in_paths_values', + 'paths_tuple_mismatches_uids_tuple', + ), + result.get_symptoms(rid)) + + def test_detects_missing_entry_in_path_to_rid_mapping(self): + rid = self.get_rid(self.folder) + del self.catalog.paths[rid] + + result = self.run_healthcheck() + + self.assertFalse(result.is_healthy()) + self.assertEqual(1, len(result.unhealthy_rids)) + self.assertEqual( + ( + 'in_metadata_keys_not_in_paths_keys', + 'in_uids_keys_not_in_paths_values', + 'in_uids_values_not_in_paths_keys', + ), + result.get_symptoms(rid)) + + def 
test_detects_extra_entry_in_path_to_rid_mapping(self): + extra_rid = self.choose_next_rid() + self.catalog.uids['/foo'] = extra_rid + + result = self.run_healthcheck() + + self.assertFalse(result.is_healthy()) + self.assertEqual(1, len(result.unhealthy_rids)) + self.assertEqual( + ( + 'in_catalog_not_in_uuid_index', + 'in_catalog_not_in_uuid_unindex', + 'in_uids_keys_not_in_paths_values', + 'in_uids_values_not_in_metadata_keys', + 'in_uids_values_not_in_paths_keys', + ), + result.get_symptoms(extra_rid)) + + def test_detects_extra_entry_in_metadata(self): + extra_rid = self.choose_next_rid() + self.catalog.data[extra_rid] = dict() + + result = self.run_healthcheck() + + self.assertFalse(result.is_healthy()) + self.assertEqual(1, len(result.unhealthy_rids)) + self.assertEqual( + ( + 'in_metadata_keys_not_in_paths_keys', + 'in_metadata_keys_not_in_uids_values', + ), + result.get_symptoms(extra_rid)) + + def test_detects_extra_entry_in_uuid_index(self): + extra_rid = self.choose_next_rid() + uuid_index = self.catalog.indexes['UID'] + uuid_index._index['foo'] = extra_rid + + result = self.run_healthcheck() + + self.assertFalse(result.is_healthy()) + self.assertEqual(1, len(result.unhealthy_rids)) + self.assertEqual( + ( + 'in_uuid_index_not_in_catalog', + 'in_uuid_index_not_in_uuid_unindex', + ), + result.get_symptoms(extra_rid)) + + def test_detects_swapped_uuid_index_tuple(self): + folder_2 = create(Builder('folder').titled(u'Bar')) + + rid = self.get_rid(self.folder) + rid_2 = self.get_rid(folder_2) + uuid_index = self.catalog.indexes['UID'] + uuid = uuid_index._unindex[rid] + uuid_2 = uuid_index._unindex[rid_2] + uuid_index._index[uuid] = rid_2 + uuid_index._index[uuid_2] = rid + + result = self.run_healthcheck() + self.assertFalse(result.is_healthy()) + self.assertEqual(2, len(result.unhealthy_rids)) + self.assertEqual( + ( + 'uuid_index_tuple_mismatches_uuid_unindex_tuple', + 'uuid_unindex_tuple_mismatches_uuid_index_tuple', + ), + result.get_symptoms(rid)) + 
self.assertEqual( + ( + 'uuid_index_tuple_mismatches_uuid_unindex_tuple', + 'uuid_unindex_tuple_mismatches_uuid_index_tuple', + ), + result.get_symptoms(rid_2)) + + def test_detects_extra_rid_in_uuid_unindex(self): + extra_rid = self.choose_next_rid() + + uuid_index = self.catalog.indexes['UID'] + uuid_index._unindex[extra_rid] = 'qux' + + result = self.run_healthcheck() + + self.assertFalse(result.is_healthy()) + self.assertEqual(1, len(result.unhealthy_rids)) + self.assertEqual( + ( + 'in_uuid_unindex_not_in_catalog', + 'in_uuid_unindex_not_in_uuid_index', + ), + result.get_symptoms(extra_rid)) + + def test_logging(self): + extra_rid = self.choose_next_rid() + self.catalog.data[extra_rid] = dict() + + result = self.run_healthcheck() + formatter = MockFormatter() + result.write_result(formatter) + expected = [ + 'Catalog health check report:', + 'Inconsistent catalog length:', + ' claimed length: 1', + ' uids length: 1', + ' paths length: 1', + ' metadata length: 2', + ' uid index claimed length: 1', + ' uid index index length: 1', + ' uid index unindex length: 1', + 'Catalog data is unhealthy, found 1 unhealthy rids:', + 'rid 98 (--no path--):', + '\t- in_metadata_keys_not_in_paths_keys', + '\t- in_metadata_keys_not_in_uids_values', + '', + ] + self.assertEqual(expected, formatter.getlines()) diff --git a/ftw/catalogdoctor/tests/test_command.py b/ftw/catalogdoctor/tests/test_command.py new file mode 100644 index 0000000..0fe653d --- /dev/null +++ b/ftw/catalogdoctor/tests/test_command.py @@ -0,0 +1,164 @@ +from ftw.builder import Builder +from ftw.builder import create +from ftw.catalogdoctor.tests import FunctionalTestCase + + +class TestDoctorCommand(FunctionalTestCase): + + maxDiff = None + + def setUp(self): + super(TestDoctorCommand, self).setUp() + + self.grant('Contributor') + self.folder = create(Builder('folder').titled(u'Foo')) + + def test_healthcheck_healthy_catalog(self): + expected = [ + 'Catalog health check report:', + 'Catalog length is 
consistent at 1.', + 'Catalog data is healthy.'] + self.assertEqual(expected, self.run_command('doctor', 'healthcheck')) + + def test_healthcheck_unhealthy_catalog(self): + extra_rid = self.choose_next_rid() + self.catalog.data[extra_rid] = dict() + + expected = [ + 'Catalog health check report:', + 'Inconsistent catalog length:', + ' claimed length: 1', + ' uids length: 1', + ' paths length: 1', + ' metadata length: 2', + ' uid index claimed length: 1', + ' uid index index length: 1', + ' uid index unindex length: 1', + 'Catalog data is unhealthy, found 1 unhealthy rids:', + 'rid 98 (--no path--):', + '\t- in_metadata_keys_not_in_paths_keys', + '\t- in_metadata_keys_not_in_uids_values', + '', + ] + self.assertEqual(expected, self.run_command('doctor', 'healthcheck')) + + def test_surgery_healthy_catlog(self): + expected = [ + 'Catalog health check report:', + 'Catalog length is consistent at 1.', + 'Catalog data is healthy.', + 'Catalog is healthy, no surgery is needed.', + ] + self.assertEqual(expected, self.run_command('doctor', 'surgery')) + + def test_surgery_unhealthy_catalog_unfixable_problem(self): + extra_rid = self.choose_next_rid() + self.catalog.data[extra_rid] = dict() + + expected = [ + 'Catalog health check report:', + 'Inconsistent catalog length:', + ' claimed length: 1', + ' uids length: 1', + ' paths length: 1', + ' metadata length: 2', + ' uid index claimed length: 1', + ' uid index index length: 1', + ' uid index unindex length: 1', + 'Catalog data is unhealthy, found 1 unhealthy rids:', + 'rid {} (--no path--):'.format(extra_rid), + '\t- in_metadata_keys_not_in_paths_keys', + '\t- in_metadata_keys_not_in_uids_values', + '', + 'Performing surgery:', + 'The following unhealthy rids could not be fixed:', + 'rid {} (--no path--):'.format(extra_rid), + '\t- in_metadata_keys_not_in_paths_keys', + '\t- in_metadata_keys_not_in_uids_values', + '', + 'Not all health problems could be fixed, aborting.' 
+ ] + self.assertEqual(expected, self.run_command('doctor', 'surgery')) + + def test_successful_surgery_unhealthy_catalog(self): + path = self.get_physical_path(self.folder) + rid = self.catalog.uids.pop(path) + # drop from uid index index, leave in unindex + uid_index = self.catalog.indexes['UID'] + uid_index.removeForwardIndexEntry(uid_index._unindex[rid], rid) + self.portal._delObject(self.folder.getId(), suppress_events=True) + + expected = [ + 'Catalog health check report:', + 'Inconsistent catalog length:', + ' claimed length: 1', + ' uids length: 0', + ' paths length: 1', + ' metadata length: 1', + ' uid index claimed length: 0', + ' uid index index length: 0', + ' uid index unindex length: 1', + 'Catalog data is unhealthy, found 1 unhealthy rids:', + 'rid {} (\'/plone/foo\'):'.format(rid), + '\t- in_metadata_keys_not_in_uids_values', + '\t- in_paths_keys_not_in_uids_values', + '\t- in_paths_values_not_in_uids_keys', + '\t- in_uuid_unindex_not_in_catalog', + '\t- in_uuid_unindex_not_in_uuid_index', + '', + 'Performing surgery:', + 'rid {} (\'/plone/foo\'):'.format(rid), + '\t- Removed rid from all catalog indexes.', + '\t- Removed rid from paths (the rid->path mapping).', + '\t- Removed rid from catalog metadata.', + '', + 'Performing post-surgery healthcheck:', + 'Catalog health check report:', + 'Catalog length is consistent at 0.', + 'Catalog data is healthy.', + 'Surgery was successful, known health problems could be fixed!', + ] + self.assertEqual(expected, self.run_command('doctor', 'surgery')) + + def test_successful_surgery_unhealthy_catalog_dryrun(self): + path = self.get_physical_path(self.folder) + rid = self.catalog.uids.pop(path) + # drop from uid index index, leave in unindex + uid_index = self.catalog.indexes['UID'] + uid_index.removeForwardIndexEntry(uid_index._unindex[rid], rid) + self.portal._delObject(self.folder.getId(), suppress_events=True) + + expected = [ + 'Performing dryrun!', + '', + 'Catalog health check report:', + 'Inconsistent 
catalog length:', + ' claimed length: 1', + ' uids length: 0', + ' paths length: 1', + ' metadata length: 1', + ' uid index claimed length: 0', + ' uid index index length: 0', + ' uid index unindex length: 1', + 'Catalog data is unhealthy, found 1 unhealthy rids:', + 'rid {} (\'/plone/foo\'):'.format(rid), + '\t- in_metadata_keys_not_in_uids_values', + '\t- in_paths_keys_not_in_uids_values', + '\t- in_paths_values_not_in_uids_keys', + '\t- in_uuid_unindex_not_in_catalog', + '\t- in_uuid_unindex_not_in_uuid_index', + '', + 'Performing surgery:', + 'rid {} (\'/plone/foo\'):'.format(rid), + '\t- Removed rid from all catalog indexes.', + '\t- Removed rid from paths (the rid->path mapping).', + '\t- Removed rid from catalog metadata.', + '', + 'Performing post-surgery healthcheck:', + 'Catalog health check report:', + 'Catalog length is consistent at 0.', + 'Catalog data is healthy.', + 'Surgery would have been successful, but was aborted due to ' + 'dryrun!', + ] + self.assertEqual(expected, self.run_command('doctor', '-n', 'surgery')) diff --git a/ftw/catalogdoctor/tests/test_profiles.py b/ftw/catalogdoctor/tests/test_profiles.py deleted file mode 100644 index 140f97f..0000000 --- a/ftw/catalogdoctor/tests/test_profiles.py +++ /dev/null @@ -1,7 +0,0 @@ -from ftw.catalogdoctor.tests import FunctionalTestCase - - -class TestDefaultProfile(FunctionalTestCase): - - def test_installed(self): - pass diff --git a/ftw/catalogdoctor/tests/test_selftest.py b/ftw/catalogdoctor/tests/test_selftest.py new file mode 100644 index 0000000..88a4859 --- /dev/null +++ b/ftw/catalogdoctor/tests/test_selftest.py @@ -0,0 +1,293 @@ +from datetime import date +from ftw.builder import Builder +from ftw.builder import create +from ftw.catalogdoctor.compat import DateRecurringIndex +from ftw.catalogdoctor.tests import FunctionalTestCase +from ftw.catalogdoctor.utils import find_keys_pointing_to_rid +from plone.app.folder.nogopip import GopipIndex +from 
Products.ExtendedPathIndex.ExtendedPathIndex import ExtendedPathIndex +from Products.PluginIndexes.BooleanIndex.BooleanIndex import BooleanIndex +from Products.PluginIndexes.DateIndex.DateIndex import DateIndex +from Products.PluginIndexes.DateRangeIndex.DateRangeIndex import DateRangeIndex +from Products.PluginIndexes.FieldIndex.FieldIndex import FieldIndex +from Products.PluginIndexes.KeywordIndex.KeywordIndex import KeywordIndex +from Products.PluginIndexes.UUIDIndex.UUIDIndex import UUIDIndex +from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex + + +class TestSelftest(FunctionalTestCase): + """Selftest for functional test case helpers.""" + + maxDiff = None + + def setUp(self): + super(TestSelftest, self).setUp() + + self.grant('Contributor') + self.parent = create(Builder('folder') + .titled(u'parent')) + self.child = create(Builder('folder') + .within(self.parent) + .titled(u'child')) + + # pretend we are something that supports recurring dates + self.child.start = date(2010, 1, 1) + self.child.recurrence = 'FREQ=DAILY;INTERVAL=1;COUNT=5' + self.child.isPrincipiaFolderish = False + self.reindex_object(self.child) + + def test_make_unhealthy_extra_rid_after_move(self): + """Selftest that broken rids are created correctly. + + Apparently the problem surfaces only with plone < 5. + + Document in what way the catalog is broken when an extra rid is + created. This has been verified against productive deployments + where this issue is present. 
+ + """ + self.make_unhealthy_extra_rid_after_move(self.child) + + result = self.run_healthcheck() + self.assertFalse(result.is_healthy()) + self.assertEqual(1, len(result.unhealthy_rids)) + extra_rid = result.get_unhealthy_rids()[0].rid + + self.assertTrue(extra_rid in self.catalog.paths) + self.assertFalse(extra_rid in self.catalog.uids.values()) + + uuid_index = self.catalog.indexes['UID'] + self.assertTrue(extra_rid in uuid_index._unindex) + self.assertFalse(extra_rid in uuid_index._index.values()) + + for name, index in self.catalog.indexes.items(): + # Purposefully don't use isinstance to avoid being bitten by + # subclasses that change how the index behaves or add additional + # internal data structures which we would not cover here, e.g.: + # `ExtendedPathIndex` v.s. `PathIndex`. + + if index.__class__ in (FieldIndex, DateIndex, + DateRecurringIndex, KeywordIndex): + # These indices seem to consistently contain the extra rid, i.e. + # it is left behind in the forward index and also in the + # backward indices. + rows_with_rid = find_keys_pointing_to_rid( + index._index, extra_rid) + if rows_with_rid: + self.assertIn(extra_rid, index._unindex) + if extra_rid in index._unindex: + self.assertGreaterEqual(len(rows_with_rid), 1) + + elif index.__class__ == ZCTextIndex: + # Our broken object test provides values for all ZCTextIndex + # indices. All ZCTextIndex indices that contain the extra rid + # seem to contain it consistently. + self.assertTrue(index.index.has_doc(extra_rid)) + + elif index.__class__ == UUIDIndex: + # We expect only one UUIDIndex and we have already handled it + # explicitly above + if name != 'UID': + self.fail('Unexpected uuid index: {}'.format(index)) + + elif index.__class__ == DateRangeIndex: + # The index seems to be consistent, forward and backward + # indices contain the extra rid. 
+ self.assertTrue(extra_rid in index._unindex) + self.assertTrue(any(( + # _always: [rid] + extra_rid in index._always, + # all other extra indices provide: {date: [rid]} + find_keys_pointing_to_rid(index._since_only, extra_rid), + find_keys_pointing_to_rid(index._until_only, extra_rid), + find_keys_pointing_to_rid(index._since, extra_rid), + find_keys_pointing_to_rid(index._until, extra_rid), + ))) + + elif index.__class__ == BooleanIndex: + # The index seems to be consistent, forward and backward + # indices contain the extra rid. + self.assertIn(extra_rid, index._unindex) + if index._unindex[extra_rid] == index._index_value: + self.assertIn(extra_rid, index._index) + + elif index.__class__ == ExtendedPathIndex: + # The index seems to be consistent, forward and backward + # indices contain the extra rid. + # _unindex: {rid: path} + self.assertIn(extra_rid, index._unindex) + # _index_items: {path: rid} + self.assertIn(extra_rid, index._index_items.values()) + # _index_parents: {path: [rid]} (path to rid of children) + paths_with_rid_as_child = find_keys_pointing_to_rid( + index._index_parents, extra_rid) + self.assertEqual(1, len(paths_with_rid_as_child)) + # _index: {component: {level: [rid]}} (component to level to rid) + components_with_rid = [component for component, level_to_rid in index._index.items() + if any(extra_rid in rids for level, rids in level_to_rid.items())] + self.assertGreaterEqual(len(components_with_rid), 1) + + elif index.__class__ == GopipIndex: + # This isn't a real index. 
+ pass + + else: + self.fail('Unhandled index type: {}'.format(index)) + + self.assertEqual( + ( + 'in_metadata_keys_not_in_uids_values', + 'in_paths_keys_not_in_uids_values', + 'in_uuid_unindex_not_in_catalog', + 'in_uuid_unindex_not_in_uuid_index', + 'uids_tuple_mismatches_paths_tuple', + ), + result.get_symptoms(extra_rid)) + + def test_make_orphaned_rid(self): + self.make_orphaned_rid(self.child) + + result = self.run_healthcheck() + self.assertFalse(result.is_healthy()) + self.assertEqual(1, len(result.unhealthy_rids)) + orphaned_rid = result.get_unhealthy_rids()[0].rid + + self.assertTrue(orphaned_rid in self.catalog.paths) + self.assertFalse(orphaned_rid in self.catalog.uids.values()) + + uuid_index = self.catalog.indexes['UID'] + self.assertTrue(orphaned_rid in uuid_index._unindex) + self.assertFalse(orphaned_rid in uuid_index._index.values()) + + for name, index in self.catalog.indexes.items(): + # Purposefully don't use isinstance to avoid being bitten by + # subclasses that change how the index behaves or add additional + # internal data structures which we would not cover here, e.g.: + # `ExtendedPathIndex` v.s. `PathIndex`. + + if index.__class__ in (FieldIndex, DateIndex, + DateRecurringIndex, KeywordIndex): + # These indices seem to consistently contain the orphaned rid, + # i.e. it is left behind in the forward index and also in the + # backward indices. + rows_with_rid = find_keys_pointing_to_rid( + index._index, orphaned_rid) + if rows_with_rid: + self.assertIn(orphaned_rid, index._unindex) + if orphaned_rid in index._unindex: + self.assertGreaterEqual(len(rows_with_rid), 1) + + elif index.__class__ == ZCTextIndex: + # Our broken object test provides values for all ZCTextIndex + # indices. All ZCTextIndex indices that contain the extra rid + # seem to contain it consistently. 
+ self.assertTrue(index.index.has_doc(orphaned_rid)) + + elif index.__class__ == UUIDIndex: + # We expect only one UUIDIndex and we have already handled it + # explicitly above + if name != 'UID': + self.fail('Unexpected uuid index: {}'.format(index)) + + elif index.__class__ == DateRangeIndex: + # The index seems to be consistent, forward and backward + # indices contain the orphaned rid. + self.assertTrue(orphaned_rid in index._unindex) + self.assertTrue(any(( + # _always: [rid] + orphaned_rid in index._always, + # all other extra indices provide: {date: [rid]} + find_keys_pointing_to_rid(index._since_only, orphaned_rid), + find_keys_pointing_to_rid(index._until_only, orphaned_rid), + find_keys_pointing_to_rid(index._since, orphaned_rid), + find_keys_pointing_to_rid(index._until, orphaned_rid), + ))) + + elif index.__class__ == BooleanIndex: + # The index seems to be consistent, forward and backward + # indices contain the orphaned rid. + self.assertIn(orphaned_rid, index._unindex) + if index._unindex[orphaned_rid] == index._index_value: + self.assertIn(orphaned_rid, index._index) + + elif index.__class__ == ExtendedPathIndex: + # The index seems to be consistent, forward and backward + # indices contain the orphaned rid. + # _unindex: {rid: path} + self.assertIn(orphaned_rid, index._unindex) + # _index_items: {path: rid} + self.assertIn(orphaned_rid, index._index_items.values()) + # _index_parents: {path: [rid]} (path to rid of children) + paths_with_rid_as_child = find_keys_pointing_to_rid( + index._index_parents, orphaned_rid) + self.assertEqual(1, len(paths_with_rid_as_child)) + # _index: {component: {level: [rid]}} (component to level to rid) + components_with_rid = [component for component, level_to_rid in index._index.items() + if any(orphaned_rid in rids for level, rids in level_to_rid.items())] + self.assertGreaterEqual(len(components_with_rid), 1) + + elif index.__class__ == GopipIndex: + # This isn't a real index. 
+ pass + + else: + self.fail('Unhandled index type: {}'.format(index)) + + self.assertEqual( + ( + 'in_metadata_keys_not_in_uids_values', + 'in_paths_keys_not_in_uids_values', + 'in_paths_values_not_in_uids_keys', + 'in_uuid_unindex_not_in_catalog', + 'in_uuid_unindex_not_in_uuid_index', + ), + result.get_symptoms(orphaned_rid)) + + def test_make_missing_uuid_forward_index_entry(self): + self.make_missing_uuid_forward_index_entry(self.parent) + + result = self.run_healthcheck() + self.assertFalse(result.is_healthy()) + self.assertEqual(1, len(result.unhealthy_rids)) + rid = result.get_unhealthy_rids()[0].rid + + self.assertTrue(rid in self.catalog.paths) + self.assertTrue(rid in self.catalog.uids.values()) + + uuid_index = self.catalog.indexes['UID'] + self.assertTrue(rid in uuid_index._unindex) + self.assertFalse(rid in uuid_index._index.values()) + self.assertEqual(1, uuid_index._length()) + + self.assertEqual( + ( + 'in_catalog_not_in_uuid_index', + 'in_uuid_unindex_not_in_uuid_index', + ), + result.get_symptoms(rid)) + + def test_drop_object_from_catalog_indexes(self): + rid = self.get_rid(self.child) + self.drop_object_from_catalog_indexes(self.child) + + result = self.run_healthcheck() + self.assertFalse(result.is_healthy()) + self.assertEqual(1, len(result.unhealthy_rids)) + self.assertEqual(rid, result.get_unhealthy_rids()[0].rid) + self.assertTrue(rid in self.catalog.paths) + self.assertTrue(rid in self.catalog.uids.values()) + + uuid_index = self.catalog.indexes['UID'] + self.assertFalse(rid in uuid_index._unindex) + self.assertFalse(rid in uuid_index._index.values()) + self.assertEqual(1, uuid_index._length()) + + for index in self.catalog.indexes.values(): + self.assertFalse(index.getEntryForObject(rid)) + + self.assertEqual( + ( + 'in_catalog_not_in_uuid_index', + 'in_catalog_not_in_uuid_unindex', + ), + result.get_symptoms(rid)) diff --git a/ftw/catalogdoctor/tests/test_surgery.py b/ftw/catalogdoctor/tests/test_surgery.py new file mode 100644 index 
0000000..d61b07a --- /dev/null +++ b/ftw/catalogdoctor/tests/test_surgery.py @@ -0,0 +1,293 @@ +from ftw.builder import Builder +from ftw.builder import create +from ftw.catalogdoctor.exceptions import CantPerformSurgery +from ftw.catalogdoctor.surgery import CatalogDoctor +from ftw.catalogdoctor.surgery import ReindexMissingUUID +from ftw.catalogdoctor.surgery import RemoveExtraRid +from ftw.catalogdoctor.surgery import RemoveOrphanedRid +from ftw.catalogdoctor.surgery import RemoveRidOrReindexObject +from ftw.catalogdoctor.tests import FunctionalTestCase + + +class TestSurgery(FunctionalTestCase): + """Test various surgery can be performed successfully.""" + + maxDiff = None + + def setUp(self): + super(TestSurgery, self).setUp() + + self.grant('Contributor') + self.parent = create(Builder('folder') + .titled(u'parent')) + self.child = create(Builder('folder') + .within(self.parent) + .titled(u'child')) + + def test_surgery_remove_extra_rid_with_partial_uuid(self): + self.make_unhealthy_extra_rid_after_move(self.child) + + result = self.run_healthcheck() + self.assertFalse(result.is_healthy()) + self.assertEqual(1, len(result.get_unhealthy_rids())) + unhealthy_rid = result.get_unhealthy_rids()[0] + + self.assertEqual( + ( + 'in_metadata_keys_not_in_uids_values', + 'in_paths_keys_not_in_uids_values', + 'in_uuid_unindex_not_in_catalog', + 'in_uuid_unindex_not_in_uuid_index', + 'uids_tuple_mismatches_paths_tuple', + ), + result.get_symptoms(unhealthy_rid.rid)) + + doctor = CatalogDoctor(self.catalog, unhealthy_rid) + self.assertIs(RemoveExtraRid, doctor.get_surgery()) + doctor.perform_surgery() + + result = self.run_healthcheck() + self.assertTrue(result.is_healthy()) + + def test_surgery_remove_extra_rid_without_partial_uuid(self): + self.recatalog_object_with_new_rid(self.child) + + result = self.run_healthcheck() + self.assertFalse(result.is_healthy()) + self.assertEqual(1, len(result.get_unhealthy_rids())) + unhealthy_rid = result.get_unhealthy_rids()[0] + + 
def test_surgery_remove_orphaned_rid_not_in_indexes(self):
    """RemoveOrphanedRid heals an orphaned rid that no index knows.

    The child is dropped from the indexes, deleted, and its path removed
    from the catalog's ``uids`` mapping before the healthcheck runs.
    """
    child_path = '/'.join(self.child.getPhysicalPath())
    self.drop_object_from_catalog_indexes(self.child)
    self.delete_object_silenty(self.child)
    del self.catalog.uids[child_path]

    expected_symptoms = (
        'in_metadata_keys_not_in_uids_values',
        'in_paths_keys_not_in_uids_values',
        'in_paths_values_not_in_uids_keys',
    )

    checkup = self.run_healthcheck()
    self.assertFalse(checkup.is_healthy())
    self.assertEqual(1, len(checkup.get_unhealthy_rids()))
    patient = checkup.get_unhealthy_rids()[0]
    self.assertEqual(expected_symptoms, checkup.get_symptoms(patient.rid))

    catalog_doctor = CatalogDoctor(self.catalog, patient)
    self.assertIs(RemoveOrphanedRid, catalog_doctor.get_surgery())
    catalog_doctor.perform_surgery()

    aftercare = self.run_healthcheck()
    self.assertTrue(aftercare.is_healthy())
def test_surgery_remove_object_moved_into_parent_and_found_via_acquisition(self):
    """Surgery heals a rid registered for a moved object's old path.

    A grandchild is moved from ``child`` into ``parent``; its pre-move
    path is then registered in the catalog again under a fresh rid.
    ``RemoveRidOrReindexObject`` is expected to be chosen and to leave
    the catalog healthy.
    """
    nested = create(Builder('folder')
                    .within(self.child)
                    .titled(u'nastygrandchild'))
    stale_path = '/'.join(nested.getPhysicalPath())

    # cut the grandchild out of child and paste it into child's parent
    clipboard = self.child.manage_cutObjects(nested.getId())
    self.parent.manage_pasteObjects(clipboard)

    # register the pre-move path once more, under a different rid
    rid = self.choose_next_rid()
    self.catalog.uids[stale_path] = rid
    self.catalog.paths[rid] = stale_path
    self.catalog.data[rid] = {}
    self.catalog._length.change(1)

    expected_symptoms = (
        'in_catalog_not_in_uuid_index',
        'in_catalog_not_in_uuid_unindex',
    )

    checkup = self.run_healthcheck()
    self.assertFalse(checkup.is_healthy())
    self.assertEqual(1, len(checkup.get_unhealthy_rids()))
    patient = checkup.get_unhealthy_rids()[0]
    self.assertEqual(rid, patient.rid)
    self.assertEqual(expected_symptoms, checkup.get_symptoms(patient.rid))

    catalog_doctor = CatalogDoctor(self.catalog, patient)
    self.assertIs(RemoveRidOrReindexObject, catalog_doctor.get_surgery())
    catalog_doctor.perform_surgery()

    aftercare = self.run_healthcheck()
    self.assertTrue(aftercare.is_healthy())
def test_surgery_remove_untraversable_object_from_catalog(self):
    """Surgery removes the rid of a deleted object from the catalog.

    The child is dropped from the indexes and deleted while its rid stays
    in the catalog's ``paths`` and ``data``; after surgery the rid must be
    gone from both and the catalog length must shrink from 2 to 1.
    """
    rid = self.get_rid(self.child)
    self.drop_object_from_catalog_indexes(self.child)
    self.delete_object_silenty(self.child)

    # the rid is still registered in the catalog before surgery
    self.assertEqual(2, len(self.catalog))
    self.assertIn(rid, self.catalog.paths)
    self.assertIn(rid, self.catalog.data)

    expected_symptoms = (
        'in_catalog_not_in_uuid_index',
        'in_catalog_not_in_uuid_unindex',
    )

    checkup = self.run_healthcheck()
    self.assertFalse(checkup.is_healthy())
    self.assertEqual(1, len(checkup.get_unhealthy_rids()))
    patient = checkup.get_unhealthy_rids()[0]
    self.assertEqual(rid, patient.rid)
    self.assertEqual(expected_symptoms, patient.catalog_symptoms)

    catalog_doctor = CatalogDoctor(self.catalog, patient)
    self.assertIs(RemoveRidOrReindexObject, catalog_doctor.get_surgery())
    catalog_doctor.perform_surgery()

    # surgery dropped the rid from the catalog entirely
    self.assertEqual(1, len(self.catalog))
    self.assertNotIn(rid, self.catalog.paths)
    self.assertNotIn(rid, self.catalog.data)

    aftercare = self.run_healthcheck()
    self.assertTrue(aftercare.is_healthy())
def test_remove_from_boolean_index(self):
    """RemoveFromBooleanIndex drops a rid present in both index sides."""
    rid = self.get_rid(self.folder)
    index = self.catalog.indexes['is_folderish']

    # before surgery: rid is in the forward and the reverse index
    self.assertIn(rid, index._index)
    self.assertIn(rid, index._unindex)
    self.assertEqual(1, len(index._index))
    self.assertEqual(1, index._index_length.value)
    self.assertEqual(1, len(index._unindex))
    # The total length is one higher than the single indexed object would
    # suggest. The cause is unknown; this was not observed in production
    # and may be an artifact of the test setup.
    self.assertEqual(2, index._length.value)

    RemoveFromBooleanIndex(index, rid).perform()

    # after surgery: rid is gone from both sides
    self.assertNotIn(rid, index._index)
    self.assertNotIn(rid, index._unindex)
    self.assertEqual(0, len(index._index))
    self.assertEqual(0, index._index_length.value)
    self.assertEqual(0, len(index._unindex))
    # Same unexplained off-by-one as above: the length ends at 1, not 0.
    self.assertEqual(1, index._length.value)
def test_remove_from_reverse_index_only(self):
    """RemoveFromBooleanIndex drops a rid that is only in the unindex."""
    rid = self.get_rid(self.folder)
    index = self.catalog.indexes['is_folderish']

    # take the rid out of the forward index so only the reverse
    # index still references it
    index._index.remove(rid)
    index._index_length.change(-1)

    self.assertEqual(0, len(index._index))
    self.assertEqual(0, index._index_length.value)
    self.assertEqual(1, len(index._unindex))
    # The total length is one higher than the single indexed object would
    # suggest. The cause is unknown; this was not observed in production
    # and may be an artifact of the test setup.
    self.assertEqual(2, index._length.value)

    RemoveFromBooleanIndex(index, rid).perform()

    self.assertNotIn(rid, index._index)
    self.assertNotIn(rid, index._unindex)
    self.assertEqual(0, len(index._index))
    self.assertEqual(0, index._index_length.value)
    self.assertEqual(0, len(index._unindex))
    # Same unexplained off-by-one as above: the length ends at 1, not 0.
    self.assertEqual(1, index._length.value)
def test_remove_from_daterange_index_since_only(self):
    """RemoveFromDateRangeIndex drops a rid indexed with a start only."""
    self.set_effective_range(date(2010, 1, 1), None)

    rid = self.get_rid(self.folder)
    index = self.catalog.indexes['effectiveRange']

    def count_keys(mapping):
        # number of keys in ``mapping`` whose value references the rid
        return len(find_keys_pointing_to_rid(mapping, rid))

    # with only a start date the rid is expected in ``_since_only`` alone
    self.assertNotIn(rid, index._always)
    self.assertEqual(1, count_keys(index._since_only))
    self.assertEqual(0, count_keys(index._until_only))
    self.assertEqual(0, count_keys(index._since))
    self.assertEqual(0, count_keys(index._until))
    self.assertIn(rid, index._unindex)

    RemoveFromDateRangeIndex(index, rid).perform()

    self.assertEqual(0, count_keys(index._since_only))
    self.assertNotIn(rid, index._unindex)
def test_remove_from_daterange_index_always(self):
    """RemoveFromDateRangeIndex drops a rid indexed without any dates."""
    self.set_effective_range(None, None)

    rid = self.get_rid(self.folder)
    index = self.catalog.indexes['effectiveRange']

    def count_keys(mapping):
        # number of keys in ``mapping`` whose value references the rid
        return len(find_keys_pointing_to_rid(mapping, rid))

    # without start and end the rid is expected in ``_always`` alone
    self.assertIn(rid, index._always)
    self.assertEqual(0, count_keys(index._since_only))
    self.assertEqual(0, count_keys(index._until_only))
    self.assertEqual(0, count_keys(index._since))
    self.assertEqual(0, count_keys(index._until))
    self.assertIn(rid, index._unindex)

    RemoveFromDateRangeIndex(index, rid).perform()

    self.assertNotIn(rid, index._always)
    self.assertNotIn(rid, index._unindex)
def test_remove_parent_from_extended_path_index(self):
    """RemoveFromExtendedPathIndex drops a folder that has a child.

    Checks every internal structure of the ``path`` index before and
    after surgery on the rid of ``self.folder``.
    """
    rid = self.get_rid(self.folder)
    index = self.catalog.indexes['path']

    def count_keys(mapping):
        # number of keys in ``mapping`` whose value references the rid
        return len(find_keys_pointing_to_rid(mapping, rid))

    # --- before surgery ---
    # _index: rid listed below the plone site (level 0), its own id
    # (level 1) and the terminator (level 1)
    self.assertIn(rid, index._index['plone'][0])
    self.assertIn(rid, index._index['foo'][1])
    self.assertIn(rid, index._index[None][1])
    # five keys: 3 objects, plone, terminator
    self.assertEqual(5, len(index._index))
    # _index_items
    self.assertEqual(1, count_keys(index._index_items))
    self.assertEqual(3, len(index._index_items))
    # _index_parents
    self.assertEqual(1, count_keys(index._index_parents))
    self.assertEqual(2, len(index._index_parents))
    # _unindex
    self.assertIn(rid, index._unindex)
    self.assertEqual(3, len(index._unindex))
    # index stats
    self.assertEqual(3, len(index))

    RemoveFromExtendedPathIndex(index, rid).perform()

    # --- after surgery ---
    # _index: the 'foo' key itself stays because the folder has children,
    # so the number of keys is unchanged
    self.assertNotIn(rid, index._index['plone'][0])
    self.assertNotIn(rid, index._index['foo'][1])
    self.assertNotIn(rid, index._index[None][1])
    self.assertEqual(5, len(index._index))
    # _index_items
    self.assertEqual(0, count_keys(index._index_items))
    self.assertEqual(2, len(index._index_items))
    # _index_parents
    self.assertEqual(0, count_keys(index._index_parents))
    self.assertEqual(2, len(index._index_parents))
    # _unindex
    self.assertNotIn(rid, index._unindex)
    self.assertEqual(2, len(index._unindex))
    # index stats
    self.assertEqual(2, len(index))
def test_remove_object_from_reverse_index_only(self):
    """RemoveFromUnIndex drops a rid that is left in the unindex only."""
    rid = self.get_rid(self.folder)
    index = self.catalog.indexes['Type']

    forward_keys = find_keys_pointing_to_rid(index, rid)
    self.assertEqual(1, len(forward_keys))
    self.assertIn(rid, index._unindex)
    self.assertEqual(1, len(index))

    # delete the forward entry by hand so only the reverse index
    # still knows the rid
    stale_key = forward_keys[0]
    del index._index[stale_key]
    index._length.change(-1)

    RemoveFromUnIndex(index, rid).perform()

    remaining_keys = find_keys_pointing_to_rid(index, rid)
    self.assertEqual(0, len(remaining_keys))
    self.assertNotIn(rid, index._unindex)
    self.assertEqual(0, len(index))
def test_remove_healthy_object_from_keywordindex(self):
    """RemoveFromUnIndex drops a healthy rid from a KeywordIndex."""
    rid = self.get_rid(self.folder)
    index = self.catalog.indexes['object_provides']
    self.assertIs(KeywordIndex, type(index))

    # the rid is referenced by at least one keyword before surgery
    keys_before = find_keys_pointing_to_rid(index, rid)
    self.assertGreater(len(keys_before), 0)
    self.assertIn(rid, index._unindex)
    self.assertGreater(len(index), 0)

    RemoveFromUnIndex(index, rid).perform()

    keys_after = find_keys_pointing_to_rid(index, rid)
    self.assertEqual(0, len(keys_after))
    self.assertNotIn(rid, index._unindex)
    self.assertEqual(0, len(index))
def test_remove_healthy_object(self):
    """RemoveFromUUIDIndex drops a rid present in both index sides."""
    rid = self.get_rid(self.folder)
    index = self.catalog.indexes['UID']

    keys_before = find_keys_pointing_to_rid(index._index, rid)
    self.assertEqual(1, len(keys_before))
    self.assertIn(rid, index._unindex)
    self.assertEqual(1, len(index))

    RemoveFromUUIDIndex(index, rid).perform()

    keys_after = find_keys_pointing_to_rid(index._index, rid)
    self.assertEqual(0, len(keys_after))
    self.assertNotIn(rid, index._unindex)
    self.assertEqual(0, len(index))
class TestUnindexObject(FunctionalTestCase):
    """Test the UnindexObject surgery against the SearchableText index."""

    def setUp(self):
        super(TestUnindexObject, self).setUp()

        self.grant('Contributor')
        # folder with a title and a description
        folder_builder = Builder('folder').titled(u'Foo')
        folder_builder = folder_builder.having(
            description=u'Lorem ipsum dolor sit')
        self.folder = create(folder_builder)

    def test_remove_from_zctextindex(self):
        """The index no longer has a doc for the rid after surgery."""
        rid = self.get_rid(self.folder)
        text_index = self.catalog.indexes['SearchableText']

        self.assertTrue(text_index.index.has_doc(rid))

        UnindexObject(text_index, rid).perform()

        self.assertFalse(text_index.index.has_doc(rid))
def find_keys_pointing_to_rid(dictish, rid):
    """Return the keys of dictish whose value points to rid.

    A value points to rid when it either is the rid itself or is a
    collection that contains the rid. Keys are returned as a list, in the
    iteration order of ``dictish.items()``.

    """
    matching_keys = []
    for key, rids_or_rid in dictish.items():
        if contains_or_equals_rid(rid, rids_or_rid):
            matching_keys.append(key)
    return matching_keys


def contains_or_equals_rid(rid, rids_or_rid):
    """Return whether rids_or_rid contains rid or is equal to rid."""

    try:
        is_match = rid in rids_or_rid
    except TypeError:
        # the membership test raised TypeError (e.g. a plain integer
        # value), so compare the single value directly
        is_match = rid == rids_or_rid
    return is_match