From 95c2bd84e3542ebfab6039a906e6db40ea70b8c5 Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Mon, 11 Mar 2024 11:04:58 +0100 Subject: [PATCH] Handle `enabled=off` with recursion in place (#625) * lib/ZnapZend.pm: createSnapshot(): be pedantic about "implicit value of org.znapzend:enabled" for sub-datasets Signed-off-by: Jim Klimov * lib/ZnapZend.pm: createSnapshot(): update comments about creation and cleanup of snapshots on enabled=off sub-datasets Signed-off-by: Jim Klimov * lib/ZnapZend.pm: refactor listDisabledSourceDescendants() out of createSnapshot() so it can also be used in sendRecvCleanup() Signed-off-by: Jim Klimov * lib/ZnapZend.pm: refactor listDisabledSourceDescendants() calls to be done in refreshBackupPlans() once Track the list of names as @{$backupSet->{srcDisabledDescendants}} Signed-off-by: Jim Klimov * lib/ZnapZend/Config.pm::checkBackupSets(), lib/ZnapZend.pm::listDisabledSourceDescendants(): recognize sub-datasets that are both enabled(=off|on) and recursive(=on) Signed-off-by: Jim Klimov * .github/workflows/spelling/expect.txt: update for PERL source changes Signed-off-by: Jim Klimov * CHANGES: update wording about "enabled=off" support Signed-off-by: Jim Klimov * README.md: clarify about use of local ZFS properties for ZnapZend configurations Signed-off-by: Jim Klimov * README.md: clarify about current support of not-enabled datasets under a recursive backup schedule Signed-off-by: Jim Klimov * README.md: update about "enabled=off + recursive=on" setting for sub-tree pruning [#625] Signed-off-by: Jim Klimov * lib/ZnapZend.pm: listDisabledSourceDescendants(): avoid changing data type for raw and parsed cmd output [#625 review] Signed-off-by: Jim Klimov * lib/ZnapZend.pm: listDisabledSourceDescendants(): avoid "&&", use "and" [#625 review] Signed-off-by: Jim Klimov --------- Signed-off-by: Jim Klimov Co-authored-by: Tobias Oetiker --- .github/workflows/spelling/expect.txt | 5 + CHANGES | 1 + README.md | 19 +++ lib/ZnapZend.pm | 175 
++++++++++++++++++++++++-- lib/ZnapZend/Config.pm | 10 +- 5 files changed, 200 insertions(+), 10 deletions(-) diff --git a/.github/workflows/spelling/expect.txt b/.github/workflows/spelling/expect.txt index 02e18fc2..9fae8ddd 100644 --- a/.github/workflows/spelling/expect.txt +++ b/.github/workflows/spelling/expect.txt @@ -237,6 +237,7 @@ gtar gz HAARG hadfl +hashmaps Hassler Heitm heitmueller @@ -255,6 +256,7 @@ Icreate Idataset Idestroy Idocuments +idx Iexport Ifacility ifdef @@ -330,6 +332,7 @@ loglevel logto lowmem lpr +LRds lsb ltrim mailprog @@ -462,6 +465,7 @@ respawn resync RETVAL RHEL +rindex rmcmd rmdir rmprog @@ -535,6 +539,7 @@ strptime subdataset SUBDIRS subr +substr substvars subsys sudo diff --git a/CHANGES b/CHANGES index f92c0e74..5bda5ca6 100644 --- a/CHANGES +++ b/CHANGES @@ -2,6 +2,7 @@ znapzend (0.21.3) unstable; urgency=medium * Maintenance release: refine splitting of [[user@]host:]dataset[:with-colons][@snap[:with-colons]] strings to work for the realistic majority of use-cases; fix back support of pool root dataset in such spec * Update self-tests with verification that [[user@]host:]dataset[:with-colons][@snap[:with-colons]] string decoding yields expected results + * Extended handling of "org.znapzend:enabled=off" setting for sub-trees: now if the same intermediate dataset declares "org.znapzend:recursive=on", the disablement of znapzend handling takes place for descendants as well (previously it had effect only exactly for datasets that set "org.znapzend:enabled=off" as a local ZFS property) * Fixed CI recipes and contents for spell-checker * Added rc-script and integration documentation for FreeBSD and similar platforms diff --git a/README.md b/README.md index 45177f5d..53317eb8 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,25 @@ would not be tracked with a long-term history locally or remotely. > such snapshots may linger indefinitely and "unexpectedly" consume disk > space for their uniquely referenced blocks. 
+Current ZnapZend releases extend this support with an ability to also set +a local ZFS property `org.znapzend:recursive=on` in such datasets (so there +would be two properties -- one to enable/disable and one to make that setting recursive), with the +effect that whole sub-trees of ZFS datasets can be excluded from ZnapZend +retention handling with one configuration in their common ancestor dataset +(previously this would require `enabled=off` in each excluded dataset). + +This behavior can be useful, for example, on CI build hosts, where you would +generally enable backups of `rpool/home` but would exclude the location for +discardable bulk data like build roots or caches in the worker account's home. + +> **_NOTE:_** Technically, the code also allows setting `enabled=on` in +> certain sub-datasets of the not-enabled tree to re-enable snapshot tracking +> for that dataset (maybe recursively to its descendants), but this feature +> has not yet seen much use and feedback in real-life situations. You may +> have to pre-create the parent datasets (disabled +> on source) to receive regular backups from ZnapZend on remote destinations, +> etc. + Compilation and Installation from source Inztructionz ----------------------------------------------------- diff --git a/lib/ZnapZend.pm b/lib/ZnapZend.pm index 1533be0f..5c2a4c80 100644 --- a/lib/ZnapZend.pm +++ b/lib/ZnapZend.pm @@ -147,8 +147,7 @@ my $killThemAll = sub { # Return an array of dataset names which are local descendants of the # provided $backupSet->{src} and have an explicit org.znapzend:enabled=off -# (note that currently each disabled descendant must be explicit - maybe -# an also-explicit recursion handling is a good idea for the future). +# and/or an intermediate nearest ancestor which is disabled with recursion. # The array may be empty if not in recursive mode, or no descendants # exist, or none are disabled. 
my $listDisabledSourceDescendants = sub { @@ -159,6 +158,11 @@ my $listDisabledSourceDescendants = sub { $SIG{HUP} = 'IGNORE'; my @dataSetsExplicitlyDisabled = (); + + my %explicitEnabled = (); + my %explicitRecursiveLocal = (); + my %explicitRecursiveInherited = (); + if ($backupSet->{recursive} eq 'on') { $self->zLog->info("checking for explicitly excluded ZFS dependent datasets under '$backupSet->{src}'"); @@ -166,7 +170,7 @@ my $listDisabledSourceDescendants = sub { ###my @dataSetList = grep /^$backupSet->{src}($|\/)/, @{$self->zZfs->listDataSets()}; my @dataSetList = @{$self->zZfs->listDataSets(undef, $backupSet->{src}, 1)}; - if ( @dataSetList ) { + if (@dataSetList) { # the default sub-dataset enablement value is implicitly "on" # (technically, the value inherited from $backupSet which we # are currently processing, because it is enabled) @@ -179,15 +183,168 @@ my $listDisabledSourceDescendants = sub { # newly created snapshot for removal for my $dataSet (@dataSetList){ # get the value for org.znapzend property - my @cmd = (@{$self->zZfs->priv}, qw(zfs get -H -s local -o value org.znapzend:enabled), $dataSet); - print STDERR '# ' . join(' ', @cmd) . "\n" if $self->debug; - open my $prop, '-|', @cmd; + my @cmdLE = (@{$self->zZfs->priv}, qw(zfs get -H -s local -o value org.znapzend:enabled), $dataSet); + print STDERR '# ' . join(' ', @cmdLE) . "\n" if $self->debug; + open my $cmdOutLE, '-|', @cmdLE; + + my @cmdLR = (@{$self->zZfs->priv}, qw(zfs get -H -s local -o value org.znapzend:recursive), $dataSet); + print STDERR '# ' . join(' ', @cmdLR) . "\n" if $self->debug; + open my $cmdOutLR, '-|', @cmdLR; # if the property does not exist, the command will just return. # In this case, use the default determined above. 
- $prop = <$prop> || $enabled_default; - chomp($prop); - if ( $prop eq 'off' ) { + my $propLE = <$cmdOutLE>; # || $enabled_default; + if ($propLE) { + chomp($propLE); + } + + my $propLR = <$cmdOutLR>; + my $propIR; + if ($propLR) { + chomp($propLR); + } else { + my @cmdIR = (@{$self->zZfs->priv}, qw(zfs get -H -s inherited -o value org.znapzend:recursive), $dataSet); + print STDERR '# ' . join(' ', @cmdIR) . "\n" if $self->debug; + open my $cmdOutIR, '-|', @cmdIR; + + $propIR = <$cmdOutIR>; + if ($propIR) { + chomp($propIR); + } + } + + # ASSUMPTION: We process the dataSetList in alphanumeric + # order, so parent datasets were seen before child ones! + $explicitEnabled{$dataSet} = $propLE; + $explicitRecursiveLocal{$dataSet} = $propLR; + $explicitRecursiveInherited{$dataSet} = $propIR; + + $self->zLog->debug("=== $dataSet snapshotting:" . + " enabled=" . (defined($propLE) ? $propLE : "") . + " recursive=" . (defined($propLR) ? $propLR : "") . + " (inherited_recursive=" . (defined($propIR) ? $propIR : "") . ")" + ) if $self->debug; + + # We treat a dataset as individually not-enabled for backup + # snapshot processing if either: + # * its local enabled==off, whatever the recursive setting + # * the nearest ancestor with a local setting about this has + # both enabled==off and recursive==on (explicitly set in it) + # Note that a dataset, whose nearest ancestor has enabled==off + # but does not set (or enable as "on") the "recursive" option, + # would be backed up as usual. + # In case of partial pruning and un-pruning, for a sub-dataset + # whose nearest ancestor has enabled==on again (which may also + # re-define the "recursive" option), a ZFS-inherited definition + # of "recursive" would be used. + # It is not valid for a single dataset configuration to only + # specify "recursive" (however, specifying only "enabled" is + # valid, as well as specifying "enabled" and "recursive" as + # the only two local znapzend-related properties). 
+ + # Is this source dataset not-enabled for snapshotting? Tri-state: + # -1 Known enabled (loop to next item) + # 0 Continue investigating + # 1 Known disabled (add to output list) + my $isDisabled = 0; + + # Is the local "enabled" property value set for this dataset? + if (defined($propLE)) { + if ($propLE eq 'off') { $isDisabled = 1; } + elsif ($propLE eq 'on') { $isDisabled = -1; } + } + + if (!$isDisabled) { + # Is anything "inherited" from local property definitions? + # Backtrack through collected hashmaps... + my $nearestLE; # local enabled + my $nearestLR; # local recursive + my $nearestLEds; # dataset with local enabled setting + my $nearestLRds; # dataset with local recursive setting + my $ancestor = $dataSet; + while ($ancestor ne $backupSet->{src}) { + # Chop off last slash and dataset name after it + my $idx = rindex($ancestor, '/'); + if ($idx == 0) { + die "Did not expect to see a starting slash in dataset name in the loop under $backupSet->{src}: $dataSet => $ancestor" + } + elsif ($idx < 0) { + # -1 means no slash, looking at a pool's root dataset + # May be our starting point or last loop cycle, so process it? + # Although $backupSet->{src} should have ruled it out anyway. + # In any case, avoid infinite loop upon errors, bail out below. 
+ $self->zLog->debug("WARNING: Did not expect to see a root dataset name in the loop under $backupSet->{src}: $dataSet => $ancestor"); + } else { + $ancestor = substr($ancestor, 0, $idx); + } + if ((!defined($nearestLE)) and defined($explicitEnabled{$ancestor})) { + $nearestLE = $explicitEnabled{$ancestor}; + $nearestLEds = $ancestor; + } + if ((!defined($nearestLR)) and defined($explicitRecursiveLocal{$ancestor})) { + $nearestLR = $explicitRecursiveLocal{$ancestor}; + $nearestLRds = $ancestor; + } + if (defined($nearestLE) and defined($nearestLR)) { + last; + } + if ($idx < 1 || $ancestor eq $dataSet) { + # Chopping did not go well + last; + } + } + + if (defined($nearestLE)) { + # Got something, is it "on" or "off", is recursion involved? + if (defined($nearestLR)) { + # Both properties are locally defined in some ancestor(s) + if ($nearestLE eq "off" and $nearestLR eq "on" and $nearestLEds eq $nearestLRds) { + # An ancestor defines both enabled==off and recursive==on + # explicitly, and is the nearest one to define such things. + $isDisabled = 1; + } + # else enabled=on, or recursive=off, or not set in same dataset + elsif ($nearestLE eq "on" and $nearestLR eq "on" and $nearestLEds eq $nearestLRds) { + # logical inheritance of "enabled=on" + $isDisabled = -1; + } + # else enabled=on, or recursive=off, or not set in same dataset + elsif ($nearestLE eq "on" and $nearestLR eq "off" and $nearestLEds eq $nearestLRds) { + # logical inheritance of "enabled=..." 
from parent of that + # ancestor because its own "enabled=on" is not recursive + my $ancestor2 = $nearestLEds; + while ($ancestor2) { + $ancestor2 =~ s/\/[^\/]+$// ; + if (grep {$_ eq $ancestor2} @dataSetsExplicitlyDisabled) { + # We handled that ancestor and found it disabled, in an earlier loop + $isDisabled = 1; + last; + } + if (index($ancestor2, '/') < 0) { + last; + } + } + } + elsif ($nearestLE eq "on" and defined($propIR) and $propIR eq "on") { + # zfs-inheritance of "enabled=on" + $isDisabled = -1; + } + } # else only "enabled=whatever" is defined in some ancestor, but + # not "recursive" anywhere => this one remains enabled, probably + } # else no ancestor defines a local "enabled" setting + } + + if (!$isDisabled) { + if ($enabled_default eq 'off') { + # Somehow processing a disabled backupSet?.. + $isDisabled = 1; + } elsif ($enabled_default eq 'on') { + $isDisabled = -1; + } + } + + $self->zLog->debug("=== $dataSet snapshotting: isDisabled=$isDisabled (" . ($isDisabled==1 ? "known-disabled" : ($isDisabled==-1 ? "known-enabled" : "uncertain")) . ")") if $self->debug; + if ($isDisabled == 1) { push(@dataSetsExplicitlyDisabled, $dataSet); } } diff --git a/lib/ZnapZend/Config.pm b/lib/ZnapZend/Config.pm index 018ca9c0..8aeb01ad 100644 --- a/lib/ZnapZend/Config.pm +++ b/lib/ZnapZend/Config.pm @@ -95,7 +95,15 @@ my $checkBackupSets = sub { # need to skip the dataset if there are two properties and one of # them is "enabled". if (keys(%{$backupSet}) eq 2 && exists($backupSet->{"enabled"})){ - next; + next; + } + + # Similarly for datasets which declare both the "enabled" flag and + # the "recursion" flag (e.g. to prune whole dataset sub-trees from + # backing up with znapzend) by configuring only the root of such + # sub-tree. + if (keys(%{$backupSet}) eq 3 && exists($backupSet->{"enabled"}) && exists($backupSet->{"recursive"})){ + next; } if ( $backupSet->{src} =~ m/[\@]/ ) {