From 2dabc4bc4a94551f49ff1cc5c2290ffe82e4839a Mon Sep 17 00:00:00 2001 From: undaunt <31376520+undaunt@users.noreply.github.com> Date: Wed, 6 Nov 2024 12:50:55 -0800 Subject: [PATCH 1/7] Support hardlink groups, add debug -Adds debug functionality with extended details -Supports detecting inode groups for hardlink processing. -Pulls files and sorts by, then groups by inode group with awk -Checks all files in an inode group's counts when calculating skipping counts -Removes existing skip hardlink flag -Removes hardlinks and recreates them directly after the balance copy/delete/move operation per inode group to minimize 'downtime' --- zfs-inplace-rebalancing.sh | 330 +++++++++++++++++++++---------------- 1 file changed, 187 insertions(+), 143 deletions(-) diff --git a/zfs-inplace-rebalancing.sh b/zfs-inplace-rebalancing.sh index b9fe6ce..f20084b 100755 --- a/zfs-inplace-rebalancing.sh +++ b/zfs-inplace-rebalancing.sh @@ -1,14 +1,14 @@ #!/usr/bin/env bash -# exit script on error +# Exit script on error set -e -# exit on undeclared variable +# Exit on undeclared variable set -u -# file used to track processed files +# File used to track processed files rebalance_db_file_name="rebalance_db.txt" -# index used for progress +# Index used for progress current_index=0 ## Color Constants @@ -24,19 +24,18 @@ Cyan='\033[0;36m' # Cyan ## Functions -# print a help message +# Print a help message function print_usage() { - echo "Usage: zfs-inplace-rebalancing --checksum true --skip-hardlinks false --passes 1 /my/pool" + echo "Usage: zfs-inplace-rebalancing.sh --checksum true --passes 1 --debug true /my/pool" } -# print a given text entirely in a given color +# Print a given text entirely in a given color function color_echo () { color=$1 text=$2 echo -e "${color}${text}${Color_Off}" } - function get_rebalance_count () { file_path=$1 @@ -52,131 +51,99 @@ function get_rebalance_count () { fi } -# rebalance a specific file -function rebalance () { - file_path=$1 +# Rebalance a group of files that are hardlinked together +function process_inode_group() { + paths=("$@") + num_paths="${#paths[@]}" - # check if file has >=2 links in the case of --skip-hardlinks - # this shouldn't be needed in the typical case of `find` only finding files with links == 1 - # but this can run for a long time, so it's good to double check if something changed - if [[ "${skip_hardlinks_flag,,}" == "true"* ]]; then - if [[ "${OSTYPE,,}" == "linux-gnu"* ]]; then - # Linux - # - # -c --format=FORMAT - # use the specified FORMAT instead of the default; output a - # newline after each use of FORMAT - # %h number of hard links - - hardlink_count=$(stat -c "%h" "${file_path}") - elif [[ "${OSTYPE,,}" == "darwin"* ]] || [[ "${OSTYPE,,}" == "freebsd"* ]]; then - # Mac OS - # FreeBSD - # -f format - # Display information using the specified format - # l Number of hard links to file (st_nlink) - - hardlink_count=$(stat -f %l "${file_path}") - else - echo "Unsupported OS type: $OSTYPE" - exit 1 - fi - - if [ "${hardlink_count}" -ge 2 ]; then - echo "Skipping hard-linked file: ${file_path}" - return - fi + # Progress tracking + current_index="$((current_index + 1))" + progress_raw=$((current_index * 10000 / file_count)) + progress_percent=$(printf '%0.2f' "${progress_raw}e-2") + color_echo "${Cyan}" "Progress -- Files: ${current_index}/${file_count} (${progress_percent}%)" + + if [ "$debug_flag" = true ]; then + echo "Processing inode group with ${num_paths} paths:" + for path in "${paths[@]}"; do + echo " - $path" + done fi - current_index="$((current_index + 1))" - progress_percent=$(printf '%0.2f' "$((current_index*10000/file_count))e-2") - color_echo "${Cyan}" "Progress -- Files: ${current_index}/${file_count} (${progress_percent}%)" + # Check rebalance counts for all files + should_skip=false + for path in "${paths[@]}"; do + rebalance_count=$(get_rebalance_count "${path}") + if [ "${rebalance_count}" -ge "${passes_flag}" ]; then + should_skip=true + break + fi + done - if [[ ! -f "${file_path}" ]]; then - color_echo "${Yellow}" "File is missing, skipping: ${file_path}" + if [ "${should_skip}" = true ]; then + if [ "${num_paths}" -gt 1 ]; then + color_echo "${Yellow}" "Rebalance count (${passes_flag}) reached, skipping group: ${paths[*]}" + else + color_echo "${Yellow}" "Rebalance count (${passes_flag}) reached, skipping: ${paths[0]}" + fi + return fi - if [ "${passes_flag}" -ge 1 ]; then - # check if target rebalance count is reached - rebalance_count=$(get_rebalance_count "${file_path}") - if [ "${rebalance_count}" -ge "${passes_flag}" ]; then - color_echo "${Yellow}" "Rebalance count (${passes_flag}) reached, skipping: ${file_path}" + main_file="${paths[0]}" + + # Check if main_file exists + if [[ ! -f "${main_file}" ]]; then + color_echo "${Yellow}" "File is missing, skipping: ${main_file}" return - fi fi - + tmp_extension=".balance" - tmp_file_path="${file_path}${tmp_extension}" + tmp_file_path="${main_file}${tmp_extension}" - echo "Copying '${file_path}' to '${tmp_file_path}'..." + echo "Copying '${main_file}' to '${tmp_file_path}'..." + if [ "$debug_flag" = true ]; then + echo "Executing copy command:" + fi if [[ "${OSTYPE,,}" == "linux-gnu"* ]]; then # Linux - - # --reflink=never -- force standard copy (see ZFS Block Cloning) - # -a -- keep attributes, includes -d -- keep symlinks (dont copy target) and - # -p -- preserve ACLs to - # -x -- stay on one system - cp --reflink=never -ax "${file_path}" "${tmp_file_path}" + cmd=(cp --reflink=never -ax "${main_file}" "${tmp_file_path}") + if [ "$debug_flag" = true ]; then + echo "${cmd[@]}" + fi + "${cmd[@]}" elif [[ "${OSTYPE,,}" == "darwin"* ]] || [[ "${OSTYPE,,}" == "freebsd"* ]]; then - # Mac OS - # FreeBSD - - # -a -- Archive mode. Same as -RpP. Includes preservation of modification - # time, access time, file flags, file mode, ACL, user ID, and group - # ID, as allowed by permissions. - # -x -- File system mount points are not traversed. - cp -ax "${file_path}" "${tmp_file_path}" + # Mac OS and FreeBSD + cmd=(cp -ax "${main_file}" "${tmp_file_path}") + if [ "$debug_flag" = true ]; then + echo "${cmd[@]}" + fi + "${cmd[@]}" else echo "Unsupported OS type: $OSTYPE" exit 1 fi - # compare copy against original to make sure nothing went wrong + # Compare copy against original to make sure nothing went wrong if [[ "${checksum_flag,,}" == "true"* ]]; then echo "Comparing copy against original..." if [[ "${OSTYPE,,}" == "linux-gnu"* ]]; then # Linux - - # file attributes - original_md5=$(lsattr "${file_path}" | awk '{print $1}') - # file permissions, owner, group - # shellcheck disable=SC2012 - original_md5="${original_md5} $(ls -lha "${file_path}" | awk '{print $1 " " $3 " " $4}')" - # file content - original_md5="${original_md5} $(md5sum -b "${file_path}" | awk '{print $1}')" - - # file attributes - copy_md5=$(lsattr "${tmp_file_path}" | awk '{print $1}') - # file permissions, owner, group - # shellcheck disable=SC2012 - copy_md5="${copy_md5} $(ls -lha "${tmp_file_path}" | awk '{print $1 " " $3 " " $4}')" - # file content - copy_md5="${copy_md5} $(md5sum -b "${tmp_file_path}" | awk '{print $1}')" + original_md5=$(md5sum -b "${main_file}" | awk '{print $1}') + copy_md5=$(md5sum -b "${tmp_file_path}" | awk '{print $1}') elif [[ "${OSTYPE,,}" == "darwin"* ]] || [[ "${OSTYPE,,}" == "freebsd"* ]]; then - # Mac OS - # FreeBSD - - # file attributes - original_md5=$(lsattr "${file_path}" | awk '{print $1}') - # file permissions, owner, group - # shellcheck disable=SC2012 - original_md5="${original_md5} $(ls -lha "${file_path}" | awk '{print $1 " " $3 " " $4}')" - # file content - original_md5="${original_md5} $(md5 -q "${file_path}")" - - # file attributes - copy_md5=$(lsattr "${tmp_file_path}" | awk '{print $1}') - # file permissions, owner, group - # shellcheck disable=SC2012 - copy_md5="${copy_md5} $(ls -lha "${tmp_file_path}" | awk '{print $1 " " $3 " " $4}')" - # file content - copy_md5="${copy_md5} $(md5 -q "${tmp_file_path}")" + # Mac OS and FreeBSD + original_md5=$(md5 -q "${main_file}") + copy_md5=$(md5 -q "${tmp_file_path}") else echo "Unsupported OS type: $OSTYPE" exit 1 fi - if [[ "${original_md5}" == "${copy_md5}"* ]]; then + if [ "$debug_flag" = true ]; then + echo "Original MD5: $original_md5" + echo "Copy MD5: $copy_md5" + fi + + if [[ "${original_md5}" == "${copy_md5}" ]]; then color_echo "${Green}" "MD5 OK" else color_echo "${Red}" "MD5 FAILED: ${original_md5} != ${copy_md5}" @@ -184,30 +151,52 @@ function rebalance () { fi fi - echo "Removing original '${file_path}'..." - rm "${file_path}" + echo "Removing original files..." + for path in "${paths[@]}"; do + if [ "$debug_flag" = true ]; then + echo "Removing $path" + fi + rm "${path}" + done - echo "Renaming temporary copy to original '${file_path}'..." - mv "${tmp_file_path}" "${file_path}" + echo "Renaming temporary copy to original '${main_file}'..." + if [ "$debug_flag" = true ]; then + echo "Moving ${tmp_file_path} to ${main_file}" + fi + mv "${tmp_file_path}" "${main_file}" - if [ "${passes_flag}" -ge 1 ]; then - # update rebalance "database" - line_nr=$(grep -xF -n "${file_path}" "./${rebalance_db_file_name}" | head -n 1 | cut -d: -f1) - if [ -z "${line_nr}" ]; then - rebalance_count=1 - echo "${file_path}" >> "./${rebalance_db_file_name}" - echo "${rebalance_count}" >> "./${rebalance_db_file_name}" - else - rebalance_count_line_nr="$((line_nr + 1))" - rebalance_count="$((rebalance_count + 1))" - sed -i '' "${rebalance_count_line_nr}s/.*/${rebalance_count}/" "./${rebalance_db_file_name}" + echo "Recreating hardlinks..." + for (( i=1; i<${#paths[@]}; i++ )); do + if [ "$debug_flag" = true ]; then + echo "Linking ${main_file} to ${paths[$i]}" fi + ln "${main_file}" "${paths[$i]}" + done + + if [ "${passes_flag}" -ge 1 ]; then + # Update rebalance "database" for all files + for path in "${paths[@]}"; do + line_nr=$(grep -xF -n "${path}" "./${rebalance_db_file_name}" | head -n 1 | cut -d: -f1) + if [ -z "${line_nr}" ]; then + rebalance_count=1 + echo "${path}" >> "./${rebalance_db_file_name}" + echo "${rebalance_count}" >> "./${rebalance_db_file_name}" + else + rebalance_count_line_nr="$((line_nr + 1))" + rebalance_count=$(awk "NR == ${rebalance_count_line_nr}" "./${rebalance_db_file_name}") + rebalance_count="$((rebalance_count + 1))" + if [ "$debug_flag" = true ]; then + echo "Updating rebalance count for ${path} to ${rebalance_count}" + fi + sed -i "${rebalance_count_line_nr}s/.*/${rebalance_count}/" "./${rebalance_db_file_name}" + fi + done fi } checksum_flag='true' -skip_hardlinks_flag='false' passes_flag='1' +debug_flag='false' if [[ "$#" -eq 0 ]]; then print_usage @@ -228,18 +217,18 @@ while true ; do fi shift 2 ;; - --skip-hardlinks ) + -p | --passes ) + passes_flag=$2 + shift 2 + ;; + --debug ) if [[ "$2" == 1 || "$2" =~ (on|true|yes) ]]; then - skip_hardlinks_flag="true" + debug_flag="true" else - skip_hardlinks_flag="false" + debug_flag="false" fi shift 2 ;; - -p | --passes ) - passes_flag=$2 - shift 2 - ;; *) break ;; @@ -252,29 +241,84 @@ color_echo "$Cyan" "Start rebalancing $(date):" color_echo "$Cyan" " Path: ${root_path}" color_echo "$Cyan" " Rebalancing Passes: ${passes_flag}" color_echo "$Cyan" " Use Checksum: ${checksum_flag}" -color_echo "$Cyan" " Skip Hardlinks: ${skip_hardlinks_flag}" - -# count files -if [[ "${skip_hardlinks_flag,,}" == "true"* ]]; then - file_count=$(find "${root_path}" -type f -links 1 | wc -l) +color_echo "$Cyan" " Debug Mode: ${debug_flag}" + +# Generate files_list.txt with device and inode numbers using stat, separated by a pipe '|' +if [[ "${OSTYPE,,}" == "linux-gnu"* ]]; then + # Linux + find "$root_path" -type f -not -path '*/.zfs/*' -exec stat --printf '%d:%i|%n\n' {} \; > files_list.txt +elif [[ "${OSTYPE,,}" == "darwin"* ]] || [[ "${OSTYPE,,}" == "freebsd"* ]]; then + # Mac OS and FreeBSD + find "$root_path" -type f -not -path '*/.zfs/*' -exec sh -c 'stat -f "%d:%i|%N" "$0"' {} \; {} \; > files_list.txt else - file_count=$(find "${root_path}" -type f | wc -l) + echo "Unsupported OS type: $OSTYPE" + exit 1 +fi + +if [ "$debug_flag" = true ]; then + echo "Contents of files_list.txt:" + cat files_list.txt +fi + +# Sort files_list.txt by device and inode number +sort -t '|' -k1,1 files_list.txt > sorted_files_list.txt + +if [ "$debug_flag" = true ]; then + echo "Contents of sorted_files_list.txt:" + cat sorted_files_list.txt +fi + +# Use awk to group paths by inode key +awk -F'|' '{ + key = $1 + path = $2 + if (key == prev_key) { + paths = paths " " path + } else { + if (NR > 1) { + print prev_key "|" paths + } + prev_key = key + paths = path + } +} +END { + if (NR > 0) { + print prev_key "|" paths + } +}' sorted_files_list.txt > grouped_inodes.txt + +if [ "$debug_flag" = true ]; then + echo "Contents of grouped_inodes.txt:" + cat grouped_inodes.txt fi -color_echo "$Cyan" " File count: ${file_count}" +# Count number of inode groups +file_count=$(wc -l < grouped_inodes.txt | tr -d ' ') -# create db file +color_echo "$Cyan" " Number of files to process: ${file_count}" + +# Initialize current_index +current_index=0 + +# Create db file if [ "${passes_flag}" -ge 1 ]; then touch "./${rebalance_db_file_name}" fi -# recursively scan through files and execute "rebalance" procedure -# in the case of --skip-hardlinks, only find files with links == 1 -if [[ "${skip_hardlinks_flag,,}" == "true"* ]]; then - find "$root_path" -type f -links 1 -print0 | while IFS= read -r -d '' file; do rebalance "$file"; done -else - find "$root_path" -type f -print0 | while IFS= read -r -d '' file; do rebalance "$file"; done -fi +# Read grouped_inodes.txt and process each group +while IFS='|' read -r key paths; do + if [ "$debug_flag" = true ]; then + echo "Detected inode group: key=${key}" + echo "Paths:${paths}" + fi + # Split the paths into an array + read -a path_array <<< "${paths}" + process_inode_group "${path_array[@]}" +done < grouped_inodes.txt + +# Clean up temporary files +rm files_list.txt sorted_files_list.txt grouped_inodes.txt echo "" echo "" From d5678de7a51e704a2f4a9387c2a73b7c4d0986c3 Mon Sep 17 00:00:00 2001 From: undaunt <31376520+undaunt@users.noreply.github.com> Date: Wed, 6 Nov 2024 13:04:38 -0800 Subject: [PATCH 2/7] Update README.md to denote hardlink support Adds details around the debug flag, hardlink support, removed --skip-hardlinks functionality, and temporary files used during the script processing. --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a7b6564..544fa11 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ Simple bash script to rebalance pool data between all mirrors when adding vdevs This script recursively traverses all the files in a given directory. Each file is copied with a `.balance` suffix, retaining all file attributes. The original is then deleted and the *copy* is renamed back to the name of the original file. When copying a file ZFS will spread the data blocks across all vdevs, effectively distributing/rebalancing the data of the original file (more or less) evenly. This allows the pool data to be rebalanced without the need for a separate backup pool/drive. +When the script detects an inode group of hardlinked files, it will proceed to copy one file in the inode group. The original file and all hardlinks are then deleted, the *copy* is renamed back to the name of the original file, and new hardlinks are generated from that copy to replace all other linked files that were removed. + The way ZFS distributes writes is not trivial, which makes it hard to predict how effective the redistribution will be. See: - https://jrs-s.net/2018/04/11/zfs-allocates-writes-according-to-free-space-per-vdev-not-latency-per-vdev/ - https://jrs-s.net/2018/08/24/zfs-write-allocation-in-0-7-x/ @@ -28,6 +30,8 @@ Since file attributes are fully retained, it is not possible to verify if an ind 1 ``` +The hardlink support process creates temporary files in the script location alongside `rebalance_db.txt` which are removed upon the end of each run. `files_list.txt` lists all files found in the given target location. `sorted_files_list.txt` lists all files sorted by inode number. `grouped_inodes.txt` lists all files by inode, but with all files from a given inode space separated on one line. + ## Prerequisites ### Balance Status @@ -83,6 +87,7 @@ chmod +x ./zfs-inplace-rebalancing.sh Dependencies: * `perl` - it should be available on most systems by default +* `awk` - it should be available on most systems by default ## Usage @@ -100,7 +105,7 @@ You can print a help message by running the script without any parameters: |-----------|-------------|---------| | `-c`
`--checksum` | Whether to compare attributes and content of the copied file using an **MD5** checksum. Technically this is a redundent check and consumes a lot of resources, so think twice. | `true` | | `-p`
`--passes` | The maximum number of rebalance passes per file. Setting this to infinity by using a value `<= 0` might improve performance when rebalancing a lot of small files. | `1` | -| `--skip-hardlinks` | Skip rebalancing hardlinked files, since it will only create duplicate data. | `false` | +| `--debug` | Shows additional output, including listing all files in the target location 3 times (list, inode sorted list, inode groupings) and more granular move/copy/link/count transaction information. | `false` | ### Example From 8f63fe0b29b349d9c0cfdf72a7a7afd69e70e0aa Mon Sep 17 00:00:00 2001 From: undaunt <31376520+undaunt@users.noreply.github.com> Date: Wed, 6 Nov 2024 13:07:39 -0800 Subject: [PATCH 3/7] Add additional hardlink group database notes --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 544fa11..ed88aff 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,8 @@ Since file attributes are fully retained, it is not possible to verify if an ind 1 ``` +All files in a given inode group will be added to the database when processed. The highest count in a given inode group of files will be used to determine if the group should be skipped when processing against the number of passes specified + The hardlink support process creates temporary files in the script location alongside `rebalance_db.txt` which are removed upon the end of each run. `files_list.txt` lists all files found in the given target location. `sorted_files_list.txt` lists all files sorted by inode number. `grouped_inodes.txt` lists all files by inode, but with all files from a given inode space separated on one line. ## Prerequisites From 1111a4027f4d6862808f086ff5346474e8278cae Mon Sep 17 00:00:00 2001 From: undaunt <31376520+undaunt@users.noreply.github.com> Date: Wed, 6 Nov 2024 13:08:08 -0800 Subject: [PATCH 4/7] typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ed88aff..b6e91bb 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Since file attributes are fully retained, it is not possible to verify if an ind 1 ``` -All files in a given inode group will be added to the database when processed. The highest count in a given inode group of files will be used to determine if the group should be skipped when processing against the number of passes specified +All files in a given inode group will be added to the database when processed. The highest count in a given inode group of files will be used to determine if the group should be skipped when processing against the number of passes in a given script execution. The hardlink support process creates temporary files in the script location alongside `rebalance_db.txt` which are removed upon the end of each run. `files_list.txt` lists all files found in the given target location. `sorted_files_list.txt` lists all files sorted by inode number. `grouped_inodes.txt` lists all files by inode, but with all files from a given inode space separated on one line. From 46c14fae53d28e71e50bd6bca913a32e51f61e73 Mon Sep 17 00:00:00 2001 From: undaunt <31376520+undaunt@users.noreply.github.com> Date: Wed, 6 Nov 2024 13:11:17 -0800 Subject: [PATCH 5/7] Flip default debug --- zfs-inplace-rebalancing.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zfs-inplace-rebalancing.sh b/zfs-inplace-rebalancing.sh index f20084b..a6af50f 100755 --- a/zfs-inplace-rebalancing.sh +++ b/zfs-inplace-rebalancing.sh @@ -26,7 +26,7 @@ Cyan='\033[0;36m' # Cyan # Print a help message function print_usage() { - echo "Usage: zfs-inplace-rebalancing.sh --checksum true --passes 1 --debug true /my/pool" + echo "Usage: zfs-inplace-rebalancing.sh --checksum true --passes 1 --debug false /my/pool" } # Print a given text entirely in a given color From 3ae15e565acacf54cf34612e54029c5a9c14c3fc Mon Sep 17 00:00:00 2001 From: undaunt <31376520+undaunt@users.noreply.github.com> Date: Wed, 6 Nov 2024 13:37:42 -0800 Subject: [PATCH 6/7] Fix space handling in paths --- zfs-inplace-rebalancing.sh | 52 +++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/zfs-inplace-rebalancing.sh b/zfs-inplace-rebalancing.sh index a6af50f..c968a0d 100755 --- a/zfs-inplace-rebalancing.sh +++ b/zfs-inplace-rebalancing.sh @@ -37,9 +37,9 @@ function color_echo () { } function get_rebalance_count () { - file_path=$1 + file_path="$1" - line_nr=$(grep -xF -n "${file_path}" "./${rebalance_db_file_name}" | head -n 1 | cut -d: -f1) + line_nr=$(grep -xF -n -e "${file_path}" "./${rebalance_db_file_name}" | head -n 1 | cut -d: -f1) if [ -z "${line_nr}" ]; then echo "0" return @@ -176,7 +176,7 @@ function process_inode_group() { if [ "${passes_flag}" -ge 1 ]; then # Update rebalance "database" for all files for path in "${paths[@]}"; do - line_nr=$(grep -xF -n "${path}" "./${rebalance_db_file_name}" | head -n 1 | cut -d: -f1) + line_nr=$(grep -xF -n -e "${path}" "./${rebalance_db_file_name}" | head -n 1 | cut -d: -f1) if [ -z "${line_nr}" ]; then rebalance_count=1 echo "${path}" >> "./${rebalance_db_file_name}" @@ -268,23 +268,19 @@ if [ "$debug_flag" = true ]; then cat sorted_files_list.txt fi -# Use awk to group paths by inode key +# Use awk to group paths by inode key and handle spaces in paths awk -F'|' '{ key = $1 path = $2 if (key == prev_key) { - paths = paths " " path + print "\t" path } else { if (NR > 1) { - print prev_key "|" paths + # Do nothing } + print key + print "\t" path prev_key = key - paths = path - } -} -END { - if (NR > 0) { - print prev_key "|" paths } }' sorted_files_list.txt > grouped_inodes.txt @@ -294,7 +290,7 @@ if [ "$debug_flag" = true ]; then fi # Count number of inode groups -file_count=$(wc -l < grouped_inodes.txt | tr -d ' ') +file_count=$(grep -cvP '^\t' grouped_inodes.txt) color_echo "$Cyan" " Number of files to process: ${file_count}" @@ -306,17 +302,31 @@ if [ "${passes_flag}" -ge 1 ]; then touch "./${rebalance_db_file_name}" fi -# Read grouped_inodes.txt and process each group -while IFS='|' read -r key paths; do - if [ "$debug_flag" = true ]; then - echo "Detected inode group: key=${key}" - echo "Paths:${paths}" +key="" +paths=() + +# Read grouped_inodes.txt line by line +while IFS= read -r line; do + if [[ "$line" == $'\t'* ]]; then + # This is a path line + path="${line#$'\t'}" + paths+=("$path") + else + # This is a new inode key + if [[ "${#paths[@]}" -gt 0 ]]; then + # Process the previous group + process_inode_group "${paths[@]}" + fi + key="$line" + paths=() fi - # Split the paths into an array - read -a path_array <<< "${paths}" - process_inode_group "${path_array[@]}" done < grouped_inodes.txt +# Process the last group after the loop ends +if [[ "${#paths[@]}" -gt 0 ]]; then + process_inode_group "${paths[@]}" +fi + # Clean up temporary files rm files_list.txt sorted_files_list.txt grouped_inodes.txt From d7c74441a59b71c6371c9a5da09a9303567ec0c5 Mon Sep 17 00:00:00 2001 From: undaunt <31376520+undaunt@users.noreply.github.com> Date: Wed, 6 Nov 2024 13:48:53 -0800 Subject: [PATCH 7/7] Fix echo bug Removed the 'recreating hardlinks' echo for inode groups of 1 file. --- zfs-inplace-rebalancing.sh | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/zfs-inplace-rebalancing.sh b/zfs-inplace-rebalancing.sh index c968a0d..2346aad 100755 --- a/zfs-inplace-rebalancing.sh +++ b/zfs-inplace-rebalancing.sh @@ -165,13 +165,16 @@ function process_inode_group() { fi mv "${tmp_file_path}" "${main_file}" - echo "Recreating hardlinks..." - for (( i=1; i<${#paths[@]}; i++ )); do - if [ "$debug_flag" = true ]; then - echo "Linking ${main_file} to ${paths[$i]}" - fi - ln "${main_file}" "${paths[$i]}" - done + # Only recreate hardlinks if there are multiple paths + if [ "${num_paths}" -gt 1 ]; then + echo "Recreating hardlinks..." + for (( i=1; i<${#paths[@]}; i++ )); do + if [ "$debug_flag" = true ]; then + echo "Linking ${main_file} to ${paths[$i]}" + fi + ln "${main_file}" "${paths[$i]}" + done + fi if [ "${passes_flag}" -ge 1 ]; then # Update rebalance "database" for all files