-zcrasis-process-buffer

# This file is double-licensed under GPLv3 and MIT (see LICENSE file)

# Input:
# $1 - text to process
#
# Output:
# ZCR_PB_WORDS - split of "$1" into shell words; array
# ZCR_PB_WORDS_BEGINNINGS - indexes of first letters of corresponding words in ZCR_PB_WORDS
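# ZCR_PB_SPACES - white spaces before corresponding words in ZCR_PB_WORDS;
#                 one extra, final element holds what follows the last word
# ZCR_PB_ALL - spaces and words together, alternating (spaces first) and
#              ending with the text that follows the last word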
#

# Run with plain zsh semantics, regardless of the caller's options;
# extendedglob is needed for the `#', `##' and `(#m)' patterns used below
emulate -LR zsh
setopt typesetsilent extendedglob noshortloops

# Keep the variables set by pattern-matching flags local to this
# function, so that the caller's values are not clobbered
local -a mbegin mend match
local MATCH; integer MBEGIN MEND

local buf="$1"
# Length of the whole input; used later as a "till the end" upper index
local len="${#buf}"
integer nwords

# All output variables are either overwritten or cleared
# (Z+cn+): split into shell words, keeping comments as strings (c)
# and treating newlines as ordinary white space (n)
ZCR_PB_WORDS=( "${(Z+cn+)buf}" )
ZCR_PB_SPACES=()
ZCR_PB_WORDS_BEGINNINGS=()
ZCR_PB_ALL=()

# (Z+n+) will return 1 element for buf that is empty or only whitespace.
# Because of the `n' flag newlines count as white space too, so they have
# to be accepted here – otherwise a buffer made only of newlines would be
# reported as a single, empty word
if [[ "$buf" = ( |$'\t'|$'\n')# ]]; then
    ZCR_PB_WORDS=()
    nwords=0
else
    nwords="${#ZCR_PB_WORDS}"
fi

-zcrasis_dbg_msg "Processing #$nwords tokens"

# Remove ZCR_PB_WORDS from $buf one by one, counting characters.
# For each word:
#  - cut from $buf the text that precedes the word (its leading
#    white spaces), remembering it,
#  - reconcile the word with the raw text of $buf when the two
#    differ because of a backslash placed at end of line,
#  - record the 1-based index at which the word begins in the input,
#  - cut the word itself from $buf.

# search  - pattern: a backslash followed by a newline
# search2 - pattern: a backslash followed by a newline or by end of string
local i word wordlen text text_len text2 text2_len tmp_buf search='\\'$'\n' search2='\\('$'\n|(#e))'
integer bslen start found found2
# Number of input characters consumed so far (white spaces and words)
integer char_count=0

for (( i=1; i<=nwords; i++ )); do
    # Remove spurious space generated by Z-flag when
    # input is an unbound '$(' (happens with zsh < 5.1)
    # and also real spaces gathered by an unbound '$(',
    # to handle them in a way normal to this loop
    ZCR_PB_WORDS[i]="${ZCR_PB_WORDS[i]%% ##}"
    word="${ZCR_PB_WORDS[i]}"
    wordlen="${#word}"

    # Remove white spaces.
    # In general, $buf can start with white spaces. We will not
    # search for them, but instead for leading character of current
    # shell word, negated. This is an ambition to completely avoid
    # character classes. The (#m) flag stores the removed text in
    # $MATCH, which is consumed after the `while' loop below
    buf="${buf##(#m)[^$word[1]]#}"

    # Detect text modification via backslash at end of line.
    # When in string, such backslash is removed and lines
    # concatenated into single token, i.e. the word can be
    # shorter than the text it originates from in $buf
    start=0
    # +2 to detect a trailing \$'\n' removal in case of unquoted
    # trailing backslash and then an empty line
    bslen=wordlen+2
    tmp_buf="$word"
    while (( 1 )); do
        # start+1+bslen-1 - position 1 shifted by `start',
        # follows text long for `bslen' characters.
        # text  - the window taken from the raw buffer
        # text2 - the same window taken from the word as it was split
        text="${buf[start+1,start+1+bslen-1]}"
        text_len="${#text}"
        text2="${tmp_buf[start+1,start+1+bslen-1]}"
        text2_len="${#text2}"
        # (i) yields index of the first match; the check against
        # text_len below rejects the "no match" result
        found="${text[(i)$search]}"
        # Found \$'\n' ?
        if (( found && found <= text_len )); then
            found2="${text2[(i)${~search2}]}"
            # The \$'\n' is missing from the word, or is at other position?
            if (( found2 == 0 || found2 > text2_len || found2 != found )); then
                -zcrasis_dbg_msg "Complex zshrc construct [len:$text_len]: $text"

                # Take the word from the raw buffer instead.
                # +2 added before this loop to fully compensate
                # 2-characters long \$'\n'
                word="${buf[1,start+1+bslen-1]}"
                ZCR_PB_WORDS[i]="$word"
            else
                # The word holds the same \$'\n' at the same position,
                # so nothing was removed there
                -zcrasis_dbg_msg "False alarm for [len:$text2_len]: $text2"
            fi

            # In both cases move past the \$'\n' just examined.
            # Start is 0-based, so -1
            start+=found+2-1

            # After processing of single \$'\n' we should add 2
            # characters into the bslen, as the two extra chars
            # have been now detected.
            # Subtracting found+1 subtracts the \$'\n' that are
            # found:
            #  tttttFFttttt, bslen=12, found=6
            #         ttttt, correct bslen=12-7
            # Initial +2 added to wordlen constantly holds.
            bslen=bslen-(found+1)+2
        else
            # No (more) \$'\n' in the window – the word is final
            wordlen="${#word}"
            if (( start > 0 )); then
                -zcrasis_dbg_msg "Fixed expression [len:$wordlen]: $word"
                -zcrasis_dbg_msg ""
            fi
            break
        fi
    done

    # Count the characters that preceded the word
    char_count+=${#MATCH}
    # This is the beginning of current word
    ZCR_PB_WORDS_BEGINNINGS[i]=$(( char_count + 1 ))
    # Remember the spaces
    ZCR_PB_SPACES[i]="$MATCH"

    # Store full data
    ZCR_PB_ALL+=( "$MATCH" "$word" )

    # Remove the word
    buf="${buf[wordlen+1,len]}"
    # Count actual characters of the word, so that beginnings
    # of following words are indexes into the whole input
    char_count+=wordlen
done

# What's left in $buf can be only white spaces; after the loop
# `i' equals nwords+1, so this is one extra, trailing element
ZCR_PB_SPACES[i]="$buf"
ZCR_PB_ALL+=( "$buf" )

return 0

# vim:ft=zsh