Skip to content

Commit

Permalink
Implement function to prune string keeping HTML closing tags
Browse files Browse the repository at this point in the history
  • Loading branch information
aliksend committed Dec 17, 2024
1 parent c3ba55b commit f5283cc
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 1 deletion.
113 changes: 112 additions & 1 deletion backend/app/notify/telegram.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@ package notify
import (
"context"
"fmt"
"strings"
"time"

log "github.com/go-pkgz/lgr"
ntf "github.com/go-pkgz/notify"
"github.com/hashicorp/go-multierror"
"golang.org/x/net/html"
)

// comment_text_length_limit is the maxLength passed to pruneHTML when the
// comment text is embedded into a telegram notification by buildMessage.
// pruneHTML measures with len(), so the limit is counted in bytes.
// NOTE(review): Go naming convention is MixedCaps (commentTextLengthLimit);
// renaming requires updating the use in buildMessage as well.
const comment_text_length_limit = 100

// TelegramParams contain settings for telegram notifications
type TelegramParams struct {
AdminChannelID string // unique identifier for the target chat or username of the target channel (in the format @channelusername)
Expand Down Expand Up @@ -75,6 +79,113 @@ func (t *Telegram) Send(ctx context.Context, req Request) error {
return result.ErrorOrNil()
}

// stringArr is an ordered collection of strings that can be grown and shrunk
// from both ends while keeping track of the total byte length of its elements.
// The zero value is an empty, ready-to-use collection.
type stringArr struct {
	data []string // elements in order, first to last
	len  int      // sum of len() of every element in data, in bytes
}

// Push adds element to the end
func (s *stringArr) Push(v string) {
	s.data = append(s.data, v)
	s.len += len(v)
}

// Pop removes element from end and returns it.
// It returns an empty string when the collection is empty.
func (s *stringArr) Pop() string {
	last := len(s.data) - 1
	if last < 0 {
		// nothing to remove; avoid index-out-of-range panic
		return ""
	}
	v := s.data[last]
	s.data = s.data[:last]
	s.len -= len(v)
	return v
}

// Unshift adds element to the start
func (s *stringArr) Unshift(v string) {
	s.data = append([]string{v}, s.data...)
	s.len += len(v)
}

// Shift removes element from start and returns it.
// It returns an empty string when the collection is empty.
func (s *stringArr) Shift() string {
	if len(s.data) == 0 {
		// nothing to remove; avoid index-out-of-range panic
		return ""
	}
	v := s.data[0]
	s.data = s.data[1:]
	s.len -= len(v)
	return v
}

// String returns all strings concatenated
func (s stringArr) String() string {
	return strings.Join(s.data, "")
}

// Len returns total length (in bytes) of all strings concatenated
func (s stringArr) Len() int {
	return s.len
}

// pruneHTML prunes string keeping HTML closing tags.
//
// htmlText is tokenized and copied token by token into the result. As soon as
// the next token would make the output (already emitted tokens + pending
// closing tags + the "..." suffix) longer than maxLength bytes, the output is
// cut there: the suffix is appended, followed by the closing tags of every
// element that is still open, so the returned markup stays balanced.
// Comments and doctype tokens are dropped. If the whole input fits, it is
// returned re-serialized without the suffix.
func pruneHTML(htmlText string, maxLength int) string {
	const suffix = "..."

	result := stringArr{}
	// closing tags for currently open elements, innermost first
	endTokens := stringArr{}

	tokenizer := html.NewTokenizer(strings.NewReader(htmlText))
	for {
		if tokenizer.Next() == html.ErrorToken {
			// end of input (or tokenizer error): return what was collected
			return result.String()
		}
		token := tokenizer.Token()
		rendered := token.String()

		switch token.Type {
		case html.CommentToken, html.DoctypeToken:
			// skip tokens without content
			continue

		case html.StartTagToken:
			closing := "</" + token.Data + ">"
			// account for the full opening tag (including attributes) and
			// for the closing tag that will have to be emitted later
			if result.Len()+len(rendered)+len(closing)+endTokens.Len()+len(suffix) > maxLength {
				return result.String() + suffix + endTokens.String()
			}
			endTokens.Unshift(closing)

		case html.EndTagToken:
			if endTokens.Len() == 0 {
				// stray closing tag without a matching opening tag: drop it
				// instead of removing from an empty stack
				continue
			}
			endTokens.Shift()

		case html.SelfClosingTagToken:
			// a tag can't be cut in the middle, so it is either kept whole
			// or omitted entirely
			if result.Len()+len(rendered)+endTokens.Len()+len(suffix) > maxLength {
				return result.String() + suffix + endTokens.String()
			}

		case html.TextToken:
			if result.Len()+len(rendered)+endTokens.Len()+len(suffix) > maxLength {
				// keep as many whole words of the text as still fit
				text := pruneStringToWord(rendered, maxLength-result.Len()-endTokens.Len()-len(suffix))
				return result.String() + text + suffix + endTokens.String()
			}
		}

		result.Push(rendered)
	}
}

// pruneStringToWord prunes string to specified length respecting words.
//
// Words are separated by single space characters. The result is the longest
// prefix of text that consists of whole words and is at most maxLength bytes
// long, with trailing spaces removed. If text already fits into maxLength it
// is returned unchanged; if even the first word does not fit, or maxLength is
// not positive, an empty string is returned.
func pruneStringToWord(text string, maxLength int) string {
	if maxLength <= 0 {
		return ""
	}
	if len(text) <= maxLength {
		return text
	}

	// A word fits only if the space that terminates it is located at index
	// <= maxLength, so look for the last such space. The slice bound is safe
	// because len(text) > maxLength here.
	cut := strings.LastIndexByte(text[:maxLength+1], ' ')
	if cut < 0 {
		// the very first word is already too long
		return ""
	}
	return strings.TrimRight(text[:cut], " ")
}

// buildMessage generates message for generic notification about new comment
func (t *Telegram) buildMessage(req Request) string {
commentURLPrefix := req.Comment.Locator.URL + uiNav
Expand All @@ -85,7 +196,7 @@ func (t *Telegram) buildMessage(req Request) string {
msg += fmt.Sprintf(" -> <a href=%q>%s</a>", commentURLPrefix+req.parent.ID, ntf.EscapeTelegramText(req.parent.User.Name))
}

msg += fmt.Sprintf("\n\n%s", ntf.TelegramSupportedHTML(req.Comment.Text))
msg += fmt.Sprintf("\n\n%s", pruneHTML(ntf.TelegramSupportedHTML(req.Comment.Text), comment_text_length_limit))

if req.Comment.ParentID != "" {
msg += fmt.Sprintf("\n\n\"<i>%s</i>\"", ntf.TelegramSupportedHTML(req.parent.Text))
Expand Down
9 changes: 9 additions & 0 deletions backend/app/notify/telegram_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@ some text
<b>Hello</b><i><b>World</b></i>`,
res)

// prune string keeping HTML closing tags
c = store.Comment{
Text: "<b>Lorem ipsum <i>dolor sit amet</i>, consectetur adipiscing <code>elit, sed do eiusmod tempor incididunt</code> ut labore et dolore magna aliqua.</b>",
}
res = tb.buildMessage(Request{Comment: c})
assert.Equal(t, `<a href="#remark42__comment-"></a>
<b>Lorem ipsum <i>dolor sit amet</i>, consectetur adipiscing <code>elit, sed do...</code></b>`, res)
}

func TestTelegram_SendVerification(t *testing.T) {
Expand Down

0 comments on commit f5283cc

Please sign in to comment.