Skip to content

Commit

Permalink
refactor: split processor package into smaller files
Browse files Browse the repository at this point in the history
  • Loading branch information
fguillot committed Sep 23, 2024
1 parent c2ac2bf commit cfe410f
Show file tree
Hide file tree
Showing 7 changed files with 351 additions and 271 deletions.
92 changes: 92 additions & 0 deletions internal/reader/processor/bilibili.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package processor

import (
"encoding/json"
"fmt"
"log/slog"
"regexp"

"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/fetcher"
)

var (
// bilibiliURLRegex matches URLs containing "bilibili.com/video/" and captures
// everything after that prefix up to the end of the string.
bilibiliURLRegex = regexp.MustCompile(`bilibili\.com/video/(.*)$`)
// bilibiliVideoIdRegex extracts the video identifier following "/video/":
// group 1 holds the digits of an "av<digits>" ID, group 2 holds the
// alphanumeric part of a "BV<alnum>" ID. Only one of the groups is non-empty
// for a given match.
bilibiliVideoIdRegex = regexp.MustCompile(`/video/(?:av(\d+)|BV([a-zA-Z0-9]+))`)
)

// shouldFetchBilibiliWatchTime reports whether the watch time should be
// fetched from Bilibili for the given entry. This requires the feature to be
// enabled in the configuration and the entry URL to match bilibiliURLRegex.
func shouldFetchBilibiliWatchTime(entry *model.Entry) bool {
	if config.Opts.FetchBilibiliWatchTime() {
		// A successful match yields the full match plus the single capture group.
		return len(bilibiliURLRegex.FindStringSubmatch(entry.URL)) == 2
	}
	return false
}

// extractBilibiliVideoID pulls the video identifier out of a Bilibili URL.
//
// It returns the identifier kind ("aid" for an av-style numeric ID, "bvid"
// for a BV-style ID), then the identifier itself without its "av"/"BV"
// prefix. An error is returned when the URL holds no recognizable identifier.
func extractBilibiliVideoID(websiteURL string) (string, string, error) {
	groups := bilibiliVideoIdRegex.FindStringSubmatch(websiteURL)

	switch {
	case groups == nil:
		return "", "", fmt.Errorf("no video ID found in URL: %s", websiteURL)
	case groups[1] != "":
		// First capture group: digits following "av".
		return "aid", groups[1], nil
	case groups[2] != "":
		// Second capture group: characters following "BV".
		return "bvid", groups[2], nil
	default:
		return "", "", fmt.Errorf("unexpected regex match result for URL: %s", websiteURL)
	}
}

func fetchBilibiliWatchTime(websiteURL string) (int, error) {
requestBuilder := fetcher.NewRequestBuilder()
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())

idType, videoID, extractErr := extractBilibiliVideoID(websiteURL)
if extractErr != nil {
return 0, extractErr
}
bilibiliApiURL := fmt.Sprintf("https://api.bilibili.com/x/web-interface/view?%s=%s", idType, videoID)

responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(bilibiliApiURL))
defer responseHandler.Close()

if localizedError := responseHandler.LocalizedError(); localizedError != nil {
slog.Warn("Unable to fetch Bilibili API",
slog.String("website_url", websiteURL),
slog.String("api_url", bilibiliApiURL),
slog.Any("error", localizedError.Error()))
return 0, localizedError.Error()
}

var result map[string]interface{}
doc := json.NewDecoder(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
if docErr := doc.Decode(&result); docErr != nil {
return 0, fmt.Errorf("failed to decode API response: %v", docErr)
}

if code, ok := result["code"].(float64); !ok || code != 0 {
return 0, fmt.Errorf("API returned error code: %v", result["code"])
}

data, ok := result["data"].(map[string]interface{})
if !ok {
return 0, fmt.Errorf("data field not found or not an object")
}

duration, ok := data["duration"].(float64)
if !ok {
return 0, fmt.Errorf("duration not found or not a number")
}
intDuration := int(duration)
durationMin := intDuration / 60
if intDuration%60 != 0 {
durationMin++
}
return durationMin, nil
}
60 changes: 60 additions & 0 deletions internal/reader/processor/nebula.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package processor

import (
"errors"
"fmt"
"log/slog"
"regexp"
"strconv"

"github.com/PuerkitoBio/goquery"

"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/fetcher"
)

// nebulaRegex matches URLs that start with "https://nebula.tv".
var nebulaRegex = regexp.MustCompile(`^https://nebula\.tv`)

// shouldFetchNebulaWatchTime reports whether the watch time should be fetched
// from Nebula for the given entry: the feature must be enabled in the
// configuration and the entry URL must match nebulaRegex.
func shouldFetchNebulaWatchTime(entry *model.Entry) bool {
	return config.Opts.FetchNebulaWatchTime() && nebulaRegex.MatchString(entry.URL)
}

// fetchNebulaWatchTime downloads the page at websiteURL and returns the video
// duration in whole minutes (truncated), read from the `video:duration` meta
// property, whose content is a number of seconds.
//
// An error is returned when the page cannot be fetched or parsed, when the
// meta tag is absent, or when its content is not a base-10 integer. The
// parsing error is wrapped and can be inspected with errors.Is/errors.As.
func fetchNebulaWatchTime(websiteURL string) (int, error) {
	requestBuilder := fetcher.NewRequestBuilder()
	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())

	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
	defer responseHandler.Close()

	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
		slog.Warn("Unable to fetch Nebula watch time", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
		return 0, localizedError.Error()
	}

	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
	if docErr != nil {
		return 0, docErr
	}

	// durs contains video watch time in seconds
	durs, exists := doc.Find(`meta[property="video:duration"]`).First().Attr("content")
	if !exists {
		return 0, errors.New("duration has not found")
	}

	dur, err := strconv.ParseInt(durs, 10, 64)
	if err != nil {
		// Wrap with %w so the strconv error stays in the error chain.
		return 0, fmt.Errorf("unable to parse duration %s: %w", durs, err)
	}

	return int(dur / 60), nil
}
60 changes: 60 additions & 0 deletions internal/reader/processor/odysee.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package processor

import (
"errors"
"fmt"
"log/slog"
"regexp"
"strconv"

"github.com/PuerkitoBio/goquery"

"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/fetcher"
)

// odyseeRegex matches URLs that start with "https://odysee.com".
var odyseeRegex = regexp.MustCompile(`^https://odysee\.com`)

// shouldFetchOdyseeWatchTime reports whether the watch time should be fetched
// from Odysee for the given entry: the feature must be enabled in the
// configuration and the entry URL must match odyseeRegex.
func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
	return config.Opts.FetchOdyseeWatchTime() && odyseeRegex.MatchString(entry.URL)
}

// fetchOdyseeWatchTime downloads the page at websiteURL and returns the video
// duration in whole minutes (truncated), read from the `og:video:duration`
// meta property, whose content is a number of seconds.
//
// An error is returned when the page cannot be fetched or parsed, when the
// meta tag is absent, or when its content is not a base-10 integer. The
// parsing error is wrapped and can be inspected with errors.Is/errors.As.
func fetchOdyseeWatchTime(websiteURL string) (int, error) {
	requestBuilder := fetcher.NewRequestBuilder()
	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())

	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
	defer responseHandler.Close()

	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
		slog.Warn("Unable to fetch Odysee watch time", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
		return 0, localizedError.Error()
	}

	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
	if docErr != nil {
		return 0, docErr
	}

	// durs contains video watch time in seconds
	durs, exists := doc.Find(`meta[property="og:video:duration"]`).First().Attr("content")
	if !exists {
		return 0, errors.New("duration has not found")
	}

	dur, err := strconv.ParseInt(durs, 10, 64)
	if err != nil {
		// Wrap with %w so the strconv error stays in the error chain.
		return 0, fmt.Errorf("unable to parse duration %s: %w", durs, err)
	}

	return int(dur / 60), nil
}
Loading

0 comments on commit cfe410f

Please sign in to comment.