Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: split processor package into smaller files #2860

Merged
merged 1 commit into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions internal/reader/processor/bilibili.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package processor

import (
"encoding/json"
"fmt"
"log/slog"
"regexp"

"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/fetcher"
)

var (
// bilibiliURLRegex matches any string containing "bilibili.com/video/" and
// captures everything that follows it up to the end of the string.
bilibiliURLRegex = regexp.MustCompile(`bilibili\.com/video/(.*)$`)
// bilibiliVideoIdRegex matches "/video/" followed by either an "av" ID
// (digits, capture group 1) or a "BV" ID (ASCII letters and digits, capture
// group 2). The "av"/"BV" prefix itself is not part of either capture.
bilibiliVideoIdRegex = regexp.MustCompile(`/video/(?:av(\d+)|BV([a-zA-Z0-9]+))`)
)

// shouldFetchBilibiliWatchTime reports whether the watch time of entry should
// be fetched from Bilibili: the feature has to be enabled in the configuration
// and the entry URL has to match bilibiliURLRegex.
func shouldFetchBilibiliWatchTime(entry *model.Entry) bool {
	// The configuration flag is evaluated first, so the regex is not run at
	// all when the feature is turned off. A successful match yields exactly
	// two elements: the whole match and its single capture group.
	return config.Opts.FetchBilibiliWatchTime() &&
		len(bilibiliURLRegex.FindStringSubmatch(entry.URL)) == 2
}

// extractBilibiliVideoID pulls the video identifier out of a Bilibili URL.
//
// It returns the identifier kind ("aid" for av-style numeric IDs, "bvid" for
// BV-style IDs), the identifier without its "av"/"BV" prefix, and a non-nil
// error when websiteURL does not contain a recognizable identifier.
func extractBilibiliVideoID(websiteURL string) (string, string, error) {
	groups := bilibiliVideoIdRegex.FindStringSubmatch(websiteURL)
	if groups == nil {
		return "", "", fmt.Errorf("no video ID found in URL: %s", websiteURL)
	}

	// groups[1] holds the digits of an "av" ID, groups[2] the characters that
	// follow "BV"; only one of the two alternatives can have matched.
	switch {
	case groups[1] != "":
		return "aid", groups[1], nil
	case groups[2] != "":
		return "bvid", groups[2], nil
	default:
		return "", "", fmt.Errorf("unexpected regex match result for URL: %s", websiteURL)
	}
}

func fetchBilibiliWatchTime(websiteURL string) (int, error) {
requestBuilder := fetcher.NewRequestBuilder()
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())

idType, videoID, extractErr := extractBilibiliVideoID(websiteURL)
if extractErr != nil {
return 0, extractErr
}
bilibiliApiURL := fmt.Sprintf("https://api.bilibili.com/x/web-interface/view?%s=%s", idType, videoID)

responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(bilibiliApiURL))
defer responseHandler.Close()

if localizedError := responseHandler.LocalizedError(); localizedError != nil {
slog.Warn("Unable to fetch Bilibili API",
slog.String("website_url", websiteURL),
slog.String("api_url", bilibiliApiURL),
slog.Any("error", localizedError.Error()))
return 0, localizedError.Error()
}

var result map[string]interface{}
doc := json.NewDecoder(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
if docErr := doc.Decode(&result); docErr != nil {
return 0, fmt.Errorf("failed to decode API response: %v", docErr)
}

if code, ok := result["code"].(float64); !ok || code != 0 {
return 0, fmt.Errorf("API returned error code: %v", result["code"])
}

data, ok := result["data"].(map[string]interface{})
if !ok {
return 0, fmt.Errorf("data field not found or not an object")
}

duration, ok := data["duration"].(float64)
if !ok {
return 0, fmt.Errorf("duration not found or not a number")
}
intDuration := int(duration)
durationMin := intDuration / 60
if intDuration%60 != 0 {
durationMin++
}
return durationMin, nil
}
60 changes: 60 additions & 0 deletions internal/reader/processor/nebula.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package processor

import (
"errors"
"fmt"
"log/slog"
"regexp"
"strconv"

"github.com/PuerkitoBio/goquery"

"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/fetcher"
)

// nebulaRegex matches strings that begin with "https://nebula.tv". It is a
// prefix match only: nothing constrains the characters that follow.
var nebulaRegex = regexp.MustCompile(`^https://nebula\.tv`)

// shouldFetchNebulaWatchTime reports whether the watch time of entry should be
// fetched from Nebula: the feature has to be enabled in the configuration and
// the entry URL has to match nebulaRegex.
func shouldFetchNebulaWatchTime(entry *model.Entry) bool {
	// The configuration flag is evaluated first, so the regex is not run at
	// all when the feature is turned off.
	return config.Opts.FetchNebulaWatchTime() && nebulaRegex.MatchString(entry.URL)
}

// fetchNebulaWatchTime downloads the page at websiteURL and returns the video
// duration in whole minutes (seconds divided by 60, remainder discarded).
//
// The duration is read from the "content" attribute of the first
// <meta property="video:duration"> element. An error is returned when the
// request fails, the document cannot be parsed, the meta element is missing,
// or its content is not a base-10 integer; in the last case the strconv error
// is wrapped and reachable through errors.Is/errors.As.
func fetchNebulaWatchTime(websiteURL string) (int, error) {
	requestBuilder := fetcher.NewRequestBuilder()
	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())

	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
	defer responseHandler.Close()

	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
		slog.Warn("Unable to fetch Nebula watch time", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
		return 0, localizedError.Error()
	}

	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
	if docErr != nil {
		return 0, docErr
	}

	// durs contains the video watch time in seconds.
	durs, exists := doc.Find(`meta[property="video:duration"]`).First().Attr("content")
	if !exists {
		return 0, errors.New("duration has not found")
	}

	dur, err := strconv.ParseInt(durs, 10, 64)
	if err != nil {
		// %w keeps the strconv error in the chain for callers.
		return 0, fmt.Errorf("unable to parse duration %s: %w", durs, err)
	}

	return int(dur / 60), nil
}
60 changes: 60 additions & 0 deletions internal/reader/processor/odysee.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package processor

import (
"errors"
"fmt"
"log/slog"
"regexp"
"strconv"

"github.com/PuerkitoBio/goquery"

"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/fetcher"
)

// odyseeRegex matches strings that begin with "https://odysee.com". It is a
// prefix match only: nothing constrains the characters that follow.
var odyseeRegex = regexp.MustCompile(`^https://odysee\.com`)

// shouldFetchOdyseeWatchTime reports whether the watch time of entry should be
// fetched from Odysee: the feature has to be enabled in the configuration and
// the entry URL has to match odyseeRegex.
func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
	// The configuration flag is evaluated first, so the regex is not run at
	// all when the feature is turned off.
	return config.Opts.FetchOdyseeWatchTime() && odyseeRegex.MatchString(entry.URL)
}

// fetchOdyseeWatchTime downloads the page at websiteURL and returns the video
// duration in whole minutes (seconds divided by 60, remainder discarded).
//
// The duration is read from the "content" attribute of the first
// <meta property="og:video:duration"> element. An error is returned when the
// request fails, the document cannot be parsed, the meta element is missing,
// or its content is not a base-10 integer; in the last case the strconv error
// is wrapped and reachable through errors.Is/errors.As.
func fetchOdyseeWatchTime(websiteURL string) (int, error) {
	requestBuilder := fetcher.NewRequestBuilder()
	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())

	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
	defer responseHandler.Close()

	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
		slog.Warn("Unable to fetch Odysee watch time", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
		return 0, localizedError.Error()
	}

	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
	if docErr != nil {
		return 0, docErr
	}

	// durs contains the video watch time in seconds.
	durs, exists := doc.Find(`meta[property="og:video:duration"]`).First().Attr("content")
	if !exists {
		return 0, errors.New("duration has not found")
	}

	dur, err := strconv.ParseInt(durs, 10, 64)
	if err != nil {
		// %w keeps the strconv error in the chain for callers.
		return 0, fmt.Errorf("unable to parse duration %s: %w", durs, err)
	}

	return int(dur / 60), nil
}
Loading
Loading