Skip to content

Commit d133a41

Browse files
committed
add rumble hls extractor
1 parent cd76394 commit d133a41

File tree

3 files changed

+137
-7
lines changed

3 files changed

+137
-7
lines changed

cmd/live-stream.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@ import (
99

1010
func newM3U8Cmd() *cobra.Command {
1111
var outputPath string
12+
var extract string
1213

1314
cmd := &cobra.Command{
14-
Use: "live-stream [URL] [--output OUTPUT_PATH]",
15+
Use: "live-stream [URL] [--output OUTPUT_PATH] [--extract EXTRACTOR]",
1516
Short: "Download HLS/M3U8 live streams",
1617
Aliases: []string{"hls", "m3u8", "livestream", "stream"},
1718
Args: cobra.ExactArgs(1),
@@ -25,12 +26,16 @@ func newM3U8Cmd() *cobra.Command {
2526
HTTPClientConfig: globalHTTPConfig,
2627
Metadata: make(map[string]any),
2728
}
29+
if extract != "" {
30+
job.Metadata["extract"] = extract
31+
}
2832
jobs := []utils.DanzoJob{job}
2933
log.Debug().Str("op", "cmd/live-stream").Msgf("Starting scheduler with %d jobs", len(jobs))
3034
scheduler.Run(jobs, workers)
3135
},
3236
}
3337

3438
cmd.Flags().StringVarP(&outputPath, "output", "o", "", "Output file path (default: stream_[timestamp].mp4)")
39+
cmd.Flags().StringVarP(&extract, "extract", "e", "", "Site-specific extractor to use (e.g., rumble)")
3540
return cmd
3641
}
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
package m3u8
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
"io"
7+
"maps"
8+
"net/http"
9+
"net/http/cookiejar"
10+
"regexp"
11+
"strings"
12+
13+
"github.com/rs/zerolog/log"
14+
"github.com/tanq16/danzo/internal/utils"
15+
)
16+
17+
// rumbleHLSSource is the shape shared by every HLS entry that is decoded from
// the embedJS payload: an object of which only the "url" key is read. It is an
// alias, so the field types of RumbleJSResponse stay plain anonymous structs.
type rumbleHLSSource = struct {
	URL string `json:"url"`
}

// RumbleJSResponse is the subset of the Rumble embedJS JSON response that the
// extractor decodes.
//
// U.HLS is the single HLS entry found under "u.hls", while UA.HLS holds the
// entries found under "ua.hls", indexed by their JSON object key.
type RumbleJSResponse struct {
	U struct {
		HLS rumbleHLSSource `json:"hls"`
	} `json:"u"`
	UA struct {
		HLS map[string]rumbleHLSSource `json:"hls"`
	} `json:"ua"`
}
30+
31+
func runExtractor(job *utils.DanzoJob) error {
32+
extractor, _ := job.Metadata["extract"].(string)
33+
switch strings.ToLower(extractor) {
34+
case "rumble":
35+
return extractRumbleURL(job)
36+
default:
37+
return fmt.Errorf("unsupported extractor: %s", extractor)
38+
}
39+
}
40+
41+
func extractRumbleURL(job *utils.DanzoJob) error {
42+
log.Debug().Str("op", "live-stream/extractor").Msgf("Extracting Rumble URL from %s", job.URL)
43+
videoID, err := getRumbleVideoID(job.URL)
44+
if err != nil {
45+
return err
46+
}
47+
log.Debug().Str("op", "live-stream/extractor").Msgf("Found Rumble video ID: %s", videoID)
48+
m3u8URL, err := getRumbleM3U8FromVideoID(videoID, job.HTTPClientConfig)
49+
if err != nil {
50+
return err
51+
}
52+
job.URL = m3u8URL
53+
return nil
54+
}
55+
56+
// rumbleEmbedURLRe captures the video ID from the "embedUrl" entry found in a
// Rumble page's source. It is compiled once at package scope instead of on
// every call.
//
// A looser alternative that matches any embed link in the page:
//
//	https://rumble\.com/embed/([^&",/]*)
var rumbleEmbedURLRe = regexp.MustCompile(`"embedUrl":\s*"https://rumble\.com/embed/([^/"]+)/"`)

// getRumbleVideoID fetches the page at pageURL and returns the video ID found
// in its "embedUrl" entry.
//
// The request is sent with a browser-like User-Agent through a client that
// keeps cookies across redirects. An error is returned when the request
// cannot be built or sent, when the final response status is not 200 OK, when
// the body cannot be read, or when no embed URL is present in the page.
//
// NOTE(review): the client used here has no timeout and does not use the
// job's HTTP client configuration — consider bounding the request.
func getRumbleVideoID(pageURL string) (string, error) {
	jar, err := cookiejar.New(nil)
	if err != nil {
		return "", fmt.Errorf("failed to create cookie jar for rumble page: %w", err)
	}
	client := &http.Client{Jar: jar}

	req, err := http.NewRequest(http.MethodGet, pageURL, nil)
	if err != nil {
		return "", fmt.Errorf("failed to create request for rumble page: %w", err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36")
	req.Header.Set("Connection", "keep-alive")
	req.Header.Set("Upgrade-Insecure-Requests", "1")

	resp, err := client.Do(req)
	if err != nil {
		return "", fmt.Errorf("failed to fetch rumble page: %w", err)
	}
	defer resp.Body.Close()

	// Refuse to scrape error pages: without this check a 404/403 response
	// would be reported as a missing video ID (or yield an ID from an
	// unrelated page), hiding the real cause.
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("failed to fetch rumble page: unexpected status %s", resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("failed to read rumble page body: %w", err)
	}

	// Match on the raw bytes so the whole page is not copied into a string.
	matches := rumbleEmbedURLRe.FindSubmatch(body)
	if len(matches) < 2 {
		return "", fmt.Errorf("could not find rumble video ID in page source")
	}
	return string(matches[1]), nil
}
85+
86+
func getRumbleM3U8FromVideoID(videoID string, clientConfig utils.HTTPClientConfig) (string, error) {
87+
jsonURL := fmt.Sprintf("https://rumble.com/embedJS/u3/?request=video&ver=2&v=%s", videoID)
88+
newClientConfig := clientConfig
89+
newClientConfig.Headers = make(map[string]string)
90+
maps.Copy(newClientConfig.Headers, clientConfig.Headers)
91+
newClientConfig.Headers["Referer"] = "https://rumble.com/"
92+
client := utils.NewDanzoHTTPClient(newClientConfig)
93+
94+
req, err := http.NewRequest("GET", jsonURL, nil)
95+
if err != nil {
96+
return "", fmt.Errorf("failed to create request for rumble json: %w", err)
97+
}
98+
resp, err := client.Do(req)
99+
if err != nil {
100+
return "", fmt.Errorf("failed to fetch rumble json: %w", err)
101+
}
102+
defer resp.Body.Close()
103+
104+
var data RumbleJSResponse
105+
if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
106+
return "", fmt.Errorf("failed to decode rumble json: %w", err)
107+
}
108+
if data.U.HLS.URL != "" {
109+
return data.U.HLS.URL, nil
110+
}
111+
if auto, ok := data.UA.HLS["auto"]; ok && auto.URL != "" {
112+
return auto.URL, nil
113+
}
114+
return "", fmt.Errorf("could not find m3u8 url in rumble json response")
115+
}

internal/downloaders/live-stream/initial.go

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,28 @@ import (
1414
type M3U8Downloader struct{}
1515

1616
func (d *M3U8Downloader) ValidateJob(job *utils.DanzoJob) error {
17-
parsedURL, err := url.Parse(job.URL)
18-
if err != nil {
19-
return fmt.Errorf("invalid URL: %v", err)
20-
}
21-
if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" {
22-
return fmt.Errorf("unsupported scheme: %s", parsedURL.Scheme)
17+
// When an extractor is set, job.URL is a page URL that BuildJob replaces with the extracted stream URL, so URL validation is skipped here.
18+
if _, ok := job.Metadata["extract"]; !ok {
19+
parsedURL, err := url.Parse(job.URL)
20+
if err != nil {
21+
return fmt.Errorf("invalid URL: %v", err)
22+
}
23+
if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" {
24+
return fmt.Errorf("unsupported scheme: %s", parsedURL.Scheme)
25+
}
2326
}
2427
log.Info().Str("op", "live-stream/initial").Msgf("job validated for %s", job.URL)
2528
return nil
2629
}
2730

2831
func (d *M3U8Downloader) BuildJob(job *utils.DanzoJob) error {
32+
if extractor, ok := job.Metadata["extract"].(string); ok && extractor != "" {
33+
log.Info().Str("op", "live-stream/initial").Msgf("Using extractor: %s", extractor)
34+
if err := runExtractor(job); err != nil {
35+
return fmt.Errorf("extractor failed: %v", err)
36+
}
37+
log.Info().Str("op", "live-stream/initial").Msgf("URL extracted: %s", job.URL)
38+
}
2939
if job.OutputPath == "" {
3040
job.OutputPath = fmt.Sprintf("stream_%s.mp4", time.Now().Format("2006-01-02_15-04"))
3141
}

0 commit comments

Comments
 (0)