diff --git a/README.md b/README.md index 1211183..5792a69 100644 --- a/README.md +++ b/README.md @@ -8,14 +8,14 @@ This script is ideal for: - *Anyone* who wishes to study from the conference talks but doesn't have a reliable internet connection. ## What it can do? -This script will allow you to download the LDS General Conference talks in mp3 form that are available at https://www.lds.org/general-conference. +This script will allow you to download the LDS General Conference talks in mp3 form that are available at https://www.churchofjesuschrist.org/general-conference. It will create *playlists* as *.m3u files to allow you to play an *entire session*. It will also create playlists for *speakers* and *topcs*. This will not only work with the default English versions, but also for *every other language* for which audio files are available. Currently, hundreds of talks are available in many languages, going back as far as 1971 for some. ## How does it work? -It will programmatically navigate the lds.org website, downloading and organising every talk of interest to you. +It will programmatically navigate the churchofjesuschrist.org website, downloading and organising every talk of interest to you. Everything will be saved to a local folder of your choice. Load these files onto a *memory stick* for your car, or into your *favourite media player*. @@ -31,12 +31,13 @@ Load these files onto a *memory stick* for your car, or into your *favourite med |Argument|Values|Meaning| |--------|------|-------| |`-h` or `--help`| |List all arguments and exit| -|`-l` or `-lang`| 3-letter language code|Indicates which language version is to be downloaded. See https://www.lds.org/languages for full list. Click on the language you want, then take note of the 3-letter code in the address bar. i.e. https://www.lds.org/?lang=*spa*| +|`-l` or `-lang`| 3-letter language code|Indicates which language version is to be downloaded. See https://www.churchofjesuschrist.org/languages for full list. 
Click on the language you want, then take note of the 3-letter code in the address bar. i.e. https://www.churchofjesuschrist.org/?lang=*spa*| |`-s` or `-start`|Year as 4 digit number|First year of conference to download. Defaults to 1971. _Note: not all historic sessions are available in all languages_| -|`-e` or `-end`|Year as 4 digit number|Last year to download (defaults to present year).| +|`-e` or `-end`|Year as 4 digit number|Last year to download (defaults to 2100).| |`-d` or `-dest`|folder relative to here. i.e. `./conference`|Destination folder to output files to. Defaults to `output`| |`-n` or `-nocleanup`| |Leaves temporary files after process completion.| |`-v` or `-verbose`| |Provides detailed activity logging instead of progress bars.| +|`-nonumbers`| |Excludes generated session and talk numbers from file and directory names.| _Note: Depending upon how many years worth of conferences you ask it to download, it may take some time!_ @@ -48,8 +49,9 @@ output └───Conferences │ └───2018 │ │ └───4 - │ │ │ Priesthood Session.m3u - │ │ │ Saturday Morning Session.m3u + │ │ │ 10-Saturday Morning Session.m3u + │ │ │ 20-Saturday Afternoon Session.m3u + │ │ │ 30-Priesthood Session.m3u │ │ │ ... │ │ └───10 │ │ ... @@ -58,11 +60,13 @@ output └───MP3 │ └───2018 │ │ └───4 - │ │ │ └───Priesthood Session - │ │ │ │ Am I a Child of God? (Brian K. Taylor).mp3 - │ │ │ │ Even as Christ Forgives You, So Also Do Ye (Larry J. Echo Hawk).mp3 + │ │ │ └───10-Saturday Morning Session │ │ │ │ ... - │ │ │ └───Saturday Morning Session + │ │ │ └───20-Saturday Afternoon Session + │ │ │ │ ... + │ │ │ └───30-Priesthood Session + │ │ │ │ 31 Am I a Child of God? (Brian K. Taylor).mp3 + │ │ │ │ 32 Even as Christ Forgives You, So Also Do Ye (Larry J. Echo Hawk).mp3 │ │ │ │ ... │ │ └───10 │ └───2017 @@ -76,4 +80,4 @@ output Atonement(6, 1h4m).m3u ... ``` -The playlists for the Topics and Speakers include in parenthesis the number of talks and the total duration. 
\ No newline at end of file +The playlists for the Topics and Speakers include in parentheses the number of talks and the total duration. diff --git a/conference_headers.json b/conference_headers.json index c923598..060aea3 100644 --- a/conference_headers.json +++ b/conference_headers.json @@ -4,7 +4,7 @@ "Accept-Language": "en-US,en;q=0.5", "Cache-Control": "max-age=0", "Connection": "keep-alive", - "Host": "www.lds.org", + "Host": "www.churchofjesuschrist.org", "Upgrade-Insecure-Requests": "1", "User-Agent": "General Conference Downloader" } diff --git a/gen_conf_downloader.py b/gen_conf_downloader.py index 4938955..36a20f8 100644 --- a/gen_conf_downloader.py +++ b/gen_conf_downloader.py @@ -3,11 +3,13 @@ """ import html as html_tools +import io import json import os import re import shutil import sys +from html.parser import HTMLParser from urllib.parse import unquote_plus from urllib.parse import quote_plus import urllib.request @@ -17,8 +19,8 @@ from tqdm import tqdm Season = namedtuple('Season', 'link year month title') -Session = namedtuple('Session', 'html title season') -Talk = namedtuple('Talk', 'link speaker title session') +Session = namedtuple('Session', 'html title number season') +Talk = namedtuple('Talk', 'link speaker title number session') speakers_num = defaultdict(int) topics_num = defaultdict(int) @@ -32,11 +34,11 @@ AUDIO_DUR = 'MP3' PLAYLIST_FILE_EXT = 'm3u' -LDS_ORG_URL = 'https://www.lds.org' +LDS_ORG_URL = 'https://www.churchofjesuschrist.org' ALL_CONFERENCES_URL = f'{LDS_ORG_URL}/general-conference/conferences' GET_SESSION_TITLE_REGEX = '(.*?)' -TALK_LINK_REGEX = '' +TALK_LINK_REGEX = '.*?