From 9c3a2167d1ba08d663102823e16343299769ee14 Mon Sep 17 00:00:00 2001 From: Mike Penner Date: Tue, 16 Aug 2016 10:40:18 -0400 Subject: [PATCH] Review and Improved --- config.yml | 4 +- src/Config.php | 43 ++++++----- src/Tvscraper.php | 176 +++++++++++++++++++++++++++++++++++----------- 3 files changed, 162 insertions(+), 61 deletions(-) diff --git a/config.yml b/config.yml index e7be0af..0e201f7 100644 --- a/config.yml +++ b/config.yml @@ -1,8 +1,8 @@ firstRun: false lu: '36648' tz: America/Toronto -startTime: '0' -endTime: '0' +startTime: 0 +endTime: 0 channelNumber: true subChannelNumber: true callsign: false diff --git a/src/Config.php b/src/Config.php index 6862b20..9f16343 100644 --- a/src/Config.php +++ b/src/Config.php @@ -1,18 +1,33 @@ "1", @@ -75,12 +90,11 @@ public function __construct() { } - $this->saveSettings($config); + $this->saveSettings($this->config); } - public - function saveSetting($setting, $value) { + public function saveSetting($setting, $value) { foreach ($this->config as $k => $v) { if ($k == $setting) { $this->config[$setting] = $value; @@ -89,21 +103,18 @@ function saveSetting($setting, $value) { file_put_contents(__DIR__ . '/../config.yml', Yaml::dump($this->config)); } - public - function saveSettings(&$config) { + public function saveSettings($config) { foreach ((array) $config as $k => $v) { $this->config[$k] = $v; } file_put_contents(__DIR__ . 
'/../config.yml', Yaml::dump($this->config)); } - public - function getSetting($setting) { + public function getSetting($setting) { return $this->config[$setting]; } - public - function getSettings() { + public function getSettings() { return $this->config; } } \ No newline at end of file diff --git a/src/Tvscraper.php b/src/Tvscraper.php index 2ada869..eca5d42 100644 --- a/src/Tvscraper.php +++ b/src/Tvscraper.php @@ -3,40 +3,72 @@ require_once('Config.php'); -use Symfony\Component\Yaml\Yaml; - -/* tvscraper - tvpassport tv schedule scraper +/** + * tvscraper - tvpassport tv schedule scraper + * + * @author module17 + * @author Fenrisulfir * - * Hard-coded to use luid 41501 - Rogers cable Toronto - Digital adapter + * @package tvscraper * - * ie. http://tvpassport.com/tvgrid.shtml?luid=41501&st=1465584150&sch=15&size=1&tzo=-5&dsto=1#x + * @version 2.01 * */ class Tvscraper { + /** + * DEBUGGING FLAGS + * @var boolean $debugLineupNames Display Broadcast Provider Array + * @var boolean $debugPostalCode Display Postal Code and URL + * @var boolean $debugUserParams Display Params Used To Generate ShowData + * @var boolean $debugEndPoint Display ShowData URL + * @var boolean $debugDataPattern Display ShowData Array + * + * MEMBER FIELDS + * @var string $baseUrl TVPassport URL + * @var string $scheduleUrl ShowData URL + * @var string $providerSearchUrl Broadcast Provider URL + * @var Config $config Configuration object + * @var string[] $showData ShowData Array + * + * METHODS + * @method __construct() + * @method void outputBanner() + * @method void run() + * @method void setTimezone() + * @method void setDisplayOptions() + * @method mixed validateInput(string $input, FILTER_TYPE $filter) + * @method void getLineups() + * @method void getSchedule(string $code, string $timezone) + * @method void buildPattern(string $pattern, string $html) + * @method string fetchHTML(string $url, string $method, string[] $post_fields) + * + */ public $debugLineupNames = false; public 
$debugPostalCode = true; public $debugUserParams = true; public $debugEndPoint = true; - public $debugDataPattern = false; - public $base_url = 'http://www.tvpassport.com/'; - public $schedule_url = 'tvlistings/tvlistings/listings'; - public $provider_search_url = 'index.php/lineups'; - public $postal_code = ''; - public $timeZone = ''; + public $debugDataPattern = true; + + public $baseUrl = 'http://www.tvpassport.com/'; + public $scheduleUrl = 'tvlistings/tvlistings/listings'; + public $providerSearchUrl = 'index.php/lineups'; protected $config; protected $showData = array(); - /* - * @param $config array of configuration settings + /** + * Constructorizationify */ public function __construct() { $this->config = new Config(); $this->outputBanner(); } + /** + * Claim this console output in the name of tvscraper + */ public function outputBanner() { echo <<validateInput($input, FILTER_VALIDATE_BOOLEAN) || $this->config->getSetting("firstRun")) { @@ -63,20 +102,18 @@ public function run() { } - public function setTimezone() { - $tz = readline('Enter your timezone (ie, America/Toronto): '); - - $timeZoneList = explode(PHP_EOL, file_get_contents(__DIR__ . 
'/../timezones.yml')); - - foreach ($timeZoneList as $timeZone) { - if ($timeZone === $tz) { - $config['tz'] = $tz; - $this->config->saveSettings($config); - break; - } - } - } - + /** + * Validate user input + * + * @param mixed $input User input data to be validated + * @param filter $filter Type of filter used to validate input + * + * @return mixed NULL if validation fails + * + * @todo sanitize input + * @todo handle different data types + * @todo pass all input through here + */ public function validateInput($input, $filter) { if ($filter === FILTER_VALIDATE_BOOLEAN) { @@ -95,6 +132,32 @@ public function validateInput($input, $filter) { } + /** + * Reads user input and searches a precompiled list to select the appropriate IANA timezone + * + * @return void + * + * @todo create greedy search function for timezone cuz I'm lazy + */ + public function setTimezone() { + $tz = readline('Enter your timezone (ie, America/Toronto): '); + + $timeZoneList = explode(PHP_EOL, file_get_contents(__DIR__ . '/../timezones.yml')); + + foreach ($timeZoneList as $timeZone) { + if ($timeZone === $tz) { + $config['tz'] = $tz; + $this->config->saveSettings($config); + break; + } + } + } + + /** + * Interactively allows the user to choose which data points to display and saves each option to an array + * + * @return void + */ public function setDisplayOptions() { $count = 0; foreach ($this->config->getSettings() as $k => $v) { @@ -114,17 +177,20 @@ public function setDisplayOptions() { } ++$count; } - - $this->config->saveSettings($config); } + /** + * Gets a list of broadcast providers based on the user's postal code. + * + * @throws \Exception + */ public function getLineups() { $postalCode = readline('Enter your postal/zip code: '); if ($postalCode && preg_match('/^[0-9A-Za-z]{5,6}$/', $postalCode)) { if ($this->debugPostalCode) { - echo sprintf(PHP_EOL . 'POST %s to %s%s' . PHP_EOL . PHP_EOL, $postalCode, $this->base_url, $this->provider_search_url); } - $url = sprintf('%s%s', $this->base_url, $this->provider_search_url); + echo sprintf(PHP_EOL . 
'POST %s to %s%s' . PHP_EOL . PHP_EOL, $postalCode, $this->baseUrl, $this->providerSearchUrl); } - $url = sprintf('%s%s', $this->base_url, $this->provider_search_url); + $url = sprintf('%s%s', $this->baseUrl, $this->providerSearchUrl); $html = $this->fetchHTML($url, 'POST', array('postalCode' => $postalCode)); if ($html) { @@ -134,6 +200,7 @@ public function getLineups() { if ($this->debugLineupNames) { var_dump($lineupNames); } + if (sizeof($lineupNames[0]) < 1) { echo sprintf( 'Cannot find any service area providers for postal/zip code: %s' . PHP_EOL . PHP_EOL, @@ -169,11 +236,25 @@ public function getLineups() { } } + /** + * Uses the broadcast code and timezone to get all of the shows currently playing. + * + * @param int $code The code used to represent the Broadcast Provider Lineup + * @param string $timezone The timezone + * + * @return void + * + * @todo build the regex pattern more gracefully + * @todo figure out how to nicely display potentially all the data + * @todo sanitize the data before displaying it + * @todo display data headings + * @todo handle missing data causing different sized arrays + */ public function getSchedule($code, $timezone) { $params = array( 'lu' => $code, 'st' => time(), // start timestamp, if null automatically starts at most recent hour mark - 'et' => time(), + 'et' => time(), //end timestamp, set to same as st to find only currently playing shows 'tz' => $timezone ); @@ -182,7 +263,7 @@ public function getSchedule($code, $timezone) { var_dump($params); } - $url = $this->base_url . $this->schedule_url; + $url = $this->baseUrl . $this->scheduleUrl; if ($this->debugEndPoint) { echo sprintf(PHP_EOL . 'ENDPOINT: %s' . PHP_EOL . 
PHP_EOL, $url); } @@ -428,7 +509,7 @@ public function getSchedule($code, $timezone) { //Number of shows for ($i = 0; $i < sizeof($this->showData[0][0]); ++$i) { - //Number of columns + //Number of data points for ($j = 0; $j < $count; ++$j) { echo sprintf("%s\t", $this->showData[$j][1][$i]); } @@ -437,22 +518,31 @@ public function getSchedule($code, $timezone) { } + /** + * Scrapes the data from the HTML and pushes it into the showData array + * + * @param string $pattern Regex pattern for each data point + * @param string $html The scraped html + * + * @return void + * + * @todo stop adding raw data to showData array + */ public - function buildPattern($pattern, &$html) { + function buildPattern($pattern, $html) { preg_match_all('/' . $pattern . '/ms', $html, $arr); array_push($this->showData, $arr); } - public - function timeRemaining($showET) { - return round(($showET - time()) / 60); - } - - /* - * @param $url string URL to fetch + /** + * @param string $url URL to fetch + * @param string $method 'POST' or 'GET' + * @param mixed[] $post_fields + * + * @return string $html Response from URL endpoint * - * @return $html string Response from URL endpoint + * @todo handle invalid method */ public function fetchHTML($url, $method = 'GET', $post_fields = array()) {