-
Notifications
You must be signed in to change notification settings - Fork 8
Fixed an issue from PR #93 #100
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 8 commits
21c9acc
07528e3
0ebfc8e
10c5114
23e5f7a
f006df3
1682b8d
e1abfc5
6ce10cb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| <?php | ||
| /** | ||
| * @file | ||
| * HTML parser. | ||
| */ | ||
|
|
||
| /** | ||
| * Class PPGetStatHTML. | ||
| */ | ||
| class PPGetStatHTML { | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Needed to parse HTML using XPath. Works faster than regexps.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Understood. Thx. |
||
|
|
||
| /** | ||
| * @var bool | ||
| */ | ||
| private $libxmlState; | ||
| /** | ||
| * @var DOMDocument | ||
| */ | ||
| private $document; | ||
| /** | ||
| * @var DOMXPath | ||
| */ | ||
| private $xpath; | ||
|
|
||
| /** | ||
| * PPGetStatHTML constructor. | ||
| * | ||
| * @param string $content | ||
| * HTML content. | ||
| */ | ||
| public function __construct($content) { | ||
| $this->document = new DOMDocument(); | ||
| // Handle errors/warnings and don't mess up output of your script. | ||
| // @see http://stackoverflow.com/a/17559716 | ||
| $this->libxmlState = libxml_use_internal_errors(true); | ||
| $this->document->loadHTML($content); | ||
| } | ||
|
|
||
| /** | ||
| * Clear XML library errors. | ||
| */ | ||
| public function __destruct() { | ||
| libxml_clear_errors(); | ||
| libxml_use_internal_errors($this->libxmlState); | ||
| } | ||
|
|
||
| /** | ||
| * Make XPath query to the document object. | ||
| * | ||
| * @return DOMXPath | ||
| * XPath object to perform queries. | ||
| */ | ||
| public function xpath() { | ||
| if (NULL === $this->xpath) { | ||
| $this->xpath = new DOMXPath($this->document); | ||
| } | ||
|
|
||
| return $this->xpath; | ||
| } | ||
|
|
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,21 +45,22 @@ function ppcmnt_ppgetstat_stats_job($user_node) { | |
| * Build urls for scanning core commits. | ||
| */ | ||
| function _ppcmnt_get_comments_tracking_urls($data) { | ||
| $last_scan = $data['last_scan']; | ||
| $cmt_zero_page = _ppgetstat_fetch_page('https://www.drupal.org/api-d7/comment.json?author=' . urlencode($data['doid'])); | ||
| $user_data = json_decode($cmt_zero_page); | ||
| $count = parse_url($user_data->last); | ||
| $urls = array(); | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Prevent changing the type of return value. |
||
|
|
||
| parse_str($count['query'], $query); | ||
|
|
||
| if (empty($user_data->list)) { | ||
| return NULL; | ||
| return $urls; | ||
| } | ||
| if ($query['page'] == 0) { | ||
| return array($user_data->self); | ||
| } | ||
| // In worst scenario we expect one page of posts per four hours. | ||
| // See webchick as example https://www.drupal.org/user/24967/track | ||
| $days_number = round((REQUEST_TIME - $last_scan) / (4 * 60 * 60)); | ||
| $urls = array(); | ||
| // $days_number = round((REQUEST_TIME - $last_scan) / (4 * 60 * 60)); | ||
| for ($i = 0; $i <= (int)$query['page']; $i++) { | ||
| $urls[] = "https://www.drupal.org/api-d7/comment.json?name=" . urlencode($data['doid']) . "&page=" . $i; | ||
| } | ||
|
|
@@ -143,27 +144,24 @@ function _ppcmnt_parse_posts_list($page_content, $data) { | |
| * Parse post page for comments. | ||
| */ | ||
| function _ppcmnt_parse_post_page($page_content, $data) { | ||
|
|
||
| $user_data = json_decode($page_content); | ||
| $count = count($user_data->list); | ||
| $end = REQUEST_TIME - PPGETSTAT_DEFAULT_TIME_PERIOD_FOR_SCANNING; | ||
|
|
||
| $comments_counter_array = []; | ||
| $list = &$user_data->list; | ||
| $result = []; | ||
|
|
||
| $end = REQUEST_TIME - variable_get('ppgetstat_stats_period'); | ||
| for ($i = ($count - 1); $i >= 0; $i--) { | ||
| if ($list[$i]->created < $end) { | ||
| foreach ($user_data->list as $i => $item) { | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Simplification.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The descending loop is there to scan the last users first. Does this construction do the same? |
||
| if ($item->created < $end) { | ||
| continue; | ||
| } | ||
| $period_timestamp = $list[$i]->created - ($list[$i]->created % PPGETSTAT_TIME_PERIOD_GRANULARITY); | ||
| $comments_counter_array[$period_timestamp][] = $list[$i]->cid; | ||
|
|
||
| $result[$item->created - ($item->created % PPGETSTAT_TIME_WEEK_GRANULARITY)][] = $item->cid; | ||
| } | ||
|
|
||
| if (count($comments_counter_array) != 0) { | ||
| $comments_counter_array['#type'] = PPGETSTAT_TYPE_COMMENTS; | ||
| if (count($result) > 0) { | ||
| $result['#type'] = PPGETSTAT_TYPE_COMMENTS; | ||
| } | ||
|
|
||
| return $comments_counter_array; | ||
| return $result; | ||
| } | ||
|
|
||
| /** | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Prevented division by zero (when no users in DB).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thx.