From 3669d9c432a5b7a10bc7e8c0ad8091d8999e9aa8 Mon Sep 17 00:00:00 2001 From: j-h-s Date: Wed, 10 Jan 2018 21:10:01 +0100 Subject: [PATCH] [#71] streamlined twitter scraping a bit --- src/AppBundle/Service/TwitterService.php | 123 ++++++++++++++--------- 1 file changed, 73 insertions(+), 50 deletions(-) diff --git a/src/AppBundle/Service/TwitterService.php b/src/AppBundle/Service/TwitterService.php index 430c9fe..7d3442d 100644 --- a/src/AppBundle/Service/TwitterService.php +++ b/src/AppBundle/Service/TwitterService.php @@ -171,14 +171,16 @@ public function getTweets() { $twTime = \DateTime::createFromFormat('D M d H:i:s P Y', $item->created_at); // original string e.g. 'Mon Sep 08 15:19:11 +0000 2014' - $tweet = $this->getTweetDetails($item, $twTime); - - if ($tweet === 'txt') { + if (empty($item->entities->media)) { + $this->getTweetDetails($item, $twTime); $txtCount++; + } else { + $tweet = $this->getMedia($item, $twTime); $imgCount += $tweet['img']; $vidCount += $tweet['vid']; } + } $timeCheck = $twTime->getTimestamp(); // check time of last tweet scraped @@ -210,16 +212,7 @@ public function getTweets() { * @return string */ public function getTweetDetails($item, $twTime) { - if (!empty($item->full_text)) { - $twText = $item->full_text; - } else if (!empty($item->text)) { - $twText = $item->text; - } else $twText = null; - - if (!empty($item->entities->media)) { // if tweet contains media - $tweet = $this->getMediaDetails($item, $twTime, $twText); - return $tweet; - } + $twText = $this->getTwText($item); $allData = [ 'id' => $item->id, @@ -241,61 +234,32 @@ public function getTweetDetails($item, $twTime) { $item->favorite_count, $allData ); - - return 'txt'; } /** - * Retrieves the details of an image or video + * Determines the type of media that a tweet contains * @param object $item * @param object $twTime - * @param string $twText * @return array */ - public function getMediaDetails($item, $twTime, $twText) { - $media = $item->extended_entities->media; - + public function getMedia($item, $twTime) { $imgCount = 0; $vidCount = 0; - foreach ($media as $photo) { // if tweet contains multiple images + $media = $item->extended_entities->media; + + foreach ($media as $photo) { if ($photo->type == 'video') { $subType = SocialMedia::SUBTYPE_VIDEO; $vidCount++; - } else { // if type == 'photo' or 'animated_gif' + + } else { // if type == 'photo' or 'animted_gif' $subType = SocialMedia::SUBTYPE_IMAGE; $imgCount++; } - $imgSrc = $photo->media_url . ":small"; - $imgId = $photo->id; - $img = $this->container - ->get('ImageService') - ->saveImage('tw', $this->partyCode, $imgSrc, $imgId); - - $allData = [ - 'id' => $imgId, - 'posted' => $twTime->format('Y-m-d H:i:s'), // string - 'text' => $twText, - 'image' => $img, - 'img_source' => $imgSrc, - 'url' => 'https://twitter.com/statuses/' . $item->id, - 'likes' => $item->favorite_count, - 'retweets' => $item->retweet_count - ]; - - $this->db->addSocial( - $this->partyCode, - SocialMedia::TYPE_TWITTER, - $subType, - $item->id, - $twTime, // DateTime - $twText, - $img, - $item->favorite_count, - $allData - ); + $this->getMediaDetails($item, $twTime, $photo, $subType); } $count['img'] = $imgCount; @@ -303,4 +267,63 @@ public function getMediaDetails($item, $twTime, $twText) { return $count; } + + /** + * Retrieves the details of an image or video + * @param object $item + * @param object $twTime + * @param string $photo + * @return array + */ + public function getMediaDetails($item, $twTime, $photo, $subType) { + $twText = $this->getTwText($item); + + $imgSrc = $photo->media_url . ":small"; + $imgId = $photo->id; + $img = $this->container + ->get('ImageService') + ->saveImage('tw', $this->partyCode, $imgSrc, $imgId); + + $allData = [ + 'id' => $imgId, + 'posted' => $twTime->format('Y-m-d H:i:s'), // string + 'text' => $twText, + 'image' => $img, + 'img_source' => $imgSrc, + 'url' => 'https://twitter.com/statuses/' . $item->id, + 'likes' => $item->favorite_count, + 'retweets' => $item->retweet_count + ]; + + $this->db->addSocial( + $this->partyCode, + SocialMedia::TYPE_TWITTER, + $subType, + $item->id, + $twTime, // DateTime + $twText, + $img, + $item->favorite_count, + $allData + ); + } + + + /** + * Returns text field of a tweet + * @param object $item + * @return string + */ + public function getTwText($item) { + if (!empty($item->full_text)) { + return $item->full_text; + } + + if (!empty($item->text)) { + return $item->text; + } + + return null; + } + }