Skip to content

Commit 43c9906

Browse files
committed
[#71] adds another level of abstraction to help catch duplicate entries
1 parent f5ce618 commit 43c9906

File tree

3 files changed

+166
-86
lines changed

3 files changed

+166
-86
lines changed

src/AppBundle/Service/DatabaseService.php

+43 -6
Original file line number | Diff line number | Diff line change
@@ -98,8 +98,6 @@ public function addStatistic($code, $type, $subType, $value) {
9898
}
9999

100100

101-
102-
103101
/**
104102
* Builds or updates a Metadata object
105103
* @param string $code
@@ -130,8 +128,43 @@ public function addMeta($code, $type, $value) {
130128
}
131129

132130

131+
/**
132+
* Processes social media posts before adding them to the database
133+
* @param array $posts
134+
* @return null
135+
*/
136+
public function processSocialMedia($posts) {
137+
$this->log->debug(" + Persisting to database");
138+
139+
$postCount = 0;
140+
141+
foreach ($posts as $post) {
142+
if (!isset($post['id'])) {
143+
continue;
144+
}
145+
146+
$this->addSocial(
147+
$post['code'],
148+
$post['type'],
149+
$post['subtype'],
150+
$post['id'],
151+
$post['time'],
152+
$post['text'],
153+
$post['img'],
154+
$post['likes'],
155+
$post['allData']
156+
);
157+
158+
$postCount++;
159+
}
160+
161+
$this->log->debug(" + " . $postCount . " items persisted");
162+
}
163+
164+
133165
/**
134166
* Builds or updates a SocialMedia object
167+
* @param string $code
135168
* @param string $type
136169
* @param string $subType
137170
* @param string $postId
@@ -145,7 +178,10 @@ public function addMeta($code, $type, $value) {
145178
public function addSocial($code, $type, $subType, $postId, $postTime, $postText, $postImage, $postLikes, $postData) {
146179
$p = $this->em
147180
->getRepository('AppBundle:SocialMedia')
148-
->findOneByPostId($postId);
181+
->findOneBy([
182+
'postId' => $postId,
183+
'postImage' => $postImage
184+
]);
149185

150186
if (!$p) {
151187
$p = new SocialMedia();
@@ -200,7 +236,7 @@ public function getTimeLimit($partyCode, $type, $subType, $scrapeFull = false) {
200236
if (!empty($p)) {
201237
$this->log->info(" + Database !empty, updating... ");
202238
$limit = $p->getPostTime()->getTimestamp();
203-
$this->log->debug(" + (Lastest databse entry: " . date('d/m/Y', $limit) . ")");
239+
$this->log->debug(" + (Latest entry: " . date('d/m/Y', $limit) . ")");
204240
return $limit;
205241
}
206242

@@ -254,7 +290,7 @@ public function getStatLimit($partyCode, $statType, $subType, $scrapeFull = fals
254290
return $limit;
255291
}
256292

257-
$this->log->debug(" + (Latest count: " . $stat->getValue() . " at " . $stat->getTimestamp()->format('H:i:s Y/m/d') . ")");
293+
$this->log->debug(" + (Latest count: " . $stat->getValue() . " at " . $stat->getTimestamp()->format('H:i:s d/m/Y') . ")");
258294
$limit['time'] = $stat->getTimestamp()->getTimestamp();
259295
$limit['value'] = $stat->getValue();
260296
return $limit;
@@ -284,4 +320,5 @@ public function getLaunchDate($site) {
284320

285321
return $date;
286322
}
287-
}
323+
324+
}

src/AppBundle/Service/FacebookService.php

+66 -45
Original file line number | Diff line number | Diff line change
@@ -106,24 +106,26 @@ public function getPosts() {
106106

107107
$this->log->info(" + Getting post details...");
108108
$fdPosts = $graphNode->getField('posts');
109-
$timeLimit = $this->db->getTimeLimit($this->partyCode, 'fb', 'T', $this->scrapeFull);
110109

111110
$pageCount = 0;
112111
$txtCount = 0;
113112
$vidCount = 0;
114113
$loopCount = 0;
115114
$temp = [];
116115

116+
$timeLimit = $this->db->getTimeLimit($this->partyCode, 'fb', 'T', $this->scrapeFull);
117+
117118
do {
118119
$this->log->debug(" + Page " . $pageCount);
119120

120121
foreach ($fdPosts as $key => $post) {
121-
if (in_array($post->getField('id'), $temp, true)) {
122+
$id = $post->getField('id');
123+
124+
if (in_array($id, $temp, true)) {
122125
// if post was already scraped this session
123126
$loopCount++;
124127
continue;
125128
}
126-
$temp[] = $post->getField('id');
127129

128130
$type = $post->getField('type');
129131
// types = 'status', 'link', 'photo', 'video', 'event'
@@ -138,7 +140,7 @@ public function getPosts() {
138140
$txtCount++;
139141
}
140142

141-
$this->getPostDetails($post, $subType);
143+
$temp[$id] = $this->getPostDetails($post, $subType);
142144
}
143145

144146
$timeCheck = $post->getField('created_time')->getTimestamp(); // check time of last scraped post
@@ -151,6 +153,8 @@ public function getPosts() {
151153
$this->log->warning(" - Facebook post scraping for " . $this->partyCode . " looped " . $loopCount . " times");
152154
}
153155

156+
$this->db->processSocialMedia($temp);
157+
154158
$out['posts'] = $txtCount;
155159
$out['videos'] = $vidCount;
156160
$this->log->info(" + " . $txtCount . " text posts and " . $vidCount . " videos since " . date('d/m/Y', $timeCheck) . " processed");
@@ -194,19 +198,22 @@ public function getPostDetails($post, $subType) {
194198
'shares' => $shareCount
195199
];
196200

197-
$this->db->addSocial(
198-
$this->partyCode,
199-
SocialMedia::TYPE_FACEBOOK,
200-
$subType,
201-
$post->getField('id'),
202-
$post->getField('updated_time'), // DateTime
203-
$text,
204-
$img,
205-
$reactionCount,
206-
$allData
207-
);
201+
$out = [
202+
'code' => $this->partyCode,
203+
'type' => SocialMedia::TYPE_FACEBOOK,
204+
'subtype' => $subType,
205+
'id' => $post->getField('id'),
206+
'time' => $post->getField('updated_time'), // DateTime
207+
'text' => $text,
208+
'img' => $img,
209+
'likes' => $reactionCount,
210+
'allData' => $allData
211+
];
212+
213+
return $out;
208214
}
209215

216+
210217
/**
211218
* Processes images
212219
* @param string $requestFields
@@ -223,13 +230,14 @@ public function getImages() {
223230

224231
$this->log->info(" + Getting image details...");
225232
$fdAlbums = $graphNode->getField('albums');
226-
$timeLimit = $this->db->getTimeLimit($this->partyCode, 'fb', 'I', $this->scrapeFull);
227233

228234
$pageCount = 0;
229235
$imgCount = 0;
230236
$loopCount = 0;
231237
$temp = [];
232238

239+
$timeLimit = $this->db->getTimeLimit($this->partyCode, 'fb', 'I', $this->scrapeFull);
240+
233241
foreach ($fdAlbums as $key => $album) {
234242
$photoCount[] = $album->getField('photo_count');
235243
$fdPhotos = $album->getField('photos');
@@ -240,15 +248,17 @@ public function getImages() {
240248

241249
do {
242250
$this->log->debug(" + Page " . $pageCount);
251+
243252
foreach ($fdPhotos as $key => $photo) {
244-
if (in_array($photo->getField('picture'), $temp, true)) {
253+
$id = $photo->getField('picture');
254+
255+
if (in_array($id, $temp, true)) {
245256
// if image was already scraped this session
246257
$loopCount++;
247258
continue;
248259
}
249-
$temp[] = $photo->getField('picture');
250260

251-
$this->getImageDetails($photo, $album);
261+
$temp[$id] = $this->getImageDetails($photo, $album);
252262
$imgCount++;
253263
}
254264

@@ -263,6 +273,8 @@ public function getImages() {
263273
$this->log->warning(" - Facebook image scraping for " . $this->partyCode . " looped " . $loopCount . " times");
264274
}
265275

276+
$this->db->processSocialMedia($temp);
277+
266278
$out['imageCount'] = array_sum($photoCount);
267279
$out['images'] = $imgCount;
268280
$this->log->info(" + " . $out['imageCount'] . " images found, " . $imgCount . " since " . date('d/m/Y', $timeCheck) . " processed");
@@ -305,17 +317,19 @@ public function getImageDetails($photo, $album) {
305317
'shares' => $shareCount
306318
];
307319

308-
$this->db->addSocial(
309-
$this->partyCode,
310-
SocialMedia::TYPE_FACEBOOK,
311-
SocialMedia::SUBTYPE_IMAGE,
312-
$photo->getField('id'),
313-
$photo->getField('updated_time'), // DateTime
314-
$photo->getField('name'),
315-
$img,
316-
$reactionCount,
317-
$allData
318-
);
320+
$out = [
321+
'code' => $this->partyCode,
322+
'type' => SocialMedia::TYPE_FACEBOOK,
323+
'subtype' => SocialMedia::SUBTYPE_IMAGE,
324+
'id' => $photo->getField('id'),
325+
'time' => $photo->getField('updated_time'), // DateTime
326+
'text' => $photo->getField('name'),
327+
'img' => $img,
328+
'likes' => $reactionCount,
329+
'allData' => $allData
330+
];
331+
332+
return $out;
319333
}
320334

321335

@@ -335,24 +349,27 @@ public function getEvents() {
335349

336350
$this->log->info(" + Getting event details...");
337351
$fdEvents = $graphNode->getField('events');
338-
$timeLimit = $this->db->getTimeLimit($this->partyCode, 'fb', 'E', $this->scrapeFull);
339352

340353
$pageCount = 0;
341354
$eveCount = 0;
342355
$loopCount = 0;
343356
$temp = [];
344357

358+
$timeLimit = $this->db->getTimeLimit($this->partyCode, 'fb', 'E', $this->scrapeFull);
359+
345360
do { // process current page of results
346361
$this->log->debug(" + Page " . $pageCount);
362+
347363
foreach ($fdEvents as $key => $event) {
348-
if (in_array($event->getField('id'), $temp, true)) {
364+
$id = $event->getField('id');
365+
366+
if (in_array($id, $temp, true)) {
349367
// if event was already scraped this session
350368
$loopCount++;
351369
continue;
352370
}
353-
$temp[] = $event->getField('id');
354371

355-
$this->getEventDetails($event);
372+
$temp[$id] = $this->getEventDetails($event);
356373
$eveCount++;
357374
}
358375

@@ -366,6 +383,8 @@ public function getEvents() {
366383
$this->log->warning(" - Facebook event scraping for " . $this->partyCode . " looped " . $loopCount . " times");
367384
}
368385

386+
$this->db->processSocialMedia($temp);
387+
369388
$out['eventCount'] = $eveCount;
370389
$out['events'] = true;
371390
$this->log->info(" + " . $out['eventCount'] . " events found and processed");
@@ -421,17 +440,19 @@ public function getEventDetails($event) {
421440
'comments' => $commentCount
422441
];
423442

424-
$this->db->addSocial(
425-
$this->partyCode,
426-
SocialMedia::TYPE_FACEBOOK,
427-
SocialMedia::SUBTYPE_EVENT,
428-
$event->getField('id'),
429-
$event->getField('updated_time'), // DateTime
430-
$event->getField('name'),
431-
$img,
432-
$event->getField('interested_count'),
433-
$allData
434-
);
443+
$out = [
444+
'code' => $this->partyCode,
445+
'type' => SocialMedia::TYPE_FACEBOOK,
446+
'subtype' => SocialMedia::SUBTYPE_EVENT,
447+
'id' => $event->getField('id'),
448+
'time' => $event->getField('updated_time'), // DateTime
449+
'text' => $event->getField('name'),
450+
'img' => $img,
451+
'likes' => $event->getField('interested_count'),
452+
'allData' => $allData
453+
];
454+
455+
return $out;
435456
}
436457

437458
}

0 commit comments

Comments
 (0)