From 55bea68b3ca9b21d9052f6ec5046e4e3148fff0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean=20K=C3=A1ssio?= Date: Fri, 28 Apr 2023 19:07:31 -0300 Subject: [PATCH 1/3] Update README.md --- README.md | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 123 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9b0ab9b..183db40 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,123 @@ -# Sioner-Metadata-Extractor -Sioner Metadata Extractor uses Chromedriver to extract metadata from websites with javascript, even if it is written in PHP +# Sioner Metadata Extractor +Sioner Metadata Extractor uses Chromedriver to extract metadata from websites with javascript using Symfony/Panther. + +### Installing Sioner + +Use [Composer](https://getcomposer.org/) to install Sioner in your project: + + ```sh +composer require jeankassio/sioner-metadata-extractor +```` + +### Installing ChromeDriver + +Sioner uses the WebDriver protocol usef by Panther to crawl sites. + +On all systems, you can use [`dbrekelmans/browser-driver-installer`](https://github.com/dbrekelmans/browser-driver-installer) +to install ChromeDriver locally: + + composer require --dev dbrekelmans/bdi + vendor/bin/bdi detect drivers + + +# Usage + +### Here is some available metadata that was returned by Sioner Metadata Extractor: + + +```json +{ + "domain":"github.com", + "canonical":"https:\/\/github.com\/jeankassio\/Sioner-Metadata-Extractor", + "title":"GitHub - jeankassio\/Sioner-Metadata-Extractor: Sioner Metadata Extractor uses Chromedriver to extract metadata from websites with javascript, even if it is written in PHP", + "image":"https:\/\/opengraph.githubassets.com\/b22dbba9d6ae7f1bf3f540334ce5b7c01e728daa06739db48430ca0804af9ab0\/jeankassio\/Sioner-Metadata-Extractor", + "description":"Sioner Metadata Extractor uses Chromedriver to extract metadata from websites with javascript, even if it is written in PHP - GitHub - jeankassio\/Sioner-Metadata-Extractor: Sioner 
Metadata Extracto...", + "icon":"https:\/\/github.com\/favicon.ico" +} +``` + +```json +{ + "domain":"techland.time.com", + "canonical":"https:\/\/techland.time.com\/2011\/04\/06\/linux-exec-competing-against-microsoft-is-like-kicking-a-puppy\/", + "title":"Linux Exec: Competing Against Microsoft Is Like “Kicking a Puppy” | TIME.com", + "image":"https:\/\/techland.time.com\/wp-content\/themes\/time2012\/library\/assets\/images\/time-logo-og.png", + "description":"Depending who you ask, you'll get a different answer about who's winning the operating system wars. Of course, the Linux people think they've won, but here's the thing--they may be right.", + "keywords":"business, news, linux, open-source, windows", + "icon":"https:\/\/techland.time.com\/favicon.ico" +} +``` + +```json +{ + "domain": "domain string", + "canonical": "og:canonical link string", + "title": "og:title/title website string", + "image": "og:image/first image string", + "description": "og:description/description string", + "keywords": "keywords string", + "icon": "apple-touch-icon/icon string", + "author": "og:author/author string", + "copyright": "copyright string" +} +``` + +## How does it work? + +Before running the browser for the full configured number of seconds (explained later), Sioner Metadata Extractor can first run a quick search for the requested data and obtain it without using JavaScript, which saves execution time. +To do this, pass the fields that must be obtained in this first check as parameter #4: + +```php +use JeanKassio\Sioner\MetadataExtractor; + +$YourLink = "https://github.com/jeankassio/Sioner-Metadata-Extractor"; + +$code = new MetadataExtractor($YourLink, null, null, ['website', 'title', 'image', 'description']); + +$response = $code->ExtractMetadata(); + +echo json_encode($response, JSON_UNESCAPED_UNICODE); + +``` + +If the quick check does not find that data, it will run the browser with JavaScript to get it. 
By default the browser runs for 3 seconds, but you can change this value by setting parameter #2: + +```php +use JeanKassio\Sioner\MetadataExtractor; + +$YourLink = "https://github.com/jeankassio/Sioner-Metadata-Extractor"; + +$code = new MetadataExtractor($YourLink, 2.5); //2.5 seconds + +$response = $code->ExtractMetadata(); + +echo json_encode($response, JSON_UNESCAPED_UNICODE); +``` + +We get og:image by default of 200x200, and if it's not found, the next image larger than that dimension will be returned. If none match, the largest is returned. But you can also set these values. + +```php +use JeanKassio\Sioner\MetadataExtractor; + +$YourLink = "https://github.com/jeankassio/Sioner-Metadata-Extractor"; + +$code = new MetadataExtractor($YourLink, null, [250,300]); //250 width, 300 height + +$response = $code->ExtractMetadata(); + +echo json_encode($response, JSON_UNESCAPED_UNICODE); +``` + +You can combine these parameters in whatever way suits you. For example: + +```php +use JeanKassio\Sioner\MetadataExtractor; + +$YourLink = "https://github.com/jeankassio/Sioner-Metadata-Extractor"; + +$code = new MetadataExtractor($YourLink, 5, [500,100], ['website', 'title', 'image', 'description']); + +$response = $code->ExtractMetadata(); + +echo json_encode($response, JSON_UNESCAPED_UNICODE); +``` From 7e913a105cb82b6e850de43571316d4e23f2248b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean=20K=C3=A1ssio?= Date: Fri, 28 Apr 2023 19:08:12 -0300 Subject: [PATCH 2/3] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 183db40..070d64c 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ composer require jeankassio/sioner-metadata-extractor ### Installing ChromeDriver -Sioner uses the WebDriver protocol usef by Panther to crawl sites. +Sioner uses the WebDriver protocol used by Panther to crawl sites. 
On all systems, you can use [`dbrekelmans/browser-driver-installer`](https://github.com/dbrekelmans/browser-driver-installer) to install ChromeDriver locally: From 4f99c3adaff8a9c4ba0e585c6c534b2dcb31e0b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean=20K=C3=A1ssio?= Date: Fri, 28 Apr 2023 22:14:10 -0300 Subject: [PATCH 3/3] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 070d64c..533abb0 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ # Sioner Metadata Extractor Sioner Metadata Extractor uses Chromedriver to extract metadata from websites with javascript using Symfony/Panther. +[![Total Downloads](https://poser.pugx.org/jeankassio/Sioner-Metadata-Extractor/downloads)](https://packagist.org/packages/jeankassio/sioner-metadata-extractor) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) + ### Installing Sioner Use [Composer](https://getcomposer.org/) to install Sioner in your project: