diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..fac4aa4 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,20 @@ +# This file is for unifying the coding style for different editors and IDEs +# editorconfig.org + +[*] +charset = utf-8 +indent_style = space +indent_size = 2 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +[*.php] +indent_size = 4 + +[*.md,*.txt] +trim_trailing_whitespace = false +insert_final_newline = false + +[composer.json] +indent_size = 4 \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d465b71 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +# OS files +.DS_Store + +# npm modules +/node_modules + +# Composer files +/vendor diff --git a/README.md b/README.md new file mode 100755 index 0000000..3a748a9 --- /dev/null +++ b/README.md @@ -0,0 +1,152 @@ +![GitHub release](https://img.shields.io/github/release/texnixe/kirby3-similar.svg?maxAge=1800) ![License](https://img.shields.io/github/license/mashape/apistatus.svg) ![Kirby 3 Pluginkit](https://img.shields.io/badge/Pluginkit-YES-cca000.svg) + +# Kirby Similar + +Kirby 3 Similar is a [Kirby CMS](https://getkirby.com) plugin that lets you find items related to the current item based on similarity between fields. For each given field, the plugin calculates the Jaccard Index and then weighs all indices based on the factor for each field. + +Example use case: +The current page has a tags field with three values (red, green, blue). You want to find all sibling pages with a minimum Jaccard Index of 0.3. 
+ +## Commercial Usage + +This plugin is free but if you use it in a commercial project please consider + +- [making a donation](https://www.paypal.me/texnixe/10) or +- [buying a Kirby license using this affiliate link](https://a.paddle.com/v2/click/1129/38380?link=1170) + +## How is it different from the Kirby 3 Related plugin + +- It allows you to pass multiple fields as an array with a factor for each field, depending on the importance of this field for determining the similarity. +- The similarity is calculated according to the Jaccard Index, rather than by the number of matches as in the Kirby 3 Related plugin. + +A quick example that describes the difference: + +**Example 1:** + +Page A: blue, green +Page B: blue, green + +Matches: 2 +Jaccard Index: 2/2 = 1 + +**Example 2:** + +Page A: blue, green, yellow +Page B: blue, green + +Matches: 2 +Jaccard Index: 2/3 = 0.66666 + +While both pages have the same number of matches, the Jaccard Index is lower in the second example, because the number of unique tags is taken into account as well. + + +## Installation + +### Download + +[Download the files](https://github.com/texnixe/kirby3-similar/archive/master.zip) and place them inside `site/plugins/kirby-similar`. + +### Git Submodule +You can add the plugin as a Git submodule. 
+ + $ cd your/project/root + $ git submodule add https://github.com/texnixe/kirby3-similar.git site/plugins/kirby-similar + $ git submodule update --init --recursive + $ git commit -am "Add Kirby Similar plugin" + +Run these commands to update the plugin: + + $ cd your/project/root + $ git submodule foreach git checkout master + $ git submodule foreach git pull + $ git commit -am "Update submodules" + $ git submodule update --init --recursive + + +## Usage + +### Similar pages +``` +similar($options); + +foreach($similarPages as $p) { + echo $p->title(); +} + +``` + +### Similar files + +``` +similar($options); + +foreach($similarImages as $image) { + echo $image->filename(); +} + +``` + +### Options + +You can pass an array of options: + +``` +similar([ + 'index' => $page->siblings(false)->visible(), + 'fields' => 'tags', + 'threshold' => 0.2, + 'delimiter' => ',', + 'languageFilter' => false +]); +?> +``` +#### index + +The collection to search in. +Default: `$item->siblings()` + +#### fields + +The name of the field to search in. +Default: tags + +You can pass either a single field as string, or an array of fields with a factor that serves as multiplier: + +``` +fields => ['tags' => 1, 'size' => 1.5, 'category' => 3] +``` + +You can change the factor to get better results when filtering collections. For example, if the above is your standard field setting, you might want to change it when a filter parameter is set to size: + +``` +fields => ['tags' => 0.5, 'size' => 2, 'category' => 3] +``` + +**Note that the factor is required when using multiple fields, even if you want to set the factor to the same value for each field. You can't set a simple array of fields.** + +#### delimiter + +The delimiter that you use to separate values in a field +Default: , + +#### threshold + +The minimum Jaccard Index, i.e. 
a value between 0 (no similarity) and 1 (full similarity) +Default: 0.1 + +#### languageFilter + +Filter similar items by language in a multi-language installation. +Default: false + + +## License + +Kirby 3 Similar is open-sourced software licensed under the MIT license. + +Copyright © 2019 Sonja Broda info@texniq.de https://sonjabroda.com diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..27a4724 --- /dev/null +++ b/composer.json @@ -0,0 +1,21 @@ +{ + "name": "texnixe/similar", + "description": "Find similar pages or files based on similarities between fields", + "version": "0.9.0", + "type": "kirby-plugin", + "license": "MIT", + "authors": [ + { + "name": "Sonja Broda", + "email": "info@texniq.de" + } + ], + "keywords": [ + "kirby3", + "kirby3-cms", + "kirby3-plugin" + ], + "require": { + "getkirby/composer-installer": "^1.1" + } +} diff --git a/config/hooks.php b/config/hooks.php new file mode 100644 index 0000000..77a030c --- /dev/null +++ b/config/hooks.php @@ -0,0 +1,18 @@ + function() { + Similar::flush(); + }, + 'page.create:after' => function() { + Similar::flush(); + }, + 'file.create:after' => function() { + Similar::flush(); + }, + 'page.update:after' => function() { + Similar::flush(); + } +]; diff --git a/index.php b/index.php new file mode 100755 index 0000000..0e31f18 --- /dev/null +++ b/index.php @@ -0,0 +1,43 @@ + + * @copyright Sonja Broda + * @link https://github.com/texnixe/kirby3-similar + * @license MIT + */ + +load([ + 'texnixe\\similar\\similar' => 'src/Similar.php' +], __DIR__); + +\Kirby::plugin('texnixe/similar', [ + 'options' => [ + 'cache' => true, + 'expires' => (60*24*7), // minutes + 'defaults' => [ + 'fields' => 'tags', + 'threshold' => 0.1, + 'delimiter' => ',', + 'languageFilter' => false, + ] + ], + 'pageMethods' => [ + 'similar' => function (array $options = []) { + return Similar::getSimilar($this, $options); + } + ], + 'fileMethods' => [ + 'similar' => function (array $options = []) { + return 
/**
 * Empties the plugin cache.
 *
 * @return mixed the result of the cache's flush() call
 */
public static function flush()
{
    return static::cache()->flush();
}

/**
 * Computes the items of the search collection that are similar to $basis.
 *
 * User options are merged over the `texnixe.similar.defaults` option; the
 * default search collection is `$basis->siblings(false)`.
 *
 * @param object $basis   the page or file to compare against
 * @param array  $options index, fields, threshold, delimiter, languageFilter
 * @return mixed the items whose index reaches the threshold, sorted by
 *               `jaccardIndex` descending; an empty collection when none of
 *               the configured fields has values on $basis
 */
public static function data($basis, $options = [])
{
    // empty collection of the matching type, returned when there is nothing to compare
    $similar = static::getClassName($basis, []);

    $defaults          = option('texnixe.similar.defaults');
    $defaults['index'] = $basis->siblings(false);
    $options           = array_merge($defaults, $options);

    $index          = $options['index'];
    $fields         = $options['fields'];
    $threshold      = $options['threshold'];
    $delimiter      = $options['delimiter'];
    $languageFilter = $options['languageFilter'];

    // a single field name is shorthand for that field with a factor of 1
    if (is_string($fields)) {
        $fields = [$fields => 1];
    }

    // collect the values of the basis item per field; fields without values are skipped
    $searchItems = [];
    if (is_array($fields)) {
        foreach ($fields as $field => $factor) {
            // split with the configured delimiter (not a hard-coded comma)
            $values = $basis->{$field}()->split($delimiter);
            if (count($values) > 0) {
                $searchItems[$field] = [$field => $values, 'factor' => $factor];
            }
        }
    }

    // nothing to compare against: return the empty collection
    if (empty($searchItems)) {
        return $similar;
    }

    // attach the similarity index to every item, then filter by threshold and sort
    $similar = $index->map(function ($item) use ($searchItems, $delimiter) {
        $item->jaccardIndex = static::getSimilarityIndex($item, $searchItems, $delimiter);
        return $item;
    })->filterBy('jaccardIndex', '>=', $threshold)->sortBy('jaccardIndex', 'desc');

    // optionally keep only items that have a translation in the current language
    if (kirby()->multilang() === true && $languageFilter === true) {
        $similar = $similar->filter(function ($item) {
            return $item->translation(kirby()->language()->code())->exists();
        });
    }

    return $similar;
}

/**
 * Calculates the weighted similarity index between $item and the search values.
 *
 * For every field the Jaccard index (size of intersection / size of union of
 * the two value sets) is multiplied by the field's factor and rounded to five
 * decimals; the result is the mean of these per-field values. Because the
 * factors are multipliers, the result can exceed 1 for factors above 1.
 *
 * @param object $item        item whose fields are compared
 * @param array  $searchItems [field => [field => list of values, 'factor' => number]]
 * @param string $delimiter   delimiter used to split the item's field values
 * @return float|int 0 when $searchItems is empty
 */
public static function getSimilarityIndex($item, $searchItems, $delimiter)
{
    $indices = [];

    foreach ($searchItems as $field => $value) {
        // compare as sets so repeated values are not counted twice
        $basisValues = array_unique($value[$field]);
        $itemValues  = array_unique($item->{$field}()->split($delimiter));

        $intersection = count(array_intersect($basisValues, $itemValues));
        $union        = count(array_unique(array_merge($basisValues, $itemValues)));

        // two empty value sets have an empty union; treat that as "no similarity"
        $jaccard = $union > 0 ? $intersection / $union : 0;

        // round() keeps the value numeric; number_format() would return a string
        // with thousands separators for large weighted values
        $indices[] = round($jaccard * $value['factor'], 5);
    }

    if (count($indices) === 0) {
        return 0;
    }

    return array_sum($indices) / count($indices);
}

/**
 * Creates a collection matching the type of $basis.
 *
 * @param object $basis page or file
 * @param mixed  $items passed on to pages() or to the Files constructor
 * @return mixed the result of pages($items) for a page, a \Kirby\Cms\Files
 *               object for a file, null for anything else
 */
public static function getClassName($basis, $items = '')
{
    if ($basis instanceof \Kirby\Cms\Page) {
        return pages($items);
    }

    if ($basis instanceof \Kirby\Cms\File) {
        return new \Kirby\Cms\Files($items);
    }

    return null;
}

/**
 * Returns the items similar to $basis, using the plugin cache when the
 * `texnixe.similar.cache` option is true.
 *
 * @param object $basis   the page or file to compare against
 * @param array  $options see data()
 * @return mixed collection of similar items
 */
public static function getSimilar($basis, $options = [])
{
    $cacheEnabled = option('texnixe.similar.cache');

    // caching explicitly switched off: clear previously stored results
    if ($cacheEnabled === false) {
        static::cache()->flush();
    }

    // without caching there is nothing to look up or to store
    if ($cacheEnabled !== true) {
        return static::data($basis, $options);
    }

    $key      = self::cacheKey($basis, $options);
    $response = static::cache()->get($key);

    // cache hit: rebuild the collection from the stored item keys
    // NOTE(review): relies on the cached value exposing the items under 'data' — confirm for non-file cache drivers
    if (is_array($response) && isset($response['data']) && is_array($response['data'])) {
        return static::getClassName($basis, array_keys($response['data']));
    }

    $similar = static::data($basis, $options);
    static::cache()->set($key, $similar, option('texnixe.similar.expires'));

    return $similar;
}

/**
 * Builds the cache key for a basis item and its options.
 *
 * Option names and nested values (e.g. a fields array) are part of the key,
 * as is the current language in multi-language setups, because data() can
 * filter by it.
 */
private static function cacheKey($basis, $options)
{
    $normalized = [];

    foreach ((array) $options as $name => $value) {
        if (is_object($value)) {
            // identify collections (e.g. the index option) by their item keys
            $value = method_exists($value, 'keys') ? $value->keys() : get_class($value);
        }
        $normalized[$name] = $value;
    }

    // option order must not influence the key
    ksort($normalized);

    $language = kirby()->multilang() === true ? kirby()->language()->code() : '';

    return md5($basis->id() . '|' . $language . '|' . json_encode($normalized));
}