Browse Source

Replaced my homebrew keyword generator

It now uses the one found here: https://github.com/Donatello-za/rake-php-plus
This is much better than the one I had hacked together.

This makes AntCMS a bit bigger, but not by too much. I may end up removing the keyword generator outright, but for now I'm going to keep it.
Belle Aerni 2 years ago
parent
commit
3353be4920
3 changed files with 66 additions and 38 deletions
  1. 2 1
      composer.json
  2. 61 1
      composer.lock
  3. 3 36
      src/AntCMS/AntKeywords.php

+ 2 - 1
composer.json

@@ -14,7 +14,8 @@
         "league/commonmark": "^2.3",
         "elgigi/commonmark-emoji": "^2.0",
         "twig/twig": "^3.5",
-        "shapecode/twig-string-loader": "^1.1"
+        "shapecode/twig-string-loader": "^1.1",
+        "donatello-za/rake-php-plus": "^1.0"
     },
     "authors": [
         {

+ 61 - 1
composer.lock

@@ -4,7 +4,7 @@
         "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
         "This file is @generated automatically"
     ],
-    "content-hash": "cc1e950196a545bb666399f2d1eab986",
+    "content-hash": "4c1c73dcfd1b9e69aa3b18324c390ac3",
     "packages": [
         {
             "name": "dflydev/dot-access-data",
@@ -81,6 +81,66 @@
             },
             "time": "2022-10-27T11:44:00+00:00"
         },
+        {
+            "name": "donatello-za/rake-php-plus",
+            "version": "v1.0.18",
+            "source": {
+                "type": "git",
+                "url": "https://github.com/Donatello-za/rake-php-plus.git",
+                "reference": "e9e9c0862b3dc953d288e8f42c76e4ceaeca0619"
+            },
+            "dist": {
+                "type": "zip",
+                "url": "https://api.github.com/repos/Donatello-za/rake-php-plus/zipball/e9e9c0862b3dc953d288e8f42c76e4ceaeca0619",
+                "reference": "e9e9c0862b3dc953d288e8f42c76e4ceaeca0619",
+                "shasum": ""
+            },
+            "require": {
+                "ext-json": "*",
+                "ext-mbstring": "*",
+                "php": ">=5.4.0"
+            },
+            "require-dev": {
+                "php": ">=5.5.0",
+                "phpunit/phpunit": "~4.0|~5.0"
+            },
+            "type": "library",
+            "extra": {
+                "branch-alias": {
+                    "dev-master": "1.0.13-dev"
+                }
+            },
+            "autoload": {
+                "psr-4": {
+                    "DonatelloZa\\RakePlus\\": "src/"
+                }
+            },
+            "notification-url": "https://packagist.org/downloads/",
+            "license": [
+                "MIT"
+            ],
+            "authors": [
+                {
+                    "name": "Don Schoeman",
+                    "email": "ta.maximus@gmail.com"
+                }
+            ],
+            "description": "Yet another PHP implementation of the Rapid Automatic Keyword Extraction algorithm (RAKE).",
+            "homepage": "https://github.com/Donatello-za/rake-php-plus",
+            "keywords": [
+                "Algorithm",
+                "automatic",
+                "extraction",
+                "keyword",
+                "rake",
+                "rapid"
+            ],
+            "support": {
+                "issues": "https://github.com/Donatello-za/rake-php-plus/issues",
+                "source": "https://github.com/Donatello-za/rake-php-plus"
+            },
+            "time": "2022-02-23T18:42:03+00:00"
+        },
         {
             "name": "elgigi/commonmark-emoji",
             "version": "2.0.0",

+ 3 - 36
src/AntCMS/AntKeywords.php

@@ -4,6 +4,7 @@ namespace AntCMS;
 
 use AntCMS\AntCache;
 use AntCMS\AntConfig;
+use DonatelloZa\RakePlus\RakePlus;
 
 class AntKeywords
 {
@@ -29,42 +30,8 @@ class AntKeywords
             }
         }
 
-        // A bunch of characters we don't want to use for keyword generation
-        $stopWords = array(' a ', ' an ', ' and ', ' are ', ' as ', ' at ', ' be ', ' by ', ' for ', ' from ', ' has ', ' have ', ' in ', ' is ', ' it ', ' its ', ' of ', ' on ', ' that ', ' the ', ' to ', ' was ', ' were ', ' will ', ' with ');
-        $symbols = array('$', '€', '£', '¥', 'CHF', '₹', '+', '-', '×', '÷', '=', '>', '<', '.', ',', ';', ':', '!', '?', '"', '\'', '(', ')', '[', ']', '{', '}', '©', '™', '°', '§', '¶', '•', '_', '/');
-        $markdownSymbols = array('#', '##', '###', '####', '#####', '~~', '__', '**', '`', '``', '```', '*', '+', '>', '[', ']', '(', ')', '!', '&', '|');
-        $numbers = array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9');
-        $commonPronouns = array('he', 'him', 'his', 'she', 'her', 'hers', 'they', 'them', 'theirs');
-
-        //Strip the aforementioned characters away
-        $content = strtolower($content);
-        $content = str_replace($stopWords, ' ', $content);
-        $content = str_replace($symbols, ' ', $content);
-        $content = str_replace($markdownSymbols, ' ', $content);
-        $content = str_replace($numbers, ' ', $content);
-        $content = str_replace($commonPronouns, ' ', $content);
-
-        //Convert to an arrays
-        $words = explode(' ', $content);
-
-        // Remove newlines
-        $words = array_map(function ($key) {
-            return preg_replace('~[\r\n]+~', ' ', $key);
-        }, $words);
-
-        // Handle potentially empty keys
-        $words = array_filter($words);
-
-        // Then finally we count and sort the keywords, returning the top ones
-        $word_counts = array_count_values($words);
-
-        arsort($word_counts);
-
-        $count = (count($word_counts) < $count) ? count($word_counts) : $count;
-        $keywords = array_slice(array_keys($word_counts), 0, $count);
-        $keywords = implode(', ', $keywords);
-        $keywords = mb_substr($keywords, 3);
-
+        $keywords = RakePlus::create($content, 'en_US', $count)->keywords();
+        $keywords = implode(",", $keywords);    
         $cache->setCache($cacheKey, $keywords);
         return $keywords;
     }