Replaced my homebrew keyword generator
It now uses rake-php-plus (https://github.com/Donatello-za/rake-php-plus), which is much better than the one I had hacked together. It makes AntCMS a bit bigger, but not by too much. I may end up removing the keyword generator outright, but for now I'm going to keep it.
This commit is contained in:
parent
91395db9c4
commit
3353be4920
3 changed files with 66 additions and 38 deletions
|
@ -14,7 +14,8 @@
|
|||
"league/commonmark": "^2.3",
|
||||
"elgigi/commonmark-emoji": "^2.0",
|
||||
"twig/twig": "^3.5",
|
||||
"shapecode/twig-string-loader": "^1.1"
|
||||
"shapecode/twig-string-loader": "^1.1",
|
||||
"donatello-za/rake-php-plus": "^1.0"
|
||||
},
|
||||
"authors": [
|
||||
{
|
||||
|
|
62
composer.lock
generated
62
composer.lock
generated
|
@ -4,7 +4,7 @@
|
|||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||
"This file is @generated automatically"
|
||||
],
|
||||
"content-hash": "cc1e950196a545bb666399f2d1eab986",
|
||||
"content-hash": "4c1c73dcfd1b9e69aa3b18324c390ac3",
|
||||
"packages": [
|
||||
{
|
||||
"name": "dflydev/dot-access-data",
|
||||
|
@ -81,6 +81,66 @@
|
|||
},
|
||||
"time": "2022-10-27T11:44:00+00:00"
|
||||
},
|
||||
{
|
||||
"name": "donatello-za/rake-php-plus",
|
||||
"version": "v1.0.18",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/Donatello-za/rake-php-plus.git",
|
||||
"reference": "e9e9c0862b3dc953d288e8f42c76e4ceaeca0619"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/Donatello-za/rake-php-plus/zipball/e9e9c0862b3dc953d288e8f42c76e4ceaeca0619",
|
||||
"reference": "e9e9c0862b3dc953d288e8f42c76e4ceaeca0619",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"ext-json": "*",
|
||||
"ext-mbstring": "*",
|
||||
"php": ">=5.4.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"php": ">=5.5.0",
|
||||
"phpunit/phpunit": "~4.0|~5.0"
|
||||
},
|
||||
"type": "library",
|
||||
"extra": {
|
||||
"branch-alias": {
|
||||
"dev-master": "1.0.13-dev"
|
||||
}
|
||||
},
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"DonatelloZa\\RakePlus\\": "src/"
|
||||
}
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "Don Schoeman",
|
||||
"email": "ta.maximus@gmail.com"
|
||||
}
|
||||
],
|
||||
"description": "Yet another PHP implementation of the Rapid Automatic Keyword Extraction algorithm (RAKE).",
|
||||
"homepage": "https://github.com/Donatello-za/rake-php-plus",
|
||||
"keywords": [
|
||||
"Algorithm",
|
||||
"automatic",
|
||||
"extraction",
|
||||
"keyword",
|
||||
"rake",
|
||||
"rapid"
|
||||
],
|
||||
"support": {
|
||||
"issues": "https://github.com/Donatello-za/rake-php-plus/issues",
|
||||
"source": "https://github.com/Donatello-za/rake-php-plus"
|
||||
},
|
||||
"time": "2022-02-23T18:42:03+00:00"
|
||||
},
|
||||
{
|
||||
"name": "elgigi/commonmark-emoji",
|
||||
"version": "2.0.0",
|
||||
|
|
|
@ -4,6 +4,7 @@ namespace AntCMS;
|
|||
|
||||
use AntCMS\AntCache;
|
||||
use AntCMS\AntConfig;
|
||||
use DonatelloZa\RakePlus\RakePlus;
|
||||
|
||||
class AntKeywords
|
||||
{
|
||||
|
@ -29,42 +30,8 @@ class AntKeywords
|
|||
}
|
||||
}
|
||||
|
||||
// A bunch of characters we don't want to use for keyword generation
|
||||
$stopWords = array(' a ', ' an ', ' and ', ' are ', ' as ', ' at ', ' be ', ' by ', ' for ', ' from ', ' has ', ' have ', ' in ', ' is ', ' it ', ' its ', ' of ', ' on ', ' that ', ' the ', ' to ', ' was ', ' were ', ' will ', ' with ');
|
||||
$symbols = array('$', '€', '£', '¥', 'CHF', '₹', '+', '-', '×', '÷', '=', '>', '<', '.', ',', ';', ':', '!', '?', '"', '\'', '(', ')', '[', ']', '{', '}', '©', '™', '°', '§', '¶', '•', '_', '/');
|
||||
$markdownSymbols = array('#', '##', '###', '####', '#####', '~~', '__', '**', '`', '``', '```', '*', '+', '>', '[', ']', '(', ')', '!', '&', '|');
|
||||
$numbers = array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9');
|
||||
$commonPronouns = array('he', 'him', 'his', 'she', 'her', 'hers', 'they', 'them', 'theirs');
|
||||
|
||||
//Strip the aforementioned characters away
|
||||
$content = strtolower($content);
|
||||
$content = str_replace($stopWords, ' ', $content);
|
||||
$content = str_replace($symbols, ' ', $content);
|
||||
$content = str_replace($markdownSymbols, ' ', $content);
|
||||
$content = str_replace($numbers, ' ', $content);
|
||||
$content = str_replace($commonPronouns, ' ', $content);
|
||||
|
||||
//Convert to an arrays
|
||||
$words = explode(' ', $content);
|
||||
|
||||
// Remove newlines
|
||||
$words = array_map(function ($key) {
|
||||
return preg_replace('~[\r\n]+~', ' ', $key);
|
||||
}, $words);
|
||||
|
||||
// Handle potentially empty keys
|
||||
$words = array_filter($words);
|
||||
|
||||
// Then finally we count and sort the keywords, returning the top ones
|
||||
$word_counts = array_count_values($words);
|
||||
|
||||
arsort($word_counts);
|
||||
|
||||
$count = (count($word_counts) < $count) ? count($word_counts) : $count;
|
||||
$keywords = array_slice(array_keys($word_counts), 0, $count);
|
||||
$keywords = implode(', ', $keywords);
|
||||
$keywords = mb_substr($keywords, 3);
|
||||
|
||||
$keywords = RakePlus::create($content, 'en_US', $count)->keywords();
|
||||
$keywords = implode(",", $keywords);
|
||||
$cache->setCache($cacheKey, $keywords);
|
||||
return $keywords;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue