g2-8.x-1.x-dev/tests/src/Kernel/AhoCorasickTest.php
tests/src/Kernel/AhoCorasickTest.php
<?php
declare(strict_types=1);
namespace Drupal\Tests\g2\Kernel;
use AhoCorasick\MultiStringMatcher;
use Drupal\Core\KeyValueStore\KeyValueMemoryFactory;
use Drupal\g2\G2;
use Drupal\g2\Matcher;
use Drupal\KernelTests\KernelTestBase;
use Drupal\TestTools\Random;
use Psr\Log\LoggerInterface;
/**
* Class AhoCorasickTest benchmarks the Aho-Corasick MultiStringMatcher (MSM).
*
* Its purpose is to benchmark MSM rebuild vs the time it takes to retrieve
* it from KV storage. Retrieving is faster in all cases.
*
* See docs/rebuilding-vs-retrieving.png for details.
*
* @link https://docs.google.com/spreadsheets/d/1L7qUc7fZQjkJpj83MuR5uPN9C8nNDOK3Zj8h6sqXFcg
*
* @group G2
*/
class AhoCorasickTest extends KernelTestBase {

  /**
   * The minimum length of a generated title.
   *
   * Must be at least 1: an empty title is not a usable keyword, and in the
   * single-title data set it could leave the matcher without any keyword
   * (presumably the matcher ignores empty strings), which would make the
   * per-word average a division by zero.
   */
  public const MIN_LEN = 1;

  /**
   * The maximum length of a generated title.
   */
  public const MAX_LEN = 255;

  /**
   * The modules needed by the test, in dependency order.
   */
  public const MODULES = [
    // Needed for routing.
    'system',
    // Service node_preview (proxied) needs user.private_tempstore.
    'user',
    // Needed by text.module.
    'field',
    'filter',
    // Needed by node module.
    'text',
    // Needed by g2.module.
    'node',
    'path_alias',
    'taxonomy',
    'views',
    'g2',
  ];

  /**
   * The modules to enable for the test.
   *
   * @var string[]
   */
  protected static $modules = self::MODULES;

  /**
   * The largest data set size, as a power of two (e.g. 5 => 32 titles).
   *
   * The data provider generates one data set per scale from 0 to this value,
   * both included.
   */
  public const MAX_SCALE = 12;

  /**
   * The core keyvalue service.
   *
   * @var \Drupal\Core\KeyValueStore\KeyValueMemoryFactory
   */
  protected KeyValueMemoryFactory $kv;

  /**
   * The G2 logger.
   *
   * @var \Psr\Log\LoggerInterface
   */
  protected LoggerInterface $logger;

  /**
   * {@inheritdoc}
   *
   * @throws \Exception
   */
  protected function setUp(): void {
    parent::setUp();
    $this->installEntitySchema('path_alias');

    $kv = $this->container->get('keyvalue');
    assert($kv instanceof KeyValueMemoryFactory);
    $this->kv = $kv;

    $logger = $this->container->get(G2::SVC_LOGGER);
    assert($logger instanceof LoggerInterface);
    $this->logger = $logger;
  }

  /**
   * Data provider for both tests.
   *
   * Note that the number of entries actually in the MSM may be less than the
   * number of generated entries due to random string generation collisions.
   *
   * @return array<string,array{string[]}>
   *   A map of titles by number of entries generated.
   */
  public static function providerTitles(): array {
    $rows = [];
    // RandomGeneratorTrait::getRandomGenerator is not static.
    $rand = Random::getGenerator();
    // Note we use "<=", not "<", because this is a scale.
    for ($scale = 0; $scale <= self::MAX_SCALE; $scale++) {
      $n = 2 ** $scale;
      $row = [];
      for ($i = 0; $i < $n; $i++) {
        // Never generate an empty title: see self::MIN_LEN.
        $row[] = $rand->word(mt_rand(self::MIN_LEN, self::MAX_LEN));
      }
      $rows["$n words"] = [$row];
    }
    return $rows;
  }

  /**
   * Logs a benchmark duration along with its per-word average.
   *
   * @param string $message
   *   The log message template, using the :microseconds, :count and
   *   :microsecondsPerWord placeholders.
   * @param float $microseconds
   *   The measured duration, in microseconds.
   * @param int $count
   *   The number of keywords held by the MSM.
   */
  private function logTiming(string $message, float $microseconds, int $count): void {
    // A matcher without keywords has no meaningful average, and dividing by
    // zero throws a DivisionByZeroError on PHP 8.
    $perWord = $count > 0 ? $microseconds / $count : 0.0;
    $this->logger->info($message, [
      ':microseconds' => $microseconds,
      ':count' => $count,
      ':microsecondsPerWord' => sprintf("%.3f", $perWord),
    ]);
  }

  /**
   * Benchmark rebuilding the MSM on each case.
   *
   * Despite its name, this method does not touch the key-value store: it
   * measures the time needed to build a new MSM from the titles. The name is
   * kept so that existing test filters keep working.
   *
   * @param string[] $titles
   *   The titles to build the MSM from.
   *
   * @dataProvider providerTitles
   */
  public function testBenchmarkKeyValue(array $titles): void {
    $t0 = microtime(TRUE);
    $msm = new MultiStringMatcher($titles);
    $t1 = microtime(TRUE);

    $n = count($msm->getKeywords());
    $this->logTiming(
      ":microseconds µsec to build a MSM for :count words: :microsecondsPerWord µsec/word",
      ($t1 - $t0) * 1E6,
      $n
    );

    // Titles are never empty, so at least one keyword is expected, and
    // collisions can only reduce the keyword count below the title count.
    $this->assertGreaterThan(0, $n);
    $this->assertLessThanOrEqual(count($titles), $n);
  }

  /**
   * Benchmark retrieving the MSM from KV.
   *
   * Despite its name, this method does not time a rebuild: the MSM is built
   * and stored before the timer starts, and only its retrieval from the
   * key-value store is measured. The name is kept so that existing test
   * filters keep working.
   *
   * @param string[] $titles
   *   The titles to build the MSM from.
   *
   * @dataProvider providerTitles
   */
  public function testBenchmarkRebuild(array $titles): void {
    // Preparation, not part of the measurement.
    $ref = new MultiStringMatcher($titles);
    $coll = $this->kv->get(Matcher::COLLECTION);
    $coll->set(G2::KV_TITLES, $ref);

    $t0 = microtime(TRUE);
    $msm = $coll->get(G2::KV_TITLES);
    $t1 = microtime(TRUE);

    // Fail cleanly instead of calling a method on NULL if the value is absent.
    $this->assertInstanceOf(MultiStringMatcher::class, $msm);
    $keywords = $msm->getKeywords();
    $this->logTiming(
      ":microseconds µsec to retrieve a MSM for :count words: :microsecondsPerWord µsec/word",
      ($t1 - $t0) * 1E6,
      count($keywords)
    );

    // The retrieved matcher must hold the keywords that were stored.
    $this->assertSame($ref->getKeywords(), $keywords);
  }

}
