g2-8.x-1.x-dev/tests/src/Kernel/AhoCorasickTest.php

tests/src/Kernel/AhoCorasickTest.php
<?php

declare(strict_types=1);

namespace Drupal\Tests\g2\Kernel;

use AhoCorasick\MultiStringMatcher;
use Drupal\Core\KeyValueStore\KeyValueMemoryFactory;
use Drupal\g2\G2;
use Drupal\g2\Matcher;
use Drupal\KernelTests\KernelTestBase;
use Drupal\TestTools\Random;
use Psr\Log\LoggerInterface;

/**
 * Benchmarks building an Aho-Corasick matcher vs retrieving it from KV.
 *
 * Its purpose is to benchmark MSM (MultiStringMatcher) rebuild vs the time it
 * takes to retrieve it from KV storage. Retrieving is faster in all cases.
 *
 * See docs/rebuilding-vs-retrieving.png for details.
 *
 * @link https://docs.google.com/spreadsheets/d/1L7qUc7fZQjkJpj83MuR5uPN9C8nNDOK3Zj8h6sqXFcg
 *
 * @group G2
 */
class AhoCorasickTest extends KernelTestBase {

  /**
   * The maximum length of a generated title.
   */
  const MAX_LEN = 255;

  /**
   * The modules to enable for the test, in dependency order.
   */
  const MODULES = [
    // Needed for routing.
    'system',
    // Service node_preview (proxied) needs user.private_tempstore.
    'user',
    // Needed by text.module.
    'field',
    'filter',
    // Needed by node module.
    'text',
    // Needed by g2.module.
    'node',
    'path_alias',
    'taxonomy',
    'views',
    'g2',
  ];

  /**
   * The modules to enable for the test.
   *
   * @var string[]
   */
  protected static $modules = self::MODULES;

  /**
   * The largest data set size, as a power-of-two exponent (i.e. 5 => 32).
   *
   * The provider generates one data set per exponent from 0 to this value.
   */
  const MAX_SCALE = 12;

  /**
   * The core keyvalue service.
   *
   * @var \Drupal\Core\KeyValueStore\KeyValueMemoryFactory
   */
  protected KeyValueMemoryFactory $kv;

  /**
   * The G2 logger.
   *
   * @var \Psr\Log\LoggerInterface
   */
  protected LoggerInterface $logger;

  /**
   * {@inheritdoc}
   *
   * @throws \Exception
   */
  protected function setUp(): void {
    parent::setUp();
    $this->installEntitySchema('path_alias');

    // The benchmark relies on the in-memory KV factory being the one in use.
    /** @var \Drupal\Core\KeyValueStore\KeyValueMemoryFactory $kv */
    $kv = $this->container->get('keyvalue');
    assert($kv instanceof KeyValueMemoryFactory);
    $this->kv = $kv;

    /** @var \Psr\Log\LoggerInterface $logger */
    $logger = $this->container->get(G2::SVC_LOGGER);
    assert($logger instanceof LoggerInterface);
    $this->logger = $logger;
  }

  /**
   * Data provider for both tests.
   *
   * Note that the number of entries actually in the MSM may be less than the
   *  number of generated entries due to random string generation collisions.
   *
   * @return array<string,array{string[]}>
   *   A map of titles by number of entries generated.
   */
  public static function providerTitles(): array {
    $rows = [];
    // RandomGeneratorTrait::getRandomGenerator is not static.
    $rand = Random::getGenerator();
    // Note we use "<=", not "<" because this is a scale.
    for ($scale = 0; $scale <= self::MAX_SCALE; $scale++) {
      $n = 2 ** $scale;
      $row = [];
      for ($i = 0; $i < $n; $i++) {
        // Start at 1, not 0: a zero length gives an empty title, and the
        // single-title data set could then end up with no keyword at all
        // (presumably the MSM ignores empty keywords - confirm against the
        // AhoCorasick library), breaking the per-word division in the tests.
        $len = mt_rand(1, self::MAX_LEN);
        $row[] = $rand->word($len);
      }
      $rows["$n words"] = [$row];
    }
    return $rows;
  }

  /**
   * Checks the keyword count and logs the timing of one benchmark run.
   *
   * The count assertion runs before the per-word division, so an empty MSM
   * is reported as a test failure instead of a division by zero error.
   *
   * @param string $message
   *   The log message template, using the :microseconds, :count and
   *   :microsecondsPerWord placeholders.
   * @param float $microseconds
   *   The measured duration, in microseconds.
   * @param int $count
   *   The number of keywords in the measured MSM.
   */
  protected function reportTiming(string $message, float $microseconds, int $count): void {
    $this->assertGreaterThan(0, $count, 'The MSM contains at least one keyword.');
    $this->logger->info($message, [
      ':microseconds' => $microseconds,
      ':count' => $count,
      ':microsecondsPerWord' => sprintf("%.3f", $microseconds / $count),
    ]);
  }

  /**
   * Benchmark rebuilding the MSM on each case.
   *
   * Note: despite its name, this method times the construction of the MSM
   * from the titles, not a KV access. The name is kept for compatibility.
   *
   * @param string[] $titles
   *   The titles to build the MSM from.
   *
   * @dataProvider providerTitles
   */
  public function testBenchmarkKeyValue(array $titles): void {
    $t0 = microtime(TRUE);
    $msm = new MultiStringMatcher($titles);
    $t1 = microtime(TRUE);
    $microseconds = ($t1 - $t0) * 1E6;
    $this->reportTiming(
      ":microseconds µsec to build a MSM for :count words: :microsecondsPerWord µsec/word",
      $microseconds,
      count($msm->getKeywords())
    );
  }

  /**
   * Benchmark retrieving the MSM from KV.
   *
   * Note: despite its name, this method times reading a prebuilt MSM back
   * from the KV collection, not a rebuild. The name is kept for compatibility.
   *
   * @param string[] $titles
   *   The titles to build the MSM from.
   *
   * @dataProvider providerTitles
   */
  public function testBenchmarkRebuild(array $titles): void {
    // Build and store outside of the timed section: only the read is measured.
    $ref = new MultiStringMatcher($titles);
    $coll = $this->kv->get(Matcher::COLLECTION);
    $coll->set(G2::KV_TITLES, $ref);
    $t0 = microtime(TRUE);
    $msm = $coll->get(G2::KV_TITLES);
    $t1 = microtime(TRUE);
    $microseconds = ($t1 - $t0) * 1E6;
    // Verify what came back before calling methods on it.
    $this->assertInstanceOf(MultiStringMatcher::class, $msm);
    $this->reportTiming(
      ":microseconds µsec to retrieve a MSM for :count words: :microsecondsPerWord µsec/word",
      $microseconds,
      count($msm->getKeywords())
    );
  }

}

Главная | Обратная связь

drupal hosting | друпал хостинг | it patrol .inc