search_api-8.x-1.15/src/Plugin/search_api/processor/Stemmer.php

src/Plugin/search_api/processor/Stemmer.php
<?php

namespace Drupal\search_api\Plugin\search_api\processor;

use Drupal\Core\Form\FormStateInterface;
use Drupal\search_api\Plugin\search_api\processor\Resources\Porter2;
use Drupal\search_api\Processor\FieldsProcessorPluginBase;
use Drupal\search_api\Query\QueryInterface;

/**
 * Stems search terms.
 *
 * @SearchApiProcessor(
 *   id = "stemmer",
 *   label = @Translation("Stemmer"),
 *   description = @Translation("Stems search terms (for example, <em>talking</em> to <em>talk</em>). Currently, this only acts on English language content. It uses the Porter 2 stemmer algorithm (<a href=""https://wikipedia.org/wiki/Stemming"">More information</a>). For best results, use after tokenizing."),
 *   stages = {
 *     "pre_index_save" = 0,
 *     "preprocess_index" = 0,
 *     "preprocess_query" = 0,
 *   }
 * )
 */
class Stemmer extends FieldsProcessorPluginBase {

  /**
   * Static cache for already-generated stems.
   *
   * Keyed by the original word, with the stem generated for it as the value.
   *
   * @var string[]
   */
  protected $stems = [];

  /**
   * {@inheritdoc}
   */
  public function defaultConfiguration() {
    $configuration = parent::defaultConfiguration();

    // Array union: only adds "exceptions" if the parent configuration did not
    // already define that key.
    $configuration += [
      'exceptions' => [
        'texan' => 'texa',
        'mexican' => 'mexic',
      ],
    ];

    return $configuration;
  }

  /**
   * {@inheritdoc}
   */
  public function buildConfigurationForm(array $form, FormStateInterface $form_state) {
    $form = parent::buildConfigurationForm($form, $form_state);

    $description = $this->t('If the <a href="http://snowball.tartarus.org/algorithms/english/stemmer.html">algorithm</a> does not stem words in your dataset in the desired way, you can enter specific exceptions in the form of WORD=STEM, where "WORD" is the original word in the text and "STEM" is the resulting stem. List each exception on a separate line.');

    // Convert the keyed array into the textarea format, one WORD=STEM pair per
    // line. The lines are built by hand on purpose: http_build_query() would
    // percent-encode words and stems (for example "café" becomes "caf%C3%A9"),
    // and since validateConfigurationForm() does not decode them again, saving
    // the form would store the encoded text as the exception.
    $lines = [];
    foreach ($this->configuration['exceptions'] as $word => $stem) {
      $lines[] = $word . '=' . $stem;
    }
    $default_value = implode("\n", $lines);

    $form['exceptions'] = [
      '#type' => 'textarea',
      '#title' => $this->t('Exceptions'),
      '#description' => $description,
      '#default_value' => $default_value,
    ];

    return $form;
  }

  /**
   * {@inheritdoc}
   */
  public function validateConfigurationForm(array &$form, FormStateInterface $form_state) {
    parent::validateConfigurationForm($form, $form_state);

    // The textarea content is parsed as INI text, which turns the WORD=STEM
    // lines into a "word => stem" array. parse_ini_string() returns FALSE if
    // the text cannot be parsed.
    $exceptions = $form_state->getValue('exceptions');
    $parsed = parse_ini_string($exceptions);
    if ($parsed === FALSE) {
      $el = $form['exceptions'];
      $form_state->setError($el, $el['#title'] . ': ' . $this->t('The entered text is not in valid WORD=STEM format.'));
      return;
    }

    // Replace the raw text with the parsed array in the submitted values.
    $form_state->setValue('exceptions', $parsed);
  }

  /**
   * {@inheritdoc}
   */
  public function preprocessIndexItems(array $items) {
    foreach ($items as $item) {
      // Limit this processor to English language data.
      if ($item->getLanguage() !== 'en') {
        continue;
      }
      // Only fields accepted by testField() are passed on for processing.
      foreach ($item->getFields() as $name => $field) {
        if ($this->testField($name, $field)) {
          $this->processField($field);
        }
      }
    }
  }

  /**
   * {@inheritdoc}
   */
  public function preprocessSearchQuery(QueryInterface $query) {
    // Only process queries that can (also) return English language content.
    // An empty language list is treated as "no restriction" and is processed.
    $languages = $query->getLanguages();
    if ($languages && !in_array('en', $languages, TRUE)) {
      return;
    }
    parent::preprocessSearchQuery($query);
  }

  /**
   * {@inheritdoc}
   */
  protected function testType($type) {
    // Only text data types are stemmed.
    return $this->getDataTypeHelper()->isTextType($type);
  }

  /**
   * {@inheritdoc}
   */
  protected function process(&$value) {
    // In the absence of the tokenizer processor, this ensures split words:
    // tags are stripped and the text is split on every run of characters that
    // are neither letters nor digits. preg_split() returns FALSE on failure
    // (for example, for invalid UTF-8 input); that case is treated like "no
    // words", so the value becomes an empty string without a PHP warning.
    $words = preg_split('/[^\p{L}\p{N}]+/u', strip_tags($value), -1, PREG_SPLIT_NO_EMPTY) ?: [];

    $stemmed = [];
    foreach ($words as $word) {
      // To optimize processing, store processed stems in a static array so
      // that each distinct word is only stemmed once per processor instance.
      if (!isset($this->stems[$word])) {
        $stem = new Porter2($word, $this->configuration['exceptions']);
        $this->stems[$word] = $stem->stem();
      }
      $stemmed[] = $this->stems[$word];
    }

    // The stems are joined with single spaces, replacing the original value.
    $value = implode(' ', $stemmed);
  }

}

Главная | Обратная связь

drupal hosting | друпал хостинг | it patrol .inc