search_api-8.x-1.15/src/Plugin/search_api/processor/IgnoreCharacters.php
src/Plugin/search_api/processor/IgnoreCharacters.php
<?php namespace Drupal\search_api\Plugin\search_api\processor; use Drupal\Core\Form\FormStateInterface; use Drupal\Core\Url; use Drupal\search_api\Processor\FieldsProcessorPluginBase; /** * Configure types of characters which should be ignored for searches. * * @SearchApiProcessor( * id = "ignore_character", * label = @Translation("Ignore characters"), * description = @Translation("Configure types of characters which should be ignored for searches."), * stages = { * "pre_index_save" = 0, * "preprocess_index" = -10, * "preprocess_query" = -10, * } * ) */ class IgnoreCharacters extends FieldsProcessorPluginBase { /** * The escaped regular expression for ignorable characters. * * @var string */ protected $ignorable; /** * {@inheritdoc} */ public function defaultConfiguration() { $configuration = parent::defaultConfiguration(); $configuration += [ 'ignorable' => "['¿¡!?,.:;]", 'ignorable_classes' => [ 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps', ], ]; return $configuration; } /** * {@inheritdoc} */ public function buildConfigurationForm(array $form, FormStateInterface $form_state) { $form = parent::buildConfigurationForm($form, $form_state); $form['ignorable'] = [ '#type' => 'textfield', '#title' => $this->t('Strip by regular expression'), '#description' => $this->t('Specify characters which should be removed from fulltext fields and search strings, as a <a href=":url">PCRE regular expression</a>.', [':url' => Url::fromUri('https://secure.php.net/manual/reference.pcre.pattern.syntax.php')->toString()]), '#default_value' => $this->configuration['ignorable'], '#maxlength' => 1000, ]; $character_sets = $this->getCharacterSets(); $form['strip'] = [ '#type' => 'details', '#title' => $this->t('Strip by character property'), '#description' => $this->t('Specify <a href=":url">Unicode character properties</a> of characters to be ignored.', [':url' => Url::fromUri('https://en.wikipedia.org/wiki/Unicode_character_property')->toString()]), '#open' => FALSE, '#maxlength' => 300, ]; $classes = $this->configuration['ignorable_classes']; $form['strip']['character_sets'] = [ '#type' => 'checkboxes', '#title' => $this->t('Ignored character properties'), '#options' => $character_sets, '#default_value' => array_combine($classes, $classes), '#multiple' => TRUE, ]; return $form; } /** * {@inheritdoc} */ public function validateConfigurationForm(array &$form, FormStateInterface $form_state) { parent::validateConfigurationForm($form, $form_state); $ignorable = str_replace('/', '\/', $form_state->getValues()['ignorable']); if ($ignorable !== '' && @preg_match('/(' . $ignorable . ')+/u', '') === FALSE) { $el = $form['ignorable']; $form_state->setError($el, $el['#title'] . ': ' . $this->t('The entered text is no valid regular expression.')); } } /** * {@inheritdoc} */ public function submitConfigurationForm(array &$form, FormStateInterface $form_state) { $config = $form_state->getValues(); unset($config['strip']); // Get our own version of 'ignorable_classes' from form values. $classes = $form_state->getValue(['strip', 'character_sets'], []); $config['ignorable_classes'] = array_values(array_filter($classes)); $this->setConfiguration($config); } /** * {@inheritdoc} */ protected function process(&$value) { if ($this->configuration['ignorable']) { if (!isset($this->ignorable)) { $this->ignorable = str_replace('/', '\/', $this->configuration['ignorable']); } $value = preg_replace('/' . $this->ignorable . '+/u', '', $value); } // Loop over the character sets and strip the characters from the text. foreach ($this->configuration['ignorable_classes'] as $character_set) { $regex = $this->getFormatRegularExpression($character_set); if ($regex) { $value = preg_replace('/[' . $regex . ']+/u', '', $value); } } } /** * Retrieves an options list for available Unicode character properties. * * @return string[] * An options list with all available Unicode character properties. */ protected function getCharacterSets() { return [ 'Pc' => $this->t('Punctuation, Connector Characters'), 'Pd' => $this->t('Punctuation, Dash Characters'), 'Pe' => $this->t('Punctuation, Close Characters'), 'Pf' => $this->t('Punctuation, Final quote Characters'), 'Pi' => $this->t('Punctuation, Initial quote Characters'), 'Po' => $this->t('Punctuation, Other Characters'), 'Ps' => $this->t('Punctuation, Open Characters'), 'Cc' => $this->t('Other, Control Characters'), 'Cf' => $this->t('Other, Format Characters'), 'Co' => $this->t('Other, Private Use Characters'), 'Mc' => $this->t('Mark, Spacing Combining Characters'), 'Me' => $this->t('Mark, Enclosing Characters'), 'Mn' => $this->t('Mark, Nonspacing Characters'), 'Sc' => $this->t('Symbol, Currency Characters'), 'Sk' => $this->t('Symbol, Modifier Characters'), 'Sm' => $this->t('Symbol, Math Characters'), 'So' => $this->t('Symbol, Other Characters'), 'Zl' => $this->t('Separator, Line Characters'), 'Zp' => $this->t('Separator, Paragraph Characters'), 'Zs' => $this->t('Separator, Space Characters'), ]; } /** * Retrieves a regular expression for a certain Unicode character property. * * @param string $property * The abbreviation of the character property for which to get the regular * expression. * * @return string|null * The regular expression for the property, or NULL if it could not be * found. */ protected function getFormatRegularExpression($property) { $class = 'Drupal\search_api\Plugin\search_api\processor\Resources\\' . $property; if (class_exists($class) && in_array('Drupal\search_api\Plugin\search_api\processor\Resources\UnicodeCharacterPropertyInterface', class_implements($class))) { return $class::getRegularExpression(); } return NULL; } }