cbr-1.0.0/src/Plugin/Field/FieldType/CBRFieldHelper.php
src/Plugin/Field/FieldType/CBRFieldHelper.php
<?php
namespace Drupal\cbr\Plugin\Field\FieldType;
use Drupal\Core\Cache\Cache;
use Drupal\Core\Form\FormStateInterface;
use Drupal\field\Entity\FieldConfig;
/**
 * Static helpers shared by the CBR (case based reasoning) field types.
 *
 * Provides the settings-form fragments that store the CBR options as
 * third-party settings under the `cbr` key of a field config, plus the
 * similarity and summary functions for numeric and string values.
 */
class CBRFieldHelper
{
    /**
     * Add a form for setting the field's weight.
     *
     * Removes the `label`, `description` and `required` elements, adds an open
     * "Case Based Reasoning" details element containing the weight input and
     * registers saveCBRFieldSettings() as an entity builder.
     *
     * @param array $form The form array.
     * @param FormStateInterface $form_state The form state.
     * @return array The form array.
     */
    public static function cbrFieldSettingsForm(array $form, FormStateInterface $form_state): array
    {
        // Unset default values, added later by Drupal anyway.
        unset($form['label'], $form['description'], $form['required']);

        /** @var FieldConfig $field_config */
        $field_config = $form_state->getFormObject()->getEntity();

        $form['cbr_settings'] = [
            '#type' => 'details',
            '#title' => t('Case Based Reasoning'),
            '#open' => true,
            '#weight' => 99
        ];
        $form['cbr_settings']['weight'] = [
            '#type' => 'number',
            '#step' => '.001',
            '#title' => t('Attribute Weight'),
            // Falls back to a weight of 1 when nothing has been stored yet.
            '#default_value' => $field_config->getThirdPartySetting('cbr', 'weight', 1),
            '#required' => true,
            '#description' => t('Attribute weight for the calculation of the similarity. Can be changed, but you need to recalculate all saved similarity factors!'),
        ];
        $form['#entity_builders'][] = [static::class, 'saveCBRFieldSettings'];

        return $form;
    }

    /**
     * Save the weight to the field config.
     *
     * Entity builder callback registered by cbrFieldSettingsForm(). Reads the
     * submitted value at settings > cbr_settings > weight.
     *
     * @param mixed $entity_type The entity type.
     * @param FieldConfig $fieldConfig The field config.
     * @param array $form The form array.
     * @param FormStateInterface $form_state The form state.
     */
    public static function saveCBRFieldSettings($entity_type, FieldConfig $fieldConfig, &$form, FormStateInterface $form_state)
    {
        $weight = $form_state->getValue(['settings', 'cbr_settings', 'weight']);
        $fieldConfig->setThirdPartySetting('cbr', 'weight', $weight);
    }

    /**
     * Add the similarity function selector for string fields.
     *
     * Places a required select element (cosine or jaccard, default cosine)
     * under `cbr_settings` and registers saveCBRFieldStringSettings() as an
     * entity builder.
     *
     * @param array $form The form array.
     * @param FormStateInterface $form_state The form state.
     * @return array The form array.
     */
    public static function stringFieldSettingsForm(array $form, FormStateInterface $form_state): array
    {
        /** @var FieldConfig $field_config */
        $field_config = $form_state->getFormObject()->getEntity();

        $form['cbr_settings']['similarity_function'] = [
            '#type' => 'select',
            '#title' => t('Similarity function'),
            '#description' => t('Select the similarity function to use for this field.'),
            '#options' => [
                'cosine' => t('Cosine'),
                'jaccard' => t('Jaccard')
            ],
            '#default_value' => $field_config->getThirdPartySetting('cbr', 'similarity_function', 'cosine'),
            '#required' => true
        ];
        $form['#entity_builders'][] = [static::class, 'saveCBRFieldStringSettings'];

        return $form;
    }

    /**
     * Save the selected string similarity method to the field config.
     *
     * Entity builder callback registered by stringFieldSettingsForm(). Reads
     * the submitted value at settings > cbr_settings > similarity_function.
     *
     * @param mixed $entity_type The entity type.
     * @param FieldConfig $fieldConfig The field config.
     * @param array $form The form array.
     * @param FormStateInterface $form_state The form state.
     */
    public static function saveCBRFieldStringSettings($entity_type, FieldConfig $fieldConfig, &$form, FormStateInterface $form_state)
    {
        $function = $form_state->getValue(['settings', 'cbr_settings', 'similarity_function']);
        $fieldConfig->setThirdPartySetting('cbr', 'similarity_function', $function);
    }

    /**
     * Calculate the similarity between two numeric values.
     *
     * With a usable range (both bounds given and max > min) the similarity is
     * 1 - |v1 - v2| / (max - min), clamped to 0 so that values lying further
     * apart than the range never produce a negative similarity. Without a
     * usable range it is 1 / (|v1 - v2| + 1).
     *
     * @param float $value1 The first value.
     * @param float $value2 The second value.
     * @param float|null $min The lower bound of the value range, if any.
     * @param float|null $max The upper bound of the value range, if any.
     * @return float The similarity in the range [0, 1].
     */
    public static function calculateSimilarityBetweenNumericValues(float $value1, float $value2, ?float $min, ?float $max): float
    {
        $distance = abs($value1 - $value2);

        if ($min !== null && $max !== null && ($max - $min) > 0) {
            return max(0.0, 1 - ($distance / ($max - $min)));
        }

        return 1 / ($distance + 1);
    }

    /**
     * Calculate the similarity between two strings.
     *
     * @param string $value1 The first string.
     * @param string $value2 The second string.
     * @param string $similarity_function Either 'cosine' or 'jaccard'.
     * @return float The similarity, or 0 for an unknown similarity function.
     */
    public static function calculateSimilarityBetweenStringValues(string $value1, string $value2, string $similarity_function): float
    {
        switch ($similarity_function) {
            case 'cosine':
                return self::calculateCosineSimilarity($value1, $value2);
            case 'jaccard':
                return self::calculateJaccardSimilarity($value1, $value2);
            default:
                return 0.0;
        }
    }

    /**
     * Calculate the cosine similarity between the word counts of two strings.
     *
     * @param string $value1 The first string.
     * @param string $value2 The second string.
     * @return float The similarity in [0, 1]; 0 if either string has no words.
     */
    private static function calculateCosineSimilarity(string $value1, string $value2): float
    {
        $words1 = self::stringToArray($value1);
        $words2 = self::stringToArray($value2);

        $counts1 = array_count_values($words1);
        $counts2 = array_count_values($words2);

        // Build both term-frequency vectors over the same vocabulary.
        $vector1 = [];
        $vector2 = [];
        foreach (array_unique(array_merge($words1, $words2)) as $word) {
            $vector1[] = $counts1[$word] ?? 0;
            $vector2[] = $counts2[$word] ?? 0;
        }

        $denominator = self::magnitude($vector1) * self::magnitude($vector2);
        if ($denominator === 0.0) {
            // At least one side has no words: avoid a division by zero and
            // report "no similarity", matching the jaccard implementation.
            return 0.0;
        }

        // Floating point rounding must not push the result above 1.
        return min(1.0, self::dotProduct($vector1, $vector2) / $denominator);
    }

    /**
     * Calculate the dot product of two equally long vectors.
     *
     * @param array $vector1 The first vector.
     * @param array $vector2 The second vector.
     * @return float The dot product.
     */
    private static function dotProduct(array $vector1, array $vector2): float
    {
        return array_sum(array_map(static function ($a, $b) {
            return $a * $b;
        }, $vector1, $vector2));
    }

    /**
     * Calculate the euclidean length of a vector.
     *
     * @param array $vector The vector.
     * @return float The length.
     */
    private static function magnitude(array $vector): float
    {
        return sqrt(array_sum(array_map(static function ($a) {
            return $a ** 2;
        }, $vector)));
    }

    /**
     * Calculate the jaccard similarity between the word sets of two strings.
     *
     * @param string $value1 The first string.
     * @param string $value2 The second string.
     * @return float |intersection| / |union|; 0 if neither string has words.
     */
    private static function calculateJaccardSimilarity(string $value1, string $value2): float
    {
        $words1 = array_unique(self::stringToArray($value1));
        $words2 = array_unique(self::stringToArray($value2));

        $size = count(array_unique(array_merge($words1, $words2)));
        if ($size === 0) {
            return 0.0;
        }

        return count(array_intersect($words1, $words2)) / $size;
    }

    /**
     * Split a string into lower-cased words.
     *
     * Strips tags, lower-cases the text, removes every character that is not
     * an ASCII letter or digit, a German umlaut / sharp s or whitespace, and
     * splits on whitespace. Empty tokens are dropped, so an empty string or
     * surrounding whitespace never contributes a word.
     *
     * @param string $string The raw string.
     * @return array The list of words; empty if the string contains none.
     */
    private static function stringToArray(string $string): array
    {
        $string = strip_tags($string);
        // Multi-byte aware lower-casing so that umlauts are folded as well;
        // byte-wise fallback for installations without mbstring.
        $string = function_exists('mb_strtolower') ? mb_strtolower($string, 'UTF-8') : strtolower($string);

        $disallowed = '/[^a-zA-Z0-9äöüÄÖÜß\s]/';
        // The pattern contains multi-byte literals, so it has to run in UTF-8
        // mode; otherwise single bytes of other characters would slip through.
        $modifier = 'u';
        $cleaned = preg_replace($disallowed . $modifier, '', $string);
        if ($cleaned === null) {
            // The subject is not valid UTF-8: fall back to byte-wise matching.
            $modifier = '';
            $cleaned = (string) preg_replace($disallowed, '', $string);
        }

        $words = preg_split('/\s+/' . $modifier, $cleaned, -1, PREG_SPLIT_NO_EMPTY);

        return $words === false ? [] : $words;
    }

    /**
     * Summarize numeric values by their median.
     *
     * The method name keeps its historic spelling for backward compatibility.
     *
     * @param array $values The numeric values.
     * @return float The median, or 0 for an empty list.
     */
    public static function summerizeNumericValues(array $values): float
    {
        if (!$values) {
            return 0.0;
        }

        sort($values);
        $count = count($values);
        // For an odd count both indexes point at the middle element, for an
        // even count at the two elements around the middle.
        $lower = intdiv($count - 1, 2);
        $upper = intdiv($count, 2);

        return ($values[$lower] + $values[$upper]) / 2;
    }

    /**
     * Summarize string values by joining them with a pipe character.
     *
     * @param array $fields The string values.
     * @return string The joined string.
     */
    public static function summarizeStringValues(array $fields): string
    {
        return implode('|', $fields);
    }
}