utilikit-1.0.0/src/Service/UtilikitContentScanner.php

src/Service/UtilikitContentScanner.php
<?php

declare(strict_types=1);

namespace Drupal\utilikit\Service;

use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Entity\FieldableEntityInterface;
use Drupal\Core\Field\FieldItemListInterface;
use Drupal\Core\TypedData\TranslatableInterface;
use Psr\Log\LoggerInterface;

/**
 * Scans entity content for UtiliKit utility classes.
 *
 * This service provides comprehensive scanning capabilities for finding
 * UtiliKit utility classes within entity field data. It validates classes
 * against defined rules, extracts classes from HTML content, and supports
 * batch processing for performance optimization.
 */
class UtilikitContentScanner {

  /**
   * Constructs a new UtilikitContentScanner object.
   *
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entityTypeManager
   *   The entity type manager service, used to enumerate entity types and to
   *   obtain their storage handlers.
   * @param \Psr\Log\LoggerInterface $logger
   *   The logger service for recording scanning operations.
   * @param \Drupal\utilikit\Service\UtilikitRules $rulesService
   *   The UtiliKit rules service; the keys of its rules array are the valid
   *   class abbreviations.
   */
  public function __construct(
    protected EntityTypeManagerInterface $entityTypeManager,
    protected LoggerInterface $logger,
    protected UtilikitRules $rulesService,
  ) {}

  /**
   * Scans all configured fieldable entities for UtiliKit utility classes.
   *
   * Entity types are taken from the 'scanning_entity_types' key of the
   * 'utilikit.settings' configuration (falling back to node, block_content
   * and paragraph) and restricted to fieldable entity types. Entities are
   * loaded in ID-ordered batches, and the scan stops early once the time
   * budget is exceeded.
   *
   * @param int|null $batchSize
   *   Optional batch size for entity processing. NULL or a non-positive value
   *   selects UtilikitConstants::BATCH_SIZE_DEFAULT.
   * @param int|null $maxExecutionTime
   *   Optional maximum execution time in seconds. Defaults to 30 seconds
   *   if not specified. The budget is checked before each batch.
   *
   * @return array
   *   Results array containing:
   *   - classes: List of unique utility classes found
   *   - scanned_count: Total number of entities scanned
   *   - completed: Boolean indicating if scan completed or timed out
   *   - execution_time: Total execution time in whole seconds
   */
  public function scanAllContent(?int $batchSize = NULL, ?int $maxExecutionTime = NULL): array {
    // A non-positive batch size cannot drive the paging loop below, so it is
    // treated the same as "not specified".
    if ($batchSize === NULL || $batchSize < 1) {
      $batchSize = UtilikitConstants::BATCH_SIZE_DEFAULT;
    }
    $maxExecutionTime ??= 30;
    $startTime = time();

    // Keyed by class name so duplicates never accumulate across batches.
    $classSet = [];
    $totalScanned = 0;
    $completed = TRUE;

    // Get scanning configuration.
    $config = \Drupal::config('utilikit.settings');
    $scanningEntityTypes = $config->get('scanning_entity_types') ?? ['node', 'block_content', 'paragraph'];

    // Keep only fieldable entity types that are enabled for scanning.
    $entityTypes = array_filter(
      $this->entityTypeManager->getDefinitions(),
      static fn ($entityType): bool => $entityType->entityClassImplements(FieldableEntityInterface::class)
        && in_array($entityType->id(), $scanningEntityTypes, TRUE),
    );

    foreach ($entityTypes as $entityTypeId => $entityType) {
      $storage = $this->entityTypeManager->getStorage($entityTypeId);
      $idKey = $entityType->getKey('id');
      // Paging cursor: each query fetches IDs strictly greater than this.
      $lastId = 0;
      $entityTypeCount = 0;
      $batchNumber = 0;

      while (TRUE) {
        // Check timeout.
        if ((time() - $startTime) > $maxExecutionTime) {
          $this->logger->warning('Scan timeout reached after @time seconds. Scanned @count entities.', [
            '@time' => $maxExecutionTime,
            '@count' => $totalScanned,
          ]);
          $completed = FALSE;
          break 2;
        }

        $entityIds = $storage->getQuery()
          ->accessCheck(FALSE)
          ->condition($idKey, $lastId, '>')
          ->sort($idKey, 'ASC')
          ->range(0, $batchSize)
          ->execute();

        if (empty($entityIds)) {
          break;
        }

        foreach ($storage->loadMultiple($entityIds) as $entity) {
          foreach ($this->scanEntity($entity) as $class) {
            $classSet[$class] = TRUE;
          }
          $totalScanned++;
          $entityTypeCount++;
        }

        // Progress logging every 10 batches. Counting batches (rather than
        // entities) keeps the cadence even when a batch loads fewer entities
        // than were requested.
        $batchNumber++;
        if ($batchNumber % 10 === 0) {
          $this->logger->info('Scanned @count @type entities', [
            '@count' => $entityTypeCount,
            '@type' => $entityTypeId,
          ]);
        }

        // IDs were sorted ascending, so the last value is the next cursor.
        $lastId = end($entityIds);
        unset($entityIds);

        // Loaded entities stay referenced by the entity static cache, so
        // dropping local variables alone does not release them.
        $this->clearEntityMemoryCache();
      }

      if ($entityTypeCount > 0) {
        $this->logger->info('Completed @type: @count entities scanned', [
          '@type' => $entityTypeId,
          '@count' => $entityTypeCount,
        ]);
      }
    }

    return [
      'classes' => array_keys($classSet),
      'scanned_count' => $totalScanned,
      'completed' => $completed,
      'execution_time' => time() - $startTime,
    ];
  }

  /**
   * Releases entities held by the in-memory entity cache.
   *
   * Only the per-request memory cache is cleared; persistent entity caches
   * are left intact so a scan does not slow down later entity loads.
   */
  protected function clearEntityMemoryCache(): void {
    if (\Drupal::hasService('entity.memory_cache')) {
      \Drupal::service('entity.memory_cache')->deleteAll();
    }
  }

  /**
   * Scans a single entity for UtiliKit utility classes.
   *
   * Walks every field item property of the entity and runs non-empty string
   * values through extractClassesFromHtml(). When the entity is translatable,
   * every translation is scanned, not just the active one.
   *
   * @param mixed $entity
   *   The entity to scan. Anything that does not implement
   *   FieldableEntityInterface yields an empty result.
   *
   * @return array
   *   List of unique utility classes found in the entity.
   */
  public function scanEntity($entity): array {
    if (!$entity instanceof FieldableEntityInterface) {
      return [];
    }

    // getFields() only exposes the active translation, so collect each
    // translation explicitly.
    $translations = [];
    if ($entity instanceof TranslatableInterface) {
      foreach (array_keys($entity->getTranslationLanguages()) as $langcode) {
        $translations[] = $entity->getTranslation((string) $langcode);
      }
    }
    if ($translations === []) {
      $translations = [$entity];
    }

    $classSet = [];

    foreach ($translations as $translation) {
      foreach ($translation->getFields() as $field) {
        if (!$field instanceof FieldItemListInterface) {
          continue;
        }

        foreach ($field as $item) {
          foreach ($item->getProperties() as $property) {
            $value = $property->getValue();

            if (is_string($value) && $value !== '') {
              foreach ($this->extractClassesFromHtml($value) as $class) {
                $classSet[$class] = TRUE;
              }
            }
          }
        }
      }
    }

    return array_keys($classSet);
  }

  /**
   * Extract UtiliKit classes from HTML content following strict validation.
   *
   * A class is returned only when all of the following hold:
   * 1. It appears inside a quoted class attribute of an HTML tag.
   * 2. The same attribute contains the exact "utilikit" marker class.
   * 3. The class itself passes isValidUtilityClass(), i.e. it has the form
   *    uk-[breakpoint-]{abbrev}--{value} with a known abbreviation.
   *
   * @param string $html
   *   HTML content to scan for utility classes.
   *
   * @return array
   *   List of unique utility classes found in the HTML.
   */
  public function extractClassesFromHtml(string $html): array {
    // Quick check - must contain both "utilikit" and "uk-". This also covers
    // the empty string.
    if (!str_contains($html, 'utilikit') || !str_contains($html, 'uk-')) {
      return [];
    }

    // Find all elements that have both "utilikit" and "uk-" in their
    // class attribute (in either order).
    $pattern = '/<[^>]+class=["\']([^"\']*utilikit[^"\']*uk-[^"\']*|[^"\']*uk-[^"\']*utilikit[^"\']*)["\'][^>]*>/i';

    if (!preg_match_all($pattern, $html, $matches)) {
      return [];
    }

    $validAbbreviations = $this->getValidAbbreviations();
    $classSet = [];

    foreach ($matches[1] as $classString) {
      $elementClasses = preg_split('/\s+/', trim($classString)) ?: [];

      // The regex matches "utilikit" as a substring and case-insensitively;
      // require the exact marker class here.
      if (!in_array('utilikit', $elementClasses, TRUE)) {
        continue;
      }

      foreach ($elementClasses as $class) {
        if ($this->isValidUtilityClass($class, $validAbbreviations)) {
          $classSet[$class] = TRUE;
        }
      }
    }

    return array_keys($classSet);
  }

  /**
   * Get valid abbreviations from the rules service.
   *
   * @return array
   *   The keys of the array returned by the rules service's getRules().
   */
  protected function getValidAbbreviations(): array {
    return array_keys($this->rulesService->getRules());
  }

  /**
   * Validate a utility class against all requirements.
   *
   * A valid class:
   * - starts with "uk-" and is no longer than
   *   UtilikitConstants::MAX_CLASS_NAME_LENGTH bytes;
   * - has the structure uk-[breakpoint-]abbreviation--value, where the
   *   optional breakpoint is one of sm, md, lg, xl, xxl and the abbreviation
   *   is 2-4 lowercase letters;
   * - has a non-empty value made of letters, digits, hyphens, underscores
   *   and percent signs (the letter "d" doubles as the decimal marker);
   * - uses an abbreviation present in the list of valid abbreviations.
   *
   * @param string $class
   *   The utility class name to validate.
   * @param array|null $validAbbreviations
   *   Optional array of valid abbreviations. If not provided, will be
   *   retrieved from the rules service.
   *
   * @return bool
   *   TRUE if the class is valid, FALSE otherwise.
   */
  public function isValidUtilityClass(string $class, ?array $validAbbreviations = NULL): bool {
    // Must start with uk-.
    if (!str_starts_with($class, 'uk-')) {
      return FALSE;
    }

    // Check length limit.
    if (strlen($class) > UtilikitConstants::MAX_CLASS_NAME_LENGTH) {
      return FALSE;
    }

    // Structure and value format are checked in one pass. The value group
    // requires at least one character, so a value of "0" (e.g. uk-pd--0) is
    // accepted. The D modifier stops "$" from matching before a trailing
    // newline.
    $pattern = '/^uk-(?:(?:sm|md|lg|xl|xxl)-)?([a-z]{2,4})--([a-zA-Z0-9\-_%]+)$/D';
    if (!preg_match($pattern, $class, $matches)) {
      return FALSE;
    }

    // Get valid abbreviations if not provided.
    $validAbbreviations ??= $this->getValidAbbreviations();

    // Validate abbreviation exists in rules.
    return in_array($matches[1], $validAbbreviations, TRUE);
  }

  /**
   * Validates an array of utility classes.
   *
   * @param array $classes
   *   Array of utility class names to validate. Non-string elements are
   *   treated as invalid.
   *
   * @return array
   *   Array containing only the valid utility classes, with the original
   *   keys preserved.
   */
  public function validateUtilityClasses(array $classes): array {
    $validAbbreviations = $this->getValidAbbreviations();

    return array_filter(
      $classes,
      fn ($class): bool => is_string($class) && $this->isValidUtilityClass($class, $validAbbreviations),
    );
  }

  /**
   * Extracts abbreviations from an array of utility classes.
   *
   * Only the class prefix (uk-[breakpoint-]abbreviation--) is inspected; the
   * value part and the rules service are not consulted.
   *
   * @param array $classes
   *   Array of utility class names to parse. Non-string elements are skipped.
   *
   * @return array
   *   List of unique abbreviations, in order of first appearance.
   */
  public function getAbbreviationsFromClasses(array $classes): array {
    $abbreviationSet = [];

    foreach ($classes as $class) {
      if (is_string($class) && preg_match('/^uk-(?:(?:sm|md|lg|xl|xxl)-)?([a-z]{2,4})--/', $class, $matches)) {
        $abbreviationSet[$matches[1]] = TRUE;
      }
    }

    return array_keys($abbreviationSet);
  }

  /**
   * Scans a batch of already loaded entities for utility classes.
   *
   * @param array $entities
   *   Array of entities to scan for utility classes.
   *
   * @return array
   *   List of unique utility classes found across all entities.
   */
  public function scanEntityBatch(array $entities): array {
    $classSet = [];

    foreach ($entities as $entity) {
      foreach ($this->scanEntity($entity) as $class) {
        $classSet[$class] = TRUE;
      }
    }

    return array_keys($classSet);
  }

  /**
   * Scan for new classes not in the known classes list.
   *
   * Runs scanAllContent() with its default batch size and time budget and
   * reports the classes that are absent from $knownClasses.
   *
   * @param array $knownClasses
   *   Array of already known utility classes to compare against.
   *
   * @return array
   *   Results array containing:
   *   - new_classes: List of newly found utility classes
   *   - total_scanned: Total number of entities scanned
   *   - new_count: Count of new classes found
   *   - total_found: Total count of all classes found
   *   - completed: FALSE when the underlying scan hit its time budget, in
   *     which case the other values describe a partial scan
   */
  public function scanForNewClasses(array $knownClasses): array {
    $scanResult = $this->scanAllContent();
    $newClasses = array_values(array_diff($scanResult['classes'], $knownClasses));

    return [
      'new_classes' => $newClasses,
      'total_scanned' => $scanResult['scanned_count'],
      'new_count' => count($newClasses),
      'total_found' => count($scanResult['classes']),
      'completed' => $scanResult['completed'],
    ];
  }

}

Главная | Обратная связь

drupal hosting | друпал хостинг | it patrol .inc