image_to_media_swapper-2.x-dev/src/BatchProcessorService.php

src/BatchProcessorService.php
<?php

declare(strict_types=1);

namespace Drupal\image_to_media_swapper;

use Drupal\Component\Datetime\TimeInterface;
use Drupal\Core\Entity\RevisionableInterface;
use Drupal\Core\Entity\FieldableEntityInterface;
use Drupal\Core\Entity\EntityDisplayRepositoryInterface;
use Drupal\Core\Entity\EntityFieldManagerInterface;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Extension\ModuleHandlerInterface;
use Drupal\Core\Field\FieldDefinitionInterface;
use Drupal\Core\Logger\LoggerChannelFactoryInterface;
use Drupal\Core\Logger\LoggerChannelInterface;
use Drupal\Core\Messenger\MessengerTrait;
use Drupal\Core\Session\AccountProxyInterface;
use Drupal\Core\StringTranslation\StringTranslationTrait;
use Drupal\file\FileInterface;
use Drupal\filter\FilterFormatInterface;

/**
 * Helper functions to find and process eligible text fields.
 */
class BatchProcessorService {

  use StringTranslationTrait;
  use MessengerTrait;

  /**
   * The logger channel used by this service.
   *
   * @var \Drupal\Core\Logger\LoggerChannelInterface
   */
  protected LoggerChannelInterface $loggerChannel;

  /**
   * Constructs a BatchProcessorService object.
   *
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entityTypeManager
   *   The entity type manager, used to obtain entity storage and definitions.
   * @param \Drupal\Core\Entity\EntityFieldManagerInterface $entityFieldManager
   *   The entity field manager, used to discover text fields per bundle.
   * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $loggerChannelFactory
   *   The logger factory; only the 'image_to_media_swapper' channel is kept.
   * @param \Drupal\image_to_media_swapper\SwapperService $swapperService
   *   The service that resolves files and media for images and file links.
   * @param \Drupal\Core\Entity\EntityDisplayRepositoryInterface $entityDisplayRepository
   *   The display repository, used to read form display settings of fields.
   * @param \Drupal\Core\Session\AccountProxyInterface $currentUser
   *   The current user, recorded as the author of new revisions.
   * @param \Drupal\Component\Datetime\TimeInterface $time
   *   The time service, used for revision creation timestamps.
   * @param \Drupal\Core\Extension\ModuleHandlerInterface $moduleHandler
   *   The module handler, used to check whether Linkit is installed.
   */
  public function __construct(
    private readonly EntityTypeManagerInterface $entityTypeManager,
    private readonly EntityFieldManagerInterface $entityFieldManager,
    LoggerChannelFactoryInterface $loggerChannelFactory,
    private readonly SwapperService $swapperService,
    private readonly EntityDisplayRepositoryInterface $entityDisplayRepository,
    private readonly AccountProxyInterface $currentUser,
    private readonly TimeInterface $time,
    private readonly ModuleHandlerInterface $moduleHandler,
  ) {
    // The factory itself is not retained; resolve the module channel once.
    $this->loggerChannel = $loggerChannelFactory->get('image_to_media_swapper');
  }

  /**
   * Builds and registers the conversion batch for one field.
   *
   * Looks up the entities whose field content matches the requested category,
   * splits their IDs into groups of 20 and queues one BatchHandler operation
   * per group. When nothing matches, a status message is shown and no batch
   * is registered.
   *
   * @param string $fieldSelector
   *   The field selector in the format 'entity_type.bundle.field_name'.
   * @param string $category
   *   The category being converted. 'mixed' disables type filtering; any
   *   other value is used as the single content type to look for.
   */
  public function startBatch(string $fieldSelector, string $category): void {
    // 'mixed' means "no filter"; otherwise filter on exactly this category.
    $typeFilter = [];
    if ($category !== 'mixed') {
      $typeFilter[] = $category;
    }
    $matchingEntities = $this->getEntitiesWithFiles($fieldSelector, $typeFilter);

    if (empty($matchingEntities)) {
      // Nothing to convert: tell the user and do not register a batch.
      $notice = $this->t('No entities found with @category for @selector.',
        [
          '@category' => $category,
          '@selector' => $fieldSelector,
        ]);
      $this->messenger()->addStatus($notice);
      return;
    }

    // One operation per group of 20 entity IDs.
    $idGroups = array_chunk(array_keys($matchingEntities), 20);
    $operations = array_map(
      static fn (array $idGroup): array => [
        ['\Drupal\image_to_media_swapper\BatchHandler', 'processChunk'],
        [$fieldSelector, $idGroup, $category],
      ],
      $idGroups,
    );

    batch_set([
      'title' => $this->t('Converting expected @category to media entities...', ['@category' => $category]),
      'operations' => $operations,
      'finished' => [
        '\Drupal\image_to_media_swapper\BatchHandler',
        'batchFinished',
      ],
      'init_message' => $this->t('Initializing file conversion...'),
      'progress_message' => $this->t('Processed @current out of @total.'),
      'error_message' => $this->t('An error occurred during file-to-media conversion.'),
    ]);
  }

  /**
   * Retrieve entities with <img> tags or file links in the specified field.
   *
   * Errors raised while querying or loading are logged and not rethrown; the
   * result gathered up to that point is returned.
   *
   * @param string $fieldSelector
   *   The field selector in the format 'entity_type.bundle.field_name'.
   * @param array $types
   *   Array of types to check for ('images', 'links'). If empty, checks both.
   *
   * @return array
   *   When $types holds at most one entry: the matching entities keyed by
   *   entity ID. When $types holds more than one entry: an array with the
   *   keys 'images' and 'links', each holding the number of matching
   *   entities. A malformed selector yields the empty form of that result.
   */
  public function getEntitiesWithFiles(string $fieldSelector, array $types = []): array {
    // Determine what to check for and which result shape to return.
    $checkImages = empty($types) || in_array('images', $types, TRUE);
    $checkLinks = empty($types) || in_array('links', $types, TRUE);
    $returnCategorized = count($types) > 1;
    $categorized = ['images' => 0, 'links' => 0];
    $output = [];

    // Validate the selector up front instead of destructuring blindly, which
    // would raise undefined-key warnings for a malformed selector.
    $selectorParts = explode('.', $fieldSelector);
    if (count($selectorParts) !== 3) {
      $this->loggerChannel->error('Error in getEntitiesWithFiles for @selector: @msg', [
        '@selector' => $fieldSelector,
        '@msg' => 'Field selector must be in the format "entity_type.bundle.field_name".',
      ]);
      return $returnCategorized ? $categorized : $output;
    }
    [$entity_type_id, $bundle, $field_name] = $selectorParts;

    try {
      $storage = $this->entityTypeManager->getStorage($entity_type_id);
      $entityType = $this->entityTypeManager->getDefinition($entity_type_id);

      $query = $storage->getQuery()->accessCheck(TRUE);

      // Use the correct bundle key for the entity type.
      if ($entityType->hasKey('bundle')) {
        $query->condition($entityType->getKey('bundle'), $bundle);
      }

      // NOTE(review): latestRevision() evaluates conditions against each
      // entity's latest revision, while loadMultiple() below loads entities
      // by ID; confirm that this combination is the intended behaviour.
      if ($entityType->isRevisionable()) {
        $query->latestRevision();
      }

      // For paragraph entities, filter to exclude orphaned revisions.
      if ($entity_type_id === 'paragraph') {
        // Only include paragraphs that have a parent entity reference.
        $query->exists('parent_id');
        $query->exists('parent_type');
      }

      $entity_ids = $query->execute();
      if (empty($entity_ids)) {
        return $returnCategorized ? $categorized : [];
      }

      /** @var \Drupal\Core\Entity\FieldableEntityInterface $entity */
      foreach ($storage->loadMultiple($entity_ids) as $entity) {
        if (!$entity->hasField($field_name)) {
          continue;
        }

        $fieldValue = $entity->get($field_name)->value ?? '';
        $hasImages = $checkImages && str_contains($fieldValue, '<img');
        $hasLinks = $checkLinks && $this->containsFileLinks($fieldValue);

        if ($returnCategorized) {
          // Count per type; one entity can count towards both.
          if ($hasImages) {
            $categorized['images']++;
          }
          if ($hasLinks) {
            $categorized['links']++;
          }
        }
        elseif ($hasImages || $hasLinks) {
          // Collect the entity if it matches any requested type.
          $output[$entity->id()] = $entity;
        }
      }
    }
    catch (\Throwable $e) {
      $this->loggerChannel->error('Error in getEntitiesWithFiles for @selector: @msg', [
        '@selector' => $fieldSelector,
        '@msg' => $e->getMessage(),
      ]);
    }

    return $returnCategorized ? $categorized : $output;
  }

  /**
   * Dispatches entity processing to the handler for the given category.
   *
   * 'links' is handled by processFileLinksToMedia(); every other category,
   * including 'images' and 'mixed', is handled by swapImagesToMedia().
   *
   * @param string $fieldSelector
   *   The field selector in the format 'entity_type.bundle.field_name'.
   * @param \Drupal\Core\Entity\FieldableEntityInterface[] $entities
   *   (Optional) An array of already loaded entities to operate on.
   * @param string $category
   *   The category type: 'images', 'links', or 'mixed'.
   *
   * @return \Drupal\Core\Entity\FieldableEntityInterface[]
   *   An array of updated entities.
   *
   * @throws \Drupal\Core\Entity\EntityStorageException
   *   If there is an error saving the entity.
   */
  public function processContentByCategory(string $fieldSelector, array $entities = [], string $category = 'mixed'): array {
    if ($category === 'links') {
      return $this->processFileLinksToMedia($fieldSelector, $entities, $category);
    }

    return $this->swapImagesToMedia($fieldSelector, $entities, $category);
  }

  /**
   * Replaces <img> tags (and, per category, file links) with media markup.
   *
   * For each entity that is in its default revision and whose field contains
   * content matching the category, every resolvable <img> tag is swapped for
   * a <drupal-media> element and file links are converted through
   * processFileLinksWithMetadata(). Entities with at least one replacement
   * are saved; an exception while handling an entity is recorded as a
   * 'failed' swap record and processing continues with the next entity.
   *
   * @param string $fieldSelector
   *   The field selector in the format 'entity_type.bundle.field_name'.
   * @param \Drupal\Core\Entity\FieldableEntityInterface[] $entities
   *   (Optional) An array of already loaded entities to operate on.
   * @param string $category
   *   (Optional) The batch category for creating swap records: 'images',
   *   'links' or 'mixed'. Any other value results in no processing.
   *
   * @return \Drupal\Core\Entity\FieldableEntityInterface[]
   *   An array of updated entities, keyed by entity ID.
   *
   * @throws \InvalidArgumentException
   *   If the field selector does not consist of exactly three parts.
   * @throws \Drupal\Core\Entity\EntityStorageException
   *   If there is an error saving the entity.
   */
  public function swapImagesToMedia(string $fieldSelector, array $entities = [], string $category = 'mixed'): array {
    $selectorParts = explode('.', $fieldSelector);
    if (count($selectorParts) !== 3) {
      throw new \InvalidArgumentException('Field selector must be in the format "entity_type.bundle.field_name".');
    }
    $field_name = $selectorParts[2];

    // Which kinds of content this category covers.
    $wantsImages = in_array($category, ['images', 'mixed'], TRUE);
    $wantsLinks = in_array($category, ['links', 'mixed'], TRUE);

    $updated = [];

    foreach ($entities as $entity) {
      try {
        if (!$entity->hasField($field_name)) {
          continue;
        }

        // Only content in the default revision is converted.
        if (!$this->isDefaultRevision($entity)) {
          $this->loggerChannel->info('Skipping non-current revision for @entity_type @id (revision @revision_id)', [
            '@entity_type' => $entity->getEntityTypeId(),
            '@id' => $entity->id(),
            '@revision_id' => $entity->getRevisionId(),
          ]);
          continue;
        }

        $richText = $entity->get($field_name)->value ?? '';

        // Skip entities whose text holds nothing relevant to the category.
        $needsProcessing = match ($category) {
          'images' => str_contains($richText, '<img'),
          'links' => $this->containsFileLinks($richText),
          'mixed' => str_contains($richText, '<img') || $this->containsFileLinks($richText),
          default => FALSE,
        };
        if (!$needsProcessing) {
          continue;
        }

        $replacements = [];
        $conversionDetails = [];
        $allFileIds = [];
        $allMediaIds = [];

        // Convert <img> tags.
        if ($wantsImages && str_contains($richText, '<img')) {
          preg_match_all('/<img[^>]+src="([^"]+)"[^>]*>/i', $richText, $imageMatches, PREG_SET_ORDER);

          foreach ($imageMatches as [$imageDom, $imageUrl]) {
            $imageUuid = '';
            if (preg_match('/data-entity-uuid="([^"]+)"/i', $imageDom, $uuidMatch)) {
              $imageUuid = $uuidMatch[1];
            }

            // Alignment is carried over to the media element when present.
            $dataAlign = preg_match('/data-align="([^"]+)"/i', $imageDom, $alignMatch) ? $alignMatch[1] : '';

            // A UUID takes precedence over the src URL for the file lookup.
            $file = NULL;
            if ($imageUuid) {
              $file = $this->swapperService->findFileFromUuid($imageUuid);
            }
            elseif ($imageUrl) {
              $publicUri = $this->swapperService->convertWebPathToPublicUri($imageUrl);
              if ($publicUri === NULL) {
                $this->loggerChannel->warning('Failed to convert web path to public URI: @url', ['@url' => $imageUrl]);
              }
              else {
                $file = $this->swapperService->findOrCreateFileEntityByUri($publicUri);
              }
            }

            if (!$file instanceof FileInterface) {
              continue;
            }

            $media = $this->swapperService->findOrCreateMediaFromFileEntity($file);
            if (!$media) {
              continue;
            }

            // Assemble the <drupal-media> element.
            $mediaAttributes = [
              'data-entity-type="media"',
              'data-entity-uuid="' . $media->uuid() . '"',
              'data-view-mode="default"',
            ];
            $detailText = sprintf('Image: %s -> Media ID %s', $imageUrl, $media->id());
            if (!empty($dataAlign)) {
              $mediaAttributes[] = 'data-align="' . $dataAlign . '"';
              $detailText .= sprintf(' (data-align: %s)', $dataAlign);
            }
            $replacements[$imageDom] = sprintf(
              '<drupal-media %s></drupal-media>',
              implode(' ', $mediaAttributes)
            );

            // Record what was converted for the swap record.
            $allFileIds[] = $file->id();
            $allMediaIds[] = $media->id();
            $conversionDetails[] = $detailText;
          }
        }

        // Convert file links (including PDFs).
        if ($wantsLinks && $this->containsFileLinks($richText)) {
          $linkReplacements = $this->processFileLinksWithMetadata($richText, $allFileIds, $allMediaIds, $conversionDetails);
          $replacements = array_merge($replacements, $linkReplacements);
        }

        if (empty($replacements)) {
          continue;
        }

        // Only the first file and media ID are stored in the metadata.
        $metadata = [
          'source_file_id' => $allFileIds ? (int) $allFileIds[0] : NULL,
          'created_media_id' => $allMediaIds ? (int) $allMediaIds[0] : NULL,
          'conversion_details' => implode('; ', $conversionDetails),
        ];
        $this->saveEntityWithReplacements($entity, $field_name, $richText, $replacements, $fieldSelector, $category, $metadata);
        $updated[$entity->id()] = $entity;
      }
      catch (\Exception $e) {
        // Record the failure and move on to the next entity.
        $this->createSwapRecord($fieldSelector, $entity, $category, 'failed', [
          'error_message' => $e->getMessage(),
        ]);
      }
    }

    return $updated;
  }

  /**
   * Converts file links in a field to media links on the given entities.
   *
   * Only entities in their default revision whose field text contains file
   * links are touched. Entities with at least one replacement are saved; an
   * exception while handling an entity is recorded as a 'failed' swap record
   * and processing continues with the next entity.
   *
   * @param string $fieldSelector
   *   The field selector in the format 'entity_type.bundle.field_name'.
   * @param \Drupal\Core\Entity\FieldableEntityInterface[] $entities
   *   (Optional) An array of already loaded entities to operate on.
   * @param string $category
   *   (Optional) The batch category for creating swap records.
   *
   * @return \Drupal\Core\Entity\FieldableEntityInterface[]
   *   An array of updated entities, keyed by entity ID.
   *
   * @throws \InvalidArgumentException
   *   If the field selector does not consist of exactly three parts.
   * @throws \Drupal\Core\Entity\EntityStorageException
   *   If there is an error saving the entity.
   */
  public function processFileLinksToMedia(string $fieldSelector, array $entities = [], string $category = 'links'): array {
    $selectorParts = explode('.', $fieldSelector);
    if (count($selectorParts) !== 3) {
      throw new \InvalidArgumentException('Field selector must be in the format "entity_type.bundle.field_name".');
    }
    $field_name = $selectorParts[2];

    $updated = [];

    foreach ($entities as $entity) {
      try {
        if (!$entity->hasField($field_name)) {
          continue;
        }

        // Only content in the default revision is converted.
        if (!$this->isDefaultRevision($entity)) {
          $this->loggerChannel->info('Skipping non-current revision for @entity_type @id (revision @revision_id)', [
            '@entity_type' => $entity->getEntityTypeId(),
            '@id' => $entity->id(),
            '@revision_id' => $entity->getRevisionId(),
          ]);
          continue;
        }

        $richText = $entity->get($field_name)->value ?? '';

        // Nothing to do when the text holds no file links.
        if (!$this->containsFileLinks($richText)) {
          continue;
        }

        // The three arrays are filled by reference while links are resolved.
        $conversionDetails = [];
        $allFileIds = [];
        $allMediaIds = [];
        $replacements = $this->processFileLinksWithMetadata($richText, $allFileIds, $allMediaIds, $conversionDetails);

        if (empty($replacements)) {
          continue;
        }

        // Only the first file and media ID are stored in the metadata.
        $metadata = [
          'source_file_id' => $allFileIds ? (int) $allFileIds[0] : NULL,
          'created_media_id' => $allMediaIds ? (int) $allMediaIds[0] : NULL,
          'conversion_details' => implode('; ', $conversionDetails),
        ];
        $this->saveEntityWithReplacements($entity, $field_name, $richText, $replacements, $fieldSelector, $category, $metadata);
        $updated[$entity->id()] = $entity;
      }
      catch (\Exception $e) {
        // Record the failure and move on to the next entity.
        $this->createSwapRecord($fieldSelector, $entity, $category, 'failed', [
          'error_message' => $e->getMessage(),
        ]);
      }
    }

    return $updated;
  }

  /**
   * Lists text fields that are candidates for conversion.
   *
   * A field on a bundle qualifies when it either restricts no text formats,
   * or allows at least one format that uses CKEditor 5 and has the
   * 'media_embed' or 'filter_html_image_secure' filter configured. Of those,
   * only fields for which getEntitiesWithFiles() finds content are returned.
   *
   * @return string[]
   *   An array of options in the form 'entity_type.bundle.field_name' =>
   *   'Label'.
   */
  public function getEligibleTextFields(): array {
    $editorStorage = $this->entityTypeManager->getStorage('editor');

    // IDs of formats edited with CKEditor 5 that have a media-related filter.
    $qualifyingFormats = array_filter(filter_formats(), function (FilterFormatInterface $format) use ($editorStorage) {
      /** @var \Drupal\editor\EditorInterface $editor */
      $editor = $editorStorage->load($format->id());
      if (!$editor || $editor->getEditor() !== 'ckeditor5') {
        return FALSE;
      }
      $filterIds = array_keys($format->get('filters'));
      return in_array('media_embed', $filterIds, TRUE) || in_array('filter_html_image_secure', $filterIds, TRUE);
    });
    $media_enabled_formats = array_keys($qualifyingFormats);

    $options = [];

    foreach (['text', 'text_long', 'text_with_summary'] as $fieldType) {
      foreach ($this->entityFieldManager->getFieldMapByFieldType($fieldType) as $entity_type => $fields) {
        foreach ($fields as $field_name => $info) {
          foreach ($info['bundles'] as $bundle) {
            $definition = $this->entityFieldManager->getFieldDefinitions($entity_type, $bundle)[$field_name] ?? NULL;
            if (!$definition instanceof FieldDefinitionInterface) {
              continue;
            }

            // A field without a format restriction always qualifies; a
            // restricted one must allow at least one qualifying format.
            $field_formats = $this->getFieldAllowedFormats($entity_type, $bundle, $field_name, $definition);
            if (!empty($field_formats) && empty(array_intersect($field_formats, $media_enabled_formats))) {
              continue;
            }

            // Offer the field only when it actually holds convertible content.
            $key = "$entity_type.$bundle.$field_name";
            if ($this->getEntitiesWithFiles($key)) {
              $options[$key] = $this->t("@entity_type/@bundle/@field_name", [
                '@entity_type' => $entity_type,
                '@bundle' => $bundle,
                '@field_name' => $field_name,
              ]);
            }
          }
        }
      }
    }

    return $options;
  }

  /**
   * Gets allowed formats for a field from both field definition & form display.
   *
   * The field definition's 'allowed_formats' setting wins when it is a
   * non-empty array. Otherwise the form display component's third-party
   * 'allowed_formats' settings are consulted and the enabled format IDs are
   * returned. Failure to read the form display is logged and treated as
   * "no restriction".
   *
   * @param string $entity_type
   *   The entity type.
   * @param string $bundle
   *   The bundle.
   * @param string $field_name
   *   The field name.
   * @param \Drupal\Core\Field\FieldDefinitionInterface $definition
   *   The field definition.
   *
   * @return array
   *   Array of allowed format IDs; empty when no restriction was found.
   */
  private function getFieldAllowedFormats(string $entity_type, string $bundle, string $field_name, FieldDefinitionInterface $definition): array {
    // Try field definition first (Drupal 10.1+ core method). Anything that is
    // not an array is ignored so the declared return type always holds.
    $field_formats = $definition->getSetting('allowed_formats');
    if (is_array($field_formats) && !empty($field_formats)) {
      return $field_formats;
    }

    // Otherwise check the form display (allowed_formats module).
    try {
      $component = $this->entityDisplayRepository
        ->getFormDisplay($entity_type, $bundle)
        ->getComponent($field_name);
      $configured = $component['third_party_settings']['allowed_formats']['allowed_formats'] ?? NULL;
      if (is_array($configured)) {
        // Keep only the format IDs whose value is truthy (i.e. enabled).
        return array_keys(array_filter($configured));
      }
    }
    catch (\Exception $e) {
      // Best effort: carry on without a restriction, but leave a trace.
      $this->loggerChannel->debug('Could not read form display for @entity_type.@bundle.@field_name: @msg', [
        '@entity_type' => $entity_type,
        '@bundle' => $bundle,
        '@field_name' => $field_name,
        '@msg' => $e->getMessage(),
      ]);
    }

    return [];
  }

  /**
   * Saves a new revision of the entity and of each of its ancestors.
   *
   * If the entity exposes getParentEntity(), the parent chain is processed
   * first (outermost ancestor first). Each revisionable entity in the chain
   * gets a new revision with a log message, the current user as author and
   * the request time as creation time, and is then saved. Non-revisionable
   * entities are left untouched.
   *
   * @param \Drupal\Core\Entity\FieldableEntityInterface $entity
   *   The entity to create a new revision for.
   *
   * @throws \Drupal\Core\Entity\EntityStorageException
   */
  private function updateRevision(FieldableEntityInterface $entity): void {
    // Walk up to the parent first. An orphaned entity has no parent, so only
    // recurse when one was actually returned; passing NULL on would raise a
    // TypeError that the callers' catch (\Exception) blocks do not handle.
    if (method_exists($entity, 'getParentEntity')) {
      $parent = $entity->getParentEntity();
      if ($parent instanceof FieldableEntityInterface) {
        $this->updateRevision($parent);
      }
    }

    // Only revisionable entities can receive a new revision.
    if (!$entity instanceof RevisionableInterface) {
      return;
    }

    $entity->setNewRevision();

    // The revision log methods are optional, so each is probed individually.
    if (method_exists($entity, 'setRevisionLogMessage')) {
      $entity->setRevisionLogMessage($this->t('Updated media references in @entity_type @bundle.', [
        '@entity_type' => $entity->getEntityTypeId(),
        '@bundle' => $entity->bundle(),
      ]));
    }
    if (method_exists($entity, 'setRevisionUserId')) {
      // Set the revision user ID to the current user.
      $entity->setRevisionUserId($this->currentUser->id());
    }
    if (method_exists($entity, 'setRevisionCreationTime')) {
      $entity->setRevisionCreationTime($this->time->getRequestTime());
    }

    $entity->save();
  }

  /**
   * Checks whether the entity belongs to currently active content.
   *
   * Entities exposing getParentEntity() are judged through their parent: an
   * entity without a parent is rejected, a paragraph must be referenced (same
   * ID and revision ID) by the parent's field named in 'parent_field_name',
   * and the parent itself must pass this check recursively. All other
   * entities are judged by their own isDefaultRevision() when revisionable.
   *
   * @param \Drupal\Core\Entity\FieldableEntityInterface $entity
   *   The entity to check.
   *
   * @return bool
   *   TRUE if the entity is considered current, FALSE otherwise.
   *   Non-revisionable entities without a parent always yield TRUE.
   */
  private function isDefaultRevision(FieldableEntityInterface $entity): bool {
    // For paragraphs and other entities with parent relationships.
    if (method_exists($entity, 'getParentEntity')) {
      $parent = $entity->getParentEntity();

      // If no parent, this entity is orphaned and should be skipped.
      if (!$parent) {
        return FALSE;
      }

      // For paragraphs, check if they're in the parent's current revision.
      if ($entity->getEntityTypeId() === 'paragraph' && $entity->hasField('parent_field_name')) {
        // Name of the field on the parent that references this paragraph.
        $parent_field_name = $entity->get('parent_field_name')->value;

        // If the returned parent is not its default revision, reload it by ID
        // so that the field values inspected below are the current ones.
        if ($parent instanceof RevisionableInterface && !$parent->isDefaultRevision()) {
          $parent_storage = $this->entityTypeManager->getStorage($parent->getEntityTypeId());
          $parent = $parent_storage->load($parent->id());
        }

        if ($parent && $parent->hasField($parent_field_name)) {
          $field = $parent->get($parent_field_name);

          // Check if this paragraph is in the parent's current field values.
          // IDs are compared loosely on purpose: they may arrive as strings
          // or integers depending on where they were loaded from.
          foreach ($field as $value) {
            if ($value->entity &&
                $value->entity->getRevisionId() == $entity->getRevisionId() &&
                $value->entity->id() == $entity->id()) {
              // Recursively check if parent is also in its default revision.
              return $this->isDefaultRevision($parent);
            }
          }

          // Paragraph not found in parent's current revision field values.
          return FALSE;
        }
        // Reaching this point means the (re)loaded parent is missing or has
        // no such field; execution falls through to the generic checks below.
      }

      // For other entities with parents, recursively check parent.
      if ($parent instanceof FieldableEntityInterface) {
        return $this->isDefaultRevision($parent);
      }
    }

    // For regular entities, check if they're the default revision.
    if ($entity instanceof RevisionableInterface) {
      return $entity->isDefaultRevision();
    }

    // Non-revisionable entities are always considered current.
    return TRUE;
  }

  /**
   * Checks if the given text contains <a> tags with file hrefs.
   *
   * An href counts as a file link when it ends in one of the extensions
   * reported by the swapper service, optionally followed by a query string
   * and/or a fragment. The match is case-insensitive and only considers
   * double-quoted href attributes.
   *
   * @param string $text
   *   The text to check for file links.
   *
   * @return bool
   *   TRUE if the text contains file links, FALSE otherwise (including when
   *   no extensions are available).
   */
  public function containsFileLinks(string $text): bool {
    $availableExtensions = $this->swapperService->getAvailableExtensions();
    if (empty($availableExtensions)) {
      return FALSE;
    }

    // Quote with the pattern delimiter so an extension can never break the
    // expression.
    $extensionPattern = implode('|', array_map(
      static fn ($extension): string => preg_quote((string) $extension, '/'),
      $availableExtensions,
    ));

    // The extension must be the end of the path: it may only be followed by
    // a query string, a fragment, or the closing quote. Without this anchor
    // hrefs such as "https://site.pdf-tools.com/" would count as file links.
    $pattern = '/<a[^>]*href="[^"]*\.(?:' . $extensionPattern . ')(?:\?[^"#]*)?(?:#[^"]*)?"[^>]*>/i';

    return preg_match($pattern, $text) === 1;
  }

  /**
   * Processes file links in the given text and returns replacements.
   *
   * Thin wrapper around processFileLinksWithMetadata() for callers that only
   * need the replacement map; the collected file IDs, media IDs and
   * conversion details are discarded. Keeping a single implementation avoids
   * the two link-matching routines drifting apart.
   *
   * @param string $text
   *   The text to process for file links.
   *
   * @return array
   *   An array of replacements where keys are the original link HTML and
   *   values are the replacement link HTML pointing at the media entity.
   *
   * @throws \Drupal\Core\Entity\EntityStorageException
   */
  private function processFileLinks(string $text): array {
    // Throwaway collectors: the metadata variant fills these by reference.
    $ignoredFileIds = [];
    $ignoredMediaIds = [];
    $ignoredDetails = [];

    return $this->processFileLinksWithMetadata($text, $ignoredFileIds, $ignoredMediaIds, $ignoredDetails);
  }

  /**
   * Processes file links with metadata collection.
   *
   * Every <a> element whose double-quoted href ends in an available file
   * extension (optionally followed by a query string and/or fragment) is
   * resolved to a media entity through the swapper service: by its
   * data-media-uuid attribute when present, otherwise by its href, which is
   * treated as a local path when relative or on the current host and as a
   * remote file otherwise. Resolved links are mapped to a Linkit-style link
   * to the media entity that keeps the original link content.
   *
   * Nothing is processed when the Linkit module is not installed or when no
   * file extensions are available.
   *
   * @param string $text
   *   The text to process for file links.
   * @param array &$allFileIds
   *   Reference to array collecting file IDs.
   * @param array &$allMediaIds
   *   Reference to array collecting media IDs.
   * @param array &$conversionDetails
   *   Reference to array collecting conversion details.
   *
   * @return array
   *   An array of replacements where keys are the original link HTML and
   *   values are the replacement link HTML pointing at the media entity.
   *
   * @throws \Drupal\Core\Entity\EntityStorageException
   */
  private function processFileLinksWithMetadata(string $text, array &$allFileIds, array &$allMediaIds, array &$conversionDetails): array {
    $replacements = [];

    // Skip link processing if Linkit module is not enabled.
    if (!$this->moduleHandler->moduleExists('linkit')) {
      return $replacements;
    }

    $availableExtensions = $this->swapperService->getAvailableExtensions();
    if (empty($availableExtensions)) {
      return $replacements;
    }

    // Quote with the pattern delimiter so an extension can never break the
    // expression.
    $extensionPattern = implode('|', array_map(
      static fn ($extension): string => preg_quote((string) $extension, '/'),
      $availableExtensions,
    ));

    // Find all file links (including PDFs). Groups: 1 = href, 2 = extension,
    // 3 = link content. The extension must end the path; only a query string
    // and/or fragment may follow it before the closing quote.
    $pattern = '/<a[^>]*href="([^"]*\.(' . $extensionPattern . ')(?:\?[^"#]*)?(?:#[^"]*)?)"[^>]*>(.*?)<\/a>/is';
    preg_match_all($pattern, $text, $matches, PREG_SET_ORDER);

    // HTTP_HOST may carry a port ("example.com:8080") while the host parsed
    // from a URL never does, so reduce it to the bare, lower-cased host name.
    // NOTE(review): this reads the superglobal directly; obtaining the host
    // from an injected request stack would need a constructor change.
    $currentDomain = strtolower((string) parse_url('//' . ($_SERVER['HTTP_HOST'] ?? ''), PHP_URL_HOST));

    foreach ($matches as $match) {
      $linkHtml = $match[0];
      $href = $match[1];
      // Capture the text between <a></a> tags.
      $linkText = $match[3];

      // Check for data-media-uuid attribute (takes priority).
      $mediaUuid = NULL;
      if (preg_match('/data-media-uuid="([^"]+)"/i', $linkHtml, $uuidMatch)) {
        $mediaUuid = $uuidMatch[1];
      }

      $media = NULL;

      if ($mediaUuid) {
        // Process by UUID.
        $media = $this->swapperService->validateAndProcessFileUuid($mediaUuid);
      }
      elseif (filter_var($href, FILTER_VALIDATE_URL) === FALSE) {
        // Relative path - process as local path.
        $media = $this->swapperService->validateAndProcessFilePath($href);
      }
      else {
        $urlHost = strtolower((string) parse_url($href, PHP_URL_HOST));

        if ($currentDomain !== '' && $urlHost === $currentDomain) {
          // Same domain - process as local path.
          $media = $this->swapperService->validateAndProcessFilePath($href);
        }
        else {
          // Remote domain - process as remote file. A string result is
          // treated as a failure and leaves the link untouched.
          $result = $this->swapperService->validateAndProcessRemoteFile($href);
          if (!is_string($result)) {
            $media = $result;
          }
        }
      }

      if (!$media || !method_exists($media, 'uuid')) {
        continue;
      }

      // Create linkit-style <a> tag preserving the original link text.
      $replacements[$linkHtml] = sprintf(
        '<a href="/media/%s" data-entity-type="media" data-entity-uuid="%s" data-entity-substitution="media">%s</a>',
        $media->id(),
        $media->uuid(),
        $linkText
      );

      // Look up the file behind the media through its source field, if any.
      $file = NULL;
      if (method_exists($media, 'getSource') && $media->getSource()) {
        $sourceField = $media->getSource()
          ->getConfiguration()['source_field'] ?? NULL;
        if ($sourceField && $media->hasField($sourceField)) {
          $file = $media->get($sourceField)->entity;
        }
      }

      // Collect metadata.
      if ($file) {
        $allFileIds[] = $file->id();
      }
      $allMediaIds[] = $media->id();
      $conversionDetails[] = sprintf('Link: %s -> Media ID %s', $href, $media->id());
    }

    return $replacements;
  }

  /**
   * Processes entities with custom content detection and replacement.
   *
   * For every entity that has the target field and is the default revision,
   * the text of the first field item is handed to $contentDetector. When the
   * detector accepts it, $replacementProcessor supplies a map of replacements
   * that is applied with strtr(); the entity is then saved and passed to
   * updateRevision().
   *
   * @param string $fieldSelector
   *   The field selector in the format 'entity_type.bundle.field_name'. Only
   *   the field name part is used by this method.
   * @param array $entities
   *   Array of entities to process.
   * @param callable $contentDetector
   *   Function that takes ($richText) and returns bool if content should be
   *   processed.
   * @param callable $replacementProcessor
   *   Function that takes ($richText) and returns an array of replacements,
   *   keyed by the string to search for.
   *
   * @return array
   *   Array of updated entities, keyed by entity ID.
   *
   * @throws \InvalidArgumentException
   *   If the field selector does not consist of exactly three dot-separated
   *   parts.
   * @throws \Drupal\Core\Entity\EntityStorageException
   */
  public function processEntitiesWithReplacements(
    string $fieldSelector,
    array $entities,
    callable $contentDetector,
    callable $replacementProcessor,
  ): array {
    $output = [];
    $fieldSelectorParts = explode('.', $fieldSelector);
    if (count($fieldSelectorParts) !== 3) {
      throw new \InvalidArgumentException('Field selector must be in the format "entity_type.bundle.field_name".');
    }
    $field_name = array_pop($fieldSelectorParts);

    foreach ($entities as $entity) {
      if (!$entity->hasField($field_name)) {
        continue;
      }

      // Check if the entity is the default revision.
      if (!$this->isDefaultRevision($entity)) {
        $this->loggerChannel->info('Skipping non-current revision for @entity_type @id (revision @revision_id)', [
          '@entity_type' => $entity->getEntityTypeId(),
          '@id' => $entity->id(),
          '@revision_id' => $entity->getRevisionId(),
        ]);
        continue;
      }

      $fieldItems = $entity->get($field_name);
      $richText = $fieldItems->value ?? '';

      // Use the provided content detector.
      if (!$contentDetector($richText)) {
        continue;
      }

      // Use the provided replacement processor.
      $replacements = $replacementProcessor($richText);
      if (empty($replacements)) {
        continue;
      }

      // Write only the properties this method owns on the first item. Setting
      // the whole field to a single ['value', 'format'] array would replace
      // the item list, discarding other item properties (such as the summary
      // of a text-with-summary field) and any additional deltas.
      $fieldItems->value = strtr($richText, $replacements);
      if ($fieldItems->format === NULL) {
        // Keep the historical fallback for items without a text format.
        $fieldItems->format = 'full_html';
      }

      $entity->save();
      $this->updateRevision($entity);
      $output[$entity->id()] = $entity;
    }

    return $output;
  }

  /**
   * Saves entity with text replacements and creates swap record.
   *
   * Applies $replacements to $originalText with strtr(), stores the result in
   * the first item of the given field, saves the entity, calls
   * updateRevision() and finally records the swap with status 'completed'.
   *
   * @param \Drupal\Core\Entity\FieldableEntityInterface $entity
   *   The entity to save.
   * @param string $fieldName
   *   The field name to update.
   * @param string $originalText
   *   The original text content.
   * @param array $replacements
   *   Array of string replacements to apply, keyed by the string to search
   *   for.
   * @param string $fieldSelector
   *   The field selector for the swap record.
   * @param string $category
   *   The batch category.
   * @param array $metadata
   *   Metadata for the swap record.
   *
   * @throws \Drupal\Core\Entity\EntityStorageException
   */
  private function saveEntityWithReplacements(
    FieldableEntityInterface $entity,
    string $fieldName,
    string $originalText,
    array $replacements,
    string $fieldSelector,
    string $category,
    array $metadata,
  ): void {
    $fieldItems = $entity->get($fieldName);

    // Write only the properties this method owns on the first item. Setting
    // the whole field to a single ['value', 'format'] array would replace the
    // item list, discarding other item properties (such as the summary of a
    // text-with-summary field) and any additional deltas.
    $fieldItems->value = strtr($originalText, $replacements);
    if ($fieldItems->format === NULL) {
      // Keep the historical fallback for items without a text format.
      $fieldItems->format = 'full_html';
    }

    $entity->save();
    $this->updateRevision($entity);
    $this->createSwapRecord($fieldSelector, $entity, $category, 'completed', $metadata);
  }

  /**
   * Records the outcome of a conversion as a media_swap_record entity.
   *
   * Any \Exception raised while building or saving the record is caught and
   * logged to the module's logger channel instead of being rethrown, so a
   * failure to write the record does not interrupt the caller.
   *
   * @param string $fieldSelector
   *   The field selector.
   * @param \Drupal\Core\Entity\FieldableEntityInterface $entity
   *   The entity that was processed.
   * @param string $category
   *   The batch category.
   * @param string $status
   *   The processing status ('completed', 'failed', etc.).
   * @param array $metadata
   *   Optional metadata; each key is applied only when its value is
   *   non-empty:
   *   - source_file_id: ID of the source file entity
   *   - created_media_id: ID of the created media entity
   *   - conversion_details: Details about the conversion
   *   - error_message: Error message if failed.
   */
  public function createSwapRecord(
    string $fieldSelector,
    FieldableEntityInterface $entity,
    string $category,
    string $status,
    array $metadata = [],
  ): void {
    try {
      /** @var \Drupal\image_to_media_swapper\Entity\MediaSwapRecordInterface $record */
      $record = $this->entityTypeManager
        ->getStorage('media_swap_record')
        ->create([
          'field_selector' => $fieldSelector,
          'target_entity_type' => $entity->getEntityTypeId(),
          'target_bundle' => $entity->bundle(),
          'target_entity_id' => $entity->id(),
          'batch_category' => $category,
          'status' => $status,
          'processed_time' => $this->time->getRequestTime(),
        ]);

      // Map each optional metadata key to the setter that stores it; the
      // setters run in declaration order, and only for non-empty values.
      $optionalSetters = [
        'source_file_id' => $record->setSourceFileId(...),
        'created_media_id' => $record->setCreatedMediaId(...),
        'conversion_details' => $record->setConversionDetails(...),
        'error_message' => $record->setErrorMessage(...),
      ];
      foreach ($optionalSetters as $metadataKey => $applyValue) {
        if (!empty($metadata[$metadataKey])) {
          $applyValue($metadata[$metadataKey]);
        }
      }

      $record->save();
    }
    catch (\Exception $failure) {
      // Deliberately best-effort: report the problem and let the batch go on.
      $this->loggerChannel->error('Failed to create swap record for entity @id: @error', [
        '@id' => $entity->id(),
        '@error' => $failure->getMessage(),
      ]);
    }
  }

}

Главная | Обратная связь

drupal hosting | друпал хостинг | it patrol .inc