image_to_media_swapper-2.x-dev/src/BatchProcessorService.php
src/BatchProcessorService.php
<?php
declare(strict_types=1);
namespace Drupal\image_to_media_swapper;
use Drupal\Component\Datetime\TimeInterface;
use Drupal\Core\Entity\RevisionableInterface;
use Drupal\Core\Entity\FieldableEntityInterface;
use Drupal\Core\Entity\EntityDisplayRepositoryInterface;
use Drupal\Core\Entity\EntityFieldManagerInterface;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Extension\ModuleHandlerInterface;
use Drupal\Core\Field\FieldDefinitionInterface;
use Drupal\Core\Logger\LoggerChannelFactoryInterface;
use Drupal\Core\Logger\LoggerChannelInterface;
use Drupal\Core\Messenger\MessengerTrait;
use Drupal\Core\Session\AccountProxyInterface;
use Drupal\Core\StringTranslation\StringTranslationTrait;
use Drupal\file\FileInterface;
use Drupal\filter\FilterFormatInterface;
/**
* Helper functions to find and process eligible text fields.
*/
class BatchProcessorService {
use StringTranslationTrait;
use MessengerTrait;
/**
* The logger channel for the image_to_media_swapper module.
*
* @var \Drupal\Core\Logger\LoggerChannelInterface
*/
protected LoggerChannelInterface $loggerChannel;
/**
* Constructs a BatchProcessorService object.
*
* @param \Drupal\Core\Entity\EntityTypeManagerInterface $entityTypeManager
* The entity type manager, used to obtain storage handlers and entity type
* definitions.
* @param \Drupal\Core\Entity\EntityFieldManagerInterface $entityFieldManager
* The entity field manager, used to read the field map and per-bundle field
* definitions.
* @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $loggerChannelFactory
* The logger channel factory; only used here to resolve the
* 'image_to_media_swapper' channel.
* @param \Drupal\image_to_media_swapper\SwapperService $swapperService
* The swapper service that resolves files and media for images and links.
* @param \Drupal\Core\Entity\EntityDisplayRepositoryInterface $entityDisplayRepository
* The entity display repository, used to read form display components.
* @param \Drupal\Core\Session\AccountProxyInterface $currentUser
* The current user, recorded as the author of new revisions.
* @param \Drupal\Component\Datetime\TimeInterface $time
* The time service, used for revision and swap record timestamps.
* @param \Drupal\Core\Extension\ModuleHandlerInterface $moduleHandler
* The module handler, used to check whether the Linkit module is enabled.
*/
public function __construct(
private readonly EntityTypeManagerInterface $entityTypeManager,
private readonly EntityFieldManagerInterface $entityFieldManager,
LoggerChannelFactoryInterface $loggerChannelFactory,
private readonly SwapperService $swapperService,
private readonly EntityDisplayRepositoryInterface $entityDisplayRepository,
private readonly AccountProxyInterface $currentUser,
private readonly TimeInterface $time,
private readonly ModuleHandlerInterface $moduleHandler,
) {
// The factory is not stored; only the module's own channel is kept.
$this->loggerChannel = $loggerChannelFactory->get('image_to_media_swapper');
}
/**
 * Builds and registers the conversion batch for one field.
 *
 * The matching entities are looked up, their IDs are split into groups of
 * 20, and one BatchHandler::processChunk operation is queued per group. When
 * nothing matches, a status message is shown and no batch is registered.
 *
 * @param string $fieldSelector
 *   The field selector in the format 'entity_type.bundle.field_name'.
 * @param string $category
 *   The category for the batch process. 'mixed' searches without a type
 *   filter; any other value is used as the single type to look for.
 */
public function startBatch(string $fieldSelector, string $category): void {
  // 'mixed' means "no filter"; everything else narrows to one type.
  $typeFilter = [];
  if ($category !== 'mixed') {
    $typeFilter[] = $category;
  }
  $matches = $this->getEntitiesWithFiles($fieldSelector, $typeFilter);

  if (empty($matches)) {
    // Nothing to convert: tell the user and stop before touching batch API.
    $notice = $this->t('No entities found with @category for @selector.', [
      '@category' => $category,
      '@selector' => $fieldSelector,
    ]);
    $this->messenger()->addStatus($notice);
    return;
  }

  // Callbacks are given as class-name strings so the batch stays
  // serializable between requests.
  $handler = '\Drupal\image_to_media_swapper\BatchHandler';
  $operations = array_map(
    static fn (array $idChunk): array => [
      [$handler, 'processChunk'],
      [$fieldSelector, $idChunk, $category],
    ],
    array_chunk(array_keys($matches), 20),
  );

  batch_set([
    'title' => $this->t('Converting expected @category to media entities...', ['@category' => $category]),
    'operations' => $operations,
    'finished' => [$handler, 'batchFinished'],
    'init_message' => $this->t('Initializing file conversion...'),
    'progress_message' => $this->t('Processed @current out of @total.'),
    'error_message' => $this->t('An error occurred during file-to-media conversion.'),
  ]);
}
/**
 * Retrieve entities with <img> tags or file links in the specified field.
 *
 * Only the first value of the field is inspected. Any error raised while
 * querying or loading is logged and the results gathered so far are
 * returned.
 *
 * @param string $fieldSelector
 *   The field selector in the format 'entity_type.bundle.field_name'.
 * @param array $types
 *   Array of types to check for ('images', 'links'). If empty, checks both.
 *
 * @return array
 *   When $types holds zero or one entry: the matching entities keyed by
 *   entity ID. When $types holds more than one entry: an array with 'images'
 *   and 'links' keys, each holding the number of matching entities.
 */
public function getEntitiesWithFiles(string $fieldSelector, array $types = []): array {
  // Determine what to check for and which result shape the caller expects.
  $checkImages = empty($types) || in_array('images', $types, TRUE);
  $checkLinks = empty($types) || in_array('links', $types, TRUE);
  $returnCategorized = count($types) > 1;
  $categorized = ['images' => 0, 'links' => 0];
  $output = [];

  // Validate the selector before destructuring it; a malformed selector
  // would otherwise raise undefined-index warnings and query a bogus type.
  $selectorParts = explode('.', $fieldSelector);
  if (count($selectorParts) !== 3) {
    $this->loggerChannel->error('Error in getEntitiesWithFiles for @selector: @msg', [
      '@selector' => $fieldSelector,
      '@msg' => 'Field selector must be in the format "entity_type.bundle.field_name".',
    ]);
    return $returnCategorized ? $categorized : $output;
  }
  [$entity_type_id, $bundle, $field_name] = $selectorParts;

  try {
    $storage = $this->entityTypeManager->getStorage($entity_type_id);
    $entityType = $this->entityTypeManager->getDefinition($entity_type_id);
    $query = $storage->getQuery()->accessCheck(TRUE);
    // Use the correct bundle key for the entity type.
    if ($entityType->hasKey('bundle')) {
      $query->condition($entityType->getKey('bundle'), $bundle);
    }
    // Evaluate the conditions against each entity's latest revision.
    // NOTE(review): the latest revision is not necessarily the default one
    // (e.g. forward drafts); entities are loaded by ID below, so confirm
    // this is the intended revision to match on.
    if ($entityType->isRevisionable()) {
      $query->latestRevision();
    }
    // For paragraph entities, filter to exclude orphaned revisions: only
    // include paragraphs that have a parent entity reference.
    if ($entity_type_id === 'paragraph') {
      $query->exists('parent_id');
      $query->exists('parent_type');
    }
    $entity_ids = $query->execute();
    if (empty($entity_ids)) {
      return $returnCategorized ? $categorized : $output;
    }

    foreach ($storage->loadMultiple($entity_ids) as $entity) {
      /** @var \Drupal\Core\Entity\FieldableEntityInterface $entity */
      if (!$entity->hasField($field_name)) {
        continue;
      }
      $fieldValue = $entity->get($field_name)->value ?? '';
      $hasImages = $checkImages && str_contains($fieldValue, '<img');
      $hasLinks = $checkLinks && $this->containsFileLinks($fieldValue);

      if ($returnCategorized) {
        // An entity can count towards both categories.
        if ($hasImages) {
          $categorized['images']++;
        }
        if ($hasLinks) {
          $categorized['links']++;
        }
      }
      elseif ($hasImages || $hasLinks) {
        // Entity list mode: keep the entity if it matches any requested type.
        $output[$entity->id()] = $entity;
      }
    }
  }
  catch (\Throwable $e) {
    $this->loggerChannel->error('Error in getEntitiesWithFiles for @selector: @msg', [
      '@selector' => $fieldSelector,
      '@msg' => $e->getMessage(),
    ]);
  }
  return $returnCategorized ? $categorized : $output;
}
/**
 * Dispatches entity processing to the handler for the given category.
 *
 * 'links' is routed to processFileLinksToMedia(); every other category value
 * is routed to swapImagesToMedia().
 *
 * @param string $fieldSelector
 *   The field selector in the format 'entity_type.bundle.field_name'.
 * @param \Drupal\Core\Entity\FieldableEntityInterface[] $entities
 *   (Optional) An array of already loaded entities to operate on.
 * @param string $category
 *   The category type: 'images', 'links', or 'mixed'.
 *
 * @return \Drupal\Core\Entity\FieldableEntityInterface[]
 *   An array of updated entities.
 *
 * @throws \Drupal\Core\Entity\EntityStorageException
 *   If there is an error saving the entity.
 */
public function processContentByCategory(string $fieldSelector, array $entities = [], string $category = 'mixed'): array {
  if ($category === 'links') {
    return $this->processFileLinksToMedia($fieldSelector, $entities, $category);
  }

  return $this->swapImagesToMedia($fieldSelector, $entities, $category);
}
/**
 * Converts <img> tags to <drupal-media> elements on specified entities.
 *
 * Depending on $category, file links are converted as well: 'images' handles
 * only <img> tags, 'links' only file links, and 'mixed' both. Entities that
 * are not in the current revision chain, lack the field, or contain nothing
 * to convert are skipped. Exceptions raised while processing a single entity
 * are caught and recorded as a 'failed' swap record so the remaining
 * entities are still processed.
 *
 * @param string $fieldSelector
 *   The field selector in the format 'entity_type.bundle.field_name'.
 * @param \Drupal\Core\Entity\FieldableEntityInterface[] $entities
 *   (Optional) An array of already loaded entities to operate on.
 * @param string $category
 *   (Optional) The batch category for creating swap records.
 *
 * @return \Drupal\Core\Entity\FieldableEntityInterface[]
 *   An array of updated entities, keyed by entity ID.
 *
 * @throws \InvalidArgumentException
 *   If the field selector does not consist of exactly three parts.
 */
public function swapImagesToMedia(string $fieldSelector, array $entities = [], string $category = 'mixed'): array {
  $processedEntities = [];
  $fieldSelectorParts = explode('.', $fieldSelector);
  if (count($fieldSelectorParts) !== 3) {
    throw new \InvalidArgumentException('Field selector must be in the format "entity_type.bundle.field_name".');
  }
  $field_name = array_pop($fieldSelectorParts);

  // The category is loop-invariant, so decide once what to look for.
  $wantsImages = $category === 'images' || $category === 'mixed';
  $wantsLinks = $category === 'links' || $category === 'mixed';

  foreach ($entities as $entity) {
    try {
      if (!$entity->hasField($field_name)) {
        continue;
      }
      // Check if the entity is the default revision.
      if (!$this->isDefaultRevision($entity)) {
        $this->loggerChannel->info('Skipping non-current revision for @entity_type @id (revision @revision_id)', [
          '@entity_type' => $entity->getEntityTypeId(),
          '@id' => $entity->id(),
          '@revision_id' => $entity->getRevisionId(),
        ]);
        continue;
      }
      $richText = $entity->get($field_name)->value ?? '';

      // Detect each kind of content once; the same flags gate the
      // conversion steps below.
      $hasImages = $wantsImages && str_contains($richText, '<img');
      $hasLinks = $wantsLinks && $this->containsFileLinks($richText);
      if (!$hasImages && !$hasLinks) {
        continue;
      }

      $replacements = [];
      $conversionDetails = [];
      $allFileIds = [];
      $allMediaIds = [];

      if ($hasImages) {
        $replacements = $this->buildImageReplacements($richText, $allFileIds, $allMediaIds, $conversionDetails);
      }
      // Process file links (including PDFs) based on category.
      if ($hasLinks) {
        $fileReplacements = $this->processFileLinksWithMetadata($richText, $allFileIds, $allMediaIds, $conversionDetails);
        $replacements = array_merge($replacements, $fileReplacements);
      }

      if (!empty($replacements)) {
        // The swap record stores only the first file/media ID; the full
        // list is available in the conversion details text.
        $metadata = [
          'source_file_id' => !empty($allFileIds) ? (int) $allFileIds[0] : NULL,
          'created_media_id' => !empty($allMediaIds) ? (int) $allMediaIds[0] : NULL,
          'conversion_details' => implode('; ', $conversionDetails),
        ];
        $this->saveEntityWithReplacements($entity, $field_name, $richText, $replacements, $fieldSelector, $category, $metadata);
        $processedEntities[$entity->id()] = $entity;
      }
    }
    catch (\Exception $e) {
      // Create failed record.
      $this->createSwapRecord($fieldSelector, $entity, $category, 'failed', [
        'error_message' => $e->getMessage(),
      ]);
    }
  }
  return $processedEntities;
}

/**
 * Builds <img> to <drupal-media> replacements for a piece of rich text.
 *
 * @param string $richText
 *   The markup to scan for <img> tags with a double-quoted src attribute.
 * @param array &$allFileIds
 *   Reference to array collecting file IDs.
 * @param array &$allMediaIds
 *   Reference to array collecting media IDs.
 * @param array &$conversionDetails
 *   Reference to array collecting conversion details.
 *
 * @return array
 *   Replacements keyed by the original <img> markup, with the
 *   <drupal-media> markup as value.
 */
private function buildImageReplacements(string $richText, array &$allFileIds, array &$allMediaIds, array &$conversionDetails): array {
  $replacements = [];
  preg_match_all('/<img[^>]+src="([^"]+)"[^>]*>/i', $richText, $matches, PREG_SET_ORDER);

  foreach ($matches as $match) {
    [$imageDom, $imageUrl] = $match;
    $imageUuid = preg_match('/data-entity-uuid="([^"]+)"/i', $imageDom, $uuidMatch) ? $uuidMatch[1] : '';
    // Extract data-align attribute from the img tag so it can be preserved.
    $dataAlign = preg_match('/data-align="([^"]+)"/i', $imageDom, $alignMatch) ? $alignMatch[1] : '';

    $file = NULL;
    if ($imageUuid) {
      $file = $this->swapperService->findFileFromUuid($imageUuid);
    }
    // Resolve by URL when there is no UUID, and also when the UUID no
    // longer resolves to a file, instead of silently dropping the image.
    if (!$file instanceof FileInterface && $imageUrl) {
      $publicUri = $this->swapperService->convertWebPathToPublicUri($imageUrl);
      if ($publicUri !== NULL) {
        $file = $this->swapperService->findOrCreateFileEntityByUri($publicUri);
      }
      else {
        $this->loggerChannel->warning('Failed to convert web path to public URI: @url', ['@url' => $imageUrl]);
      }
    }
    if (!$file instanceof FileInterface) {
      continue;
    }

    $media = $this->swapperService->findOrCreateMediaFromFileEntity($file);
    if (!$media) {
      continue;
    }

    // Build drupal-media tag with preserved data-align attribute.
    $mediaAttributes = [
      'data-entity-type="media"',
      'data-entity-uuid="' . $media->uuid() . '"',
      'data-view-mode="default"',
    ];
    if (!empty($dataAlign)) {
      $mediaAttributes[] = 'data-align="' . $dataAlign . '"';
    }
    $replacements[$imageDom] = sprintf(
      '<drupal-media %s></drupal-media>',
      implode(' ', $mediaAttributes)
    );

    // Collect metadata.
    $allFileIds[] = $file->id();
    $allMediaIds[] = $media->id();
    $detailText = sprintf('Image: %s -> Media ID %s', $imageUrl, $media->id());
    if (!empty($dataAlign)) {
      $detailText .= sprintf(' (data-align: %s)', $dataAlign);
    }
    $conversionDetails[] = $detailText;
  }

  return $replacements;
}
/**
 * Replaces file links with media links on the given entities.
 *
 * Only file links are handled here; <img> tags are left untouched. Entities
 * that lack the field, are not in the current revision chain, or contain no
 * file links are skipped. An exception raised while handling one entity is
 * caught and stored as a 'failed' swap record.
 *
 * @param string $fieldSelector
 *   The field selector in the format 'entity_type.bundle.field_name'.
 * @param \Drupal\Core\Entity\FieldableEntityInterface[] $entities
 *   (Optional) An array of already loaded entities to operate on.
 * @param string $category
 *   (Optional) The batch category for creating swap records.
 *
 * @return \Drupal\Core\Entity\FieldableEntityInterface[]
 *   An array of updated entities, keyed by entity ID.
 *
 * @throws \InvalidArgumentException
 *   If the field selector does not consist of exactly three parts.
 */
public function processFileLinksToMedia(string $fieldSelector, array $entities = [], string $category = 'links'): array {
  $selectorParts = explode('.', $fieldSelector);
  if (count($selectorParts) !== 3) {
    throw new \InvalidArgumentException('Field selector must be in the format "entity_type.bundle.field_name".');
  }
  // The field name is the last of the three selector parts.
  $fieldName = $selectorParts[2];
  $updated = [];

  foreach ($entities as $entity) {
    try {
      if (!$entity->hasField($fieldName)) {
        continue;
      }

      // Only content in the current revision chain is rewritten.
      if (!$this->isDefaultRevision($entity)) {
        $logContext = [
          '@entity_type' => $entity->getEntityTypeId(),
          '@id' => $entity->id(),
          '@revision_id' => $entity->getRevisionId(),
        ];
        $this->loggerChannel->info('Skipping non-current revision for @entity_type @id (revision @revision_id)', $logContext);
        continue;
      }

      $markup = $entity->get($fieldName)->value ?? '';
      if (!$this->containsFileLinks($markup)) {
        continue;
      }

      // These arrays are filled by reference while links are resolved.
      $fileIds = [];
      $mediaIds = [];
      $details = [];
      $linkReplacements = $this->processFileLinksWithMetadata($markup, $fileIds, $mediaIds, $details);
      if (empty($linkReplacements)) {
        continue;
      }

      $recordData = [
        'source_file_id' => empty($fileIds) ? NULL : (int) $fileIds[0],
        'created_media_id' => empty($mediaIds) ? NULL : (int) $mediaIds[0],
        'conversion_details' => implode('; ', $details),
      ];
      $this->saveEntityWithReplacements($entity, $fieldName, $markup, $linkReplacements, $fieldSelector, $category, $recordData);
      $updated[$entity->id()] = $entity;
    }
    catch (\Exception $e) {
      // Record the failure and carry on with the next entity.
      $this->createSwapRecord($fieldSelector, $entity, $category, 'failed', [
        'error_message' => $e->getMessage(),
      ]);
    }
  }

  return $updated;
}
/**
 * Lists text fields that can be offered for conversion.
 *
 * A field qualifies when it either has no allowed-format restriction or
 * allows at least one CKEditor 5 format that has the media_embed or
 * filter_html_image_secure filter configured, and when at least one entity
 * actually contains images or file links in that field.
 *
 * @return string[]
 *   An array of options in the form 'entity_type.bundle.field_name' =>
 *   'Label'.
 */
public function getEligibleTextFields(): array {
  // Keep the IDs of formats that are edited with CKEditor 5 and carry one
  // of the two relevant filters in their configuration.
  $editorStorage = $this->entityTypeManager->getStorage('editor');
  $mediaFormatIds = array_keys(array_filter(
    filter_formats(),
    function (FilterFormatInterface $format) use ($editorStorage): bool {
      /** @var \Drupal\editor\EditorInterface $editor */
      $editor = $editorStorage->load($format->id());
      if (!$editor || $editor->getEditor() !== 'ckeditor5') {
        return FALSE;
      }
      $filterIds = array_keys($format->get('filters'));
      return in_array('media_embed', $filterIds, TRUE)
        || in_array('filter_html_image_secure', $filterIds, TRUE);
    }
  ));

  $options = [];
  foreach (['text', 'text_long', 'text_with_summary'] as $fieldType) {
    $fieldMap = $this->entityFieldManager->getFieldMapByFieldType($fieldType);
    foreach ($fieldMap as $entity_type => $fields) {
      foreach ($fields as $field_name => $info) {
        foreach ($info['bundles'] as $bundle) {
          $definition = $this->entityFieldManager->getFieldDefinitions($entity_type, $bundle)[$field_name] ?? NULL;
          if (!$definition instanceof FieldDefinitionInterface) {
            continue;
          }

          // Unrestricted fields always qualify; restricted ones need to
          // overlap with the media-capable formats.
          $allowedFormats = $this->getFieldAllowedFormats($entity_type, $bundle, $field_name, $definition);
          $formatQualifies = empty($allowedFormats)
            || !empty(array_intersect($allowedFormats, $mediaFormatIds));
          if (!$formatQualifies) {
            continue;
          }

          $key = "$entity_type.$bundle.$field_name";
          $label = $this->t("@entity_type/@bundle/@field_name", [
            '@entity_type' => $entity_type,
            '@bundle' => $bundle,
            '@field_name' => $field_name,
          ]);
          // Only offer the field when some entity has content to convert.
          if ($this->getEntitiesWithFiles($key)) {
            $options[$key] = $label;
          }
        }
      }
    }
  }

  return $options;
}
/**
 * Resolves the text formats a field is restricted to.
 *
 * The field definition's 'allowed_formats' setting wins when it is not
 * empty. Otherwise the form display component's third-party
 * 'allowed_formats' settings are consulted, keeping the keys of the enabled
 * entries.
 *
 * @param string $entity_type
 *   The entity type.
 * @param string $bundle
 *   The bundle.
 * @param string $field_name
 *   The field name.
 * @param \Drupal\Core\Field\FieldDefinitionInterface $definition
 *   The field definition.
 *
 * @return array
 *   Array of allowed format IDs; empty when no restriction was found.
 */
private function getFieldAllowedFormats(string $entity_type, string $bundle, string $field_name, FieldDefinitionInterface $definition): array {
  $allowed = $definition->getSetting('allowed_formats') ?? [];
  if (!empty($allowed)) {
    return $allowed;
  }

  try {
    $component = $this->entityDisplayRepository
      ->getFormDisplay($entity_type, $bundle)
      ->getComponent($field_name);
    $fromDisplay = $component['third_party_settings']['allowed_formats']['allowed_formats'] ?? NULL;
    if ($fromDisplay !== NULL) {
      // Entries with a falsy value are disabled formats; drop them.
      $allowed = array_keys(array_filter($fromDisplay));
    }
  }
  catch (\Exception $e) {
    // A form display that cannot be loaded simply means "no restriction
    // found"; the value read from the definition is returned.
  }

  return $allowed;
}
/**
 * Creates a new revision of the entity and, first, of its parent chain.
 *
 * Entities exposing getParentEntity() have their parent processed
 * recursively before the entity itself. Non-revisionable entities are not
 * saved by this method.
 *
 * @param \Drupal\Core\Entity\FieldableEntityInterface $entity
 *   The entity to update parent revisions for.
 *
 * @throws \Drupal\Core\Entity\EntityStorageException
 *   If saving the entity or one of its parents fails.
 */
private function updateRevision(FieldableEntityInterface $entity): void {
  // Walk up to the parent first. getParentEntity() may yield no parent (an
  // orphaned child); passing that on would raise a TypeError that callers
  // catching \Exception cannot handle, so only recurse on real entities.
  if (method_exists($entity, 'getParentEntity')) {
    $parent = $entity->getParentEntity();
    if ($parent instanceof FieldableEntityInterface) {
      $this->updateRevision($parent);
    }
  }

  // Only revisionable entities get a new revision.
  if (!$entity instanceof RevisionableInterface) {
    return;
  }

  $entity->setNewRevision();
  // The revision metadata setters are optional, so probe for each of them.
  if (method_exists($entity, 'setRevisionLogMessage')) {
    $entity->setRevisionLogMessage($this->t('Updated media references in @entity_type @bundle.', [
      '@entity_type' => $entity->getEntityTypeId(),
      '@bundle' => $entity->bundle(),
    ]));
  }
  if (method_exists($entity, 'setRevisionUserId')) {
    // Set the revision user ID to the current user.
    $entity->setRevisionUserId($this->currentUser->id());
  }
  if (method_exists($entity, 'setRevisionCreationTime')) {
    $entity->setRevisionCreationTime($this->time->getRequestTime());
  }
  $entity->save();
}
/**
* Checks if the entity is part of the currently active content.
*
* Entities that expose getParentEntity() are resolved through their parent:
* an entity without a parent is rejected, a paragraph must be referenced
* (same ID and revision ID) by the parent field named in its
* 'parent_field_name' field, and the parent is then checked recursively.
* Entities without a parent accessor are judged by their own default
* revision flag, and non-revisionable entities always pass.
*
* @param \Drupal\Core\Entity\FieldableEntityInterface $entity
* The entity to check.
*
* @return bool
* TRUE if the entity is the default revision, FALSE otherwise.
*/
private function isDefaultRevision(FieldableEntityInterface $entity): bool {
// For paragraphs and other entities with parent relationships.
if (method_exists($entity, 'getParentEntity')) {
$parent = $entity->getParentEntity();
// If no parent, this entity is orphaned and should be skipped.
if (!$parent) {
return FALSE;
}
// For paragraphs, check if they're in the parent's current revision.
if ($entity->getEntityTypeId() === 'paragraph' && $entity->hasField('parent_field_name')) {
$parent_field_name = $entity->get('parent_field_name')->value;
// When the returned parent is not its default revision, reload it by ID
// from storage so the field values below come from the reloaded entity.
if ($parent instanceof RevisionableInterface && !$parent->isDefaultRevision()) {
$parent_storage = $this->entityTypeManager->getStorage($parent->getEntityTypeId());
$parent = $parent_storage->load($parent->id());
}
// The reload can come back empty, hence the repeated truthiness check.
if ($parent && $parent->hasField($parent_field_name)) {
$field = $parent->get($parent_field_name);
// Check if this paragraph is in the parent's current field values.
foreach ($field as $value) {
// IDs are compared loosely, so int and string forms of the same ID match.
if ($value->entity &&
$value->entity->getRevisionId() == $entity->getRevisionId() &&
$value->entity->id() == $entity->id()) {
// Recursively check if parent is also in its default revision.
return $this->isDefaultRevision($parent);
}
}
// Paragraph not found in parent's current revision field values.
return FALSE;
}
}
// For other entities with parents (and paragraphs whose parent lacks the
// referenced field), recursively check the parent.
if ($parent instanceof FieldableEntityInterface) {
return $this->isDefaultRevision($parent);
}
}
// For regular entities, check if they're the default revision.
if ($entity instanceof RevisionableInterface) {
return $entity->isDefaultRevision();
}
// Non-revisionable entities are always considered current.
return TRUE;
}
/**
 * Checks if the given text contains <a> tags with file hrefs.
 *
 * A link counts as a file link when its double-quoted href contains a dot
 * followed by one of the extensions reported by the swapper service. The
 * match is case-insensitive.
 *
 * @param string $text
 *   The text to check for file links.
 *
 * @return bool
 *   TRUE if the text contains file links, FALSE otherwise (including when
 *   no extensions are available).
 */
public function containsFileLinks(string $text): bool {
  $availableExtensions = $this->swapperService->getAvailableExtensions();
  if (empty($availableExtensions)) {
    return FALSE;
  }

  // Quote each extension with the pattern delimiter included; without it an
  // extension containing "/" would terminate the pattern early.
  $quotedExtensions = array_map(
    static fn ($extension): string => preg_quote((string) $extension, '/'),
    $availableExtensions,
  );

  // NOTE(review): the trailing [^"]* lets the extension match anywhere in
  // the href rather than only at the end of the path — confirm that this
  // leniency is intended.
  $pattern = '/<a[^>]*href="[^"]*\.(' . implode('|', $quotedExtensions) . ')(?:\?[^"]*)?(?:#[^"]*)?[^"]*"[^>]*>/i';
  return preg_match($pattern, $text) === 1;
}
/**
 * Processes file links in the given text and returns replacements.
 *
 * This is the metadata-free variant of processFileLinksWithMetadata(). It
 * delegates to that method so the link matching and media resolution rules
 * live in one place, and discards the collected metadata.
 *
 * @param string $text
 *   The text to process for file links.
 *
 * @return array
 *   An array of replacements where keys are original link HTML and values
 *   are the media link HTML.
 *
 * @throws \Drupal\Core\Entity\EntityStorageException
 */
private function processFileLinks(string $text): array {
  // Throwaway collectors: the by-reference parameters are required by the
  // delegate but their contents are not needed here.
  $fileIds = [];
  $mediaIds = [];
  $conversionDetails = [];

  return $this->processFileLinksWithMetadata($text, $fileIds, $mediaIds, $conversionDetails);
}
/**
 * Processes file links with metadata collection.
 *
 * Every <a> tag whose double-quoted href points at one of the available
 * file extensions is resolved to a media entity: by its data-media-uuid
 * attribute when present, otherwise by its href (local path, same-host
 * absolute URL, or remote URL). Links that cannot be resolved are left out
 * of the result. Nothing is processed when the Linkit module is not enabled
 * or no extensions are available.
 *
 * @param string $text
 *   The text to process for file links.
 * @param array &$allFileIds
 *   Reference to array collecting file IDs.
 * @param array &$allMediaIds
 *   Reference to array collecting media IDs.
 * @param array &$conversionDetails
 *   Reference to array collecting conversion details.
 *
 * @return array
 *   An array of replacements where keys are original link HTML and values
 *   are the media link HTML.
 *
 * @throws \Drupal\Core\Entity\EntityStorageException
 */
private function processFileLinksWithMetadata(string $text, array &$allFileIds, array &$allMediaIds, array &$conversionDetails): array {
  $replacements = [];
  // Skip link processing if Linkit module is not enabled.
  if (!$this->moduleHandler->moduleExists('linkit')) {
    return $replacements;
  }
  $availableExtensions = $this->swapperService->getAvailableExtensions();
  if (empty($availableExtensions)) {
    return $replacements;
  }

  // Quote extensions with the delimiter included so an extension containing
  // "/" cannot break the pattern.
  $quotedExtensions = array_map(
    static fn ($extension): string => preg_quote((string) $extension, '/'),
    $availableExtensions,
  );
  // Find all file links (including PDFs).
  $pattern = '/<a[^>]*href="([^"]*\.(' . implode('|', $quotedExtensions) . ')(?:\?[^"]*)?(?:#[^"]*)?[^"]*?)"[^>]*>(.*?)<\/a>/is';
  preg_match_all($pattern, $text, $matches, PREG_SET_ORDER);

  // HTTP_HOST may carry a port and arbitrary letter case, while the host
  // parsed from a URL has neither; normalize so same-site links are not
  // mistaken for remote ones. It may be absent (e.g. on the command line),
  // in which case every absolute URL is treated as remote.
  $currentHost = strtolower((string) preg_replace('/:\d+$/', '', (string) ($_SERVER['HTTP_HOST'] ?? '')));

  foreach ($matches as $match) {
    // Index 2 is the matched extension, which is not needed here.
    [$linkHtml, $href, , $linkText] = $match;

    // Check for data-media-uuid attribute (takes priority).
    $mediaUuid = preg_match('/data-media-uuid="([^"]+)"/i', $linkHtml, $uuidMatch) ? $uuidMatch[1] : NULL;

    $media = NULL;
    if ($mediaUuid) {
      // Process by UUID.
      $media = $this->swapperService->validateAndProcessFileUuid($mediaUuid);
    }
    elseif (filter_var($href, FILTER_VALIDATE_URL) === FALSE) {
      // Relative path - process as local path.
      $media = $this->swapperService->validateAndProcessFilePath($href);
    }
    else {
      $urlHost = strtolower((string) parse_url($href, PHP_URL_HOST));
      if ($currentHost !== '' && $urlHost === $currentHost) {
        // Same domain - process as local path.
        $media = $this->swapperService->validateAndProcessFilePath($href);
      }
      else {
        // Remote domain - process as remote file. A string result is
        // treated as a failure and leaves the link unresolved.
        $result = $this->swapperService->validateAndProcessRemoteFile($href);
        if (!is_string($result)) {
          $media = $result;
        }
      }
    }

    // Get the source file from the media, if available, for the metadata.
    $file = NULL;
    if ($media && method_exists($media, 'getSource') && $media->getSource()) {
      $sourceField = $media->getSource()
        ->getConfiguration()['source_field'] ?? NULL;
      if ($sourceField && $media->hasField($sourceField)) {
        $file = $media->get($sourceField)->entity;
      }
    }

    if ($media && method_exists($media, 'uuid')) {
      // Create linkit-style <a> tag preserving the original link text.
      $replacements[$linkHtml] = sprintf(
        '<a href="/media/%s" data-entity-type="media" data-entity-uuid="%s" data-entity-substitution="media">%s</a>',
        $media->id(),
        $media->uuid(),
        $linkText
      );
      // Collect metadata.
      if ($file) {
        $allFileIds[] = $file->id();
      }
      $allMediaIds[] = $media->id();
      $conversionDetails[] = sprintf('Link: %s -> Media ID %s', $href, $media->id());
    }
  }
  return $replacements;
}
/**
 * Applies caller-supplied detection and replacement logic to entities.
 *
 * For each entity that has the field and is in the current revision chain,
 * the detector decides whether the field text is relevant and the processor
 * supplies the replacements. Entities with at least one replacement are
 * saved, given a new revision, and returned.
 *
 * @param string $fieldSelector
 *   The field selector in the format 'entity_type.bundle.field_name'.
 * @param array $entities
 *   Array of entities to process.
 * @param callable $contentDetector
 *   Function that takes ($richText) and returns bool if content should be
 *   processed.
 * @param callable $replacementProcessor
 *   Function that takes ($richText) and returns array of replacements.
 *
 * @return array
 *   Array of updated entities, keyed by entity ID.
 *
 * @throws \InvalidArgumentException
 *   If the field selector does not consist of exactly three parts.
 * @throws \Drupal\Core\Entity\EntityStorageException
 */
public function processEntitiesWithReplacements(
  string $fieldSelector,
  array $entities,
  callable $contentDetector,
  callable $replacementProcessor,
): array {
  $selectorParts = explode('.', $fieldSelector);
  if (count($selectorParts) !== 3) {
    throw new \InvalidArgumentException('Field selector must be in the format "entity_type.bundle.field_name".');
  }
  // The field name is the last of the three selector parts.
  $fieldName = $selectorParts[2];
  $updated = [];

  foreach ($entities as $entity) {
    if (!$entity->hasField($fieldName)) {
      continue;
    }

    // Only content in the current revision chain is rewritten.
    if (!$this->isDefaultRevision($entity)) {
      $logContext = [
        '@entity_type' => $entity->getEntityTypeId(),
        '@id' => $entity->id(),
        '@revision_id' => $entity->getRevisionId(),
      ];
      $this->loggerChannel->info('Skipping non-current revision for @entity_type @id (revision @revision_id)', $logContext);
      continue;
    }

    $markup = $entity->get($fieldName)->value ?? '';
    if (!$contentDetector($markup)) {
      continue;
    }

    $swaps = $replacementProcessor($markup);
    if (empty($swaps)) {
      continue;
    }

    // Keep the field's current text format, falling back to 'full_html'.
    $format = $entity->get($fieldName)->format ?? 'full_html';
    $entity->set($fieldName, [
      'value' => strtr($markup, $swaps),
      'format' => $format,
    ]);
    // Persist the change first, then create the new revision(s).
    $entity->save();
    $this->updateRevision($entity);
    $updated[$entity->id()] = $entity;
  }

  return $updated;
}
/**
 * Writes the replaced text to the entity and records the conversion.
 *
 * The replacements are applied to the original text, the result is stored
 * in the field (keeping its text format, or 'full_html' when none is set),
 * the entity is saved, new revisions are created, and a 'completed' swap
 * record is written.
 *
 * @param \Drupal\Core\Entity\FieldableEntityInterface $entity
 *   The entity to save.
 * @param string $fieldName
 *   The field name to update.
 * @param string $originalText
 *   The original text content.
 * @param array $replacements
 *   Array of string replacements to apply.
 * @param string $fieldSelector
 *   The field selector for the swap record.
 * @param string $category
 *   The batch category.
 * @param array $metadata
 *   Metadata for the swap record.
 *
 * @throws \Drupal\Core\Entity\EntityStorageException
 */
private function saveEntityWithReplacements(
  FieldableEntityInterface $entity,
  string $fieldName,
  string $originalText,
  array $replacements,
  string $fieldSelector,
  string $category,
  array $metadata,
): void {
  $updatedText = strtr($originalText, $replacements);
  $textFormat = $entity->get($fieldName)->format ?? 'full_html';

  $entity->set($fieldName, ['value' => $updatedText, 'format' => $textFormat]);

  // The plain save must happen before the revision pass; do not reorder.
  $entity->save();
  $this->updateRevision($entity);

  $this->createSwapRecord($fieldSelector, $entity, $category, 'completed', $metadata);
}
/**
 * Stores a media_swap_record entity describing one conversion attempt.
 *
 * Any exception raised while creating or saving the record is logged and
 * not rethrown, so a record failure never aborts the surrounding batch.
 *
 * @param string $fieldSelector
 *   The field selector.
 * @param \Drupal\Core\Entity\FieldableEntityInterface $entity
 *   The entity that was processed.
 * @param string $category
 *   The batch category.
 * @param string $status
 *   The processing status ('completed', 'failed', etc.).
 * @param array $metadata
 *   Additional metadata; entries with an empty value are ignored:
 *   - source_file_id: ID of the source file entity
 *   - created_media_id: ID of the created media entity
 *   - conversion_details: Details about the conversion
 *   - error_message: Error message if failed.
 */
public function createSwapRecord(
  string $fieldSelector,
  FieldableEntityInterface $entity,
  string $category,
  string $status,
  array $metadata = [],
): void {
  // Metadata key => setter on the swap record, applied in this order.
  $optionalSetters = [
    'source_file_id' => 'setSourceFileId',
    'created_media_id' => 'setCreatedMediaId',
    'conversion_details' => 'setConversionDetails',
    'error_message' => 'setErrorMessage',
  ];

  try {
    /** @var \Drupal\image_to_media_swapper\Entity\MediaSwapRecordInterface $record */
    $record = $this->entityTypeManager
      ->getStorage('media_swap_record')
      ->create([
        'field_selector' => $fieldSelector,
        'target_entity_type' => $entity->getEntityTypeId(),
        'target_bundle' => $entity->bundle(),
        'target_entity_id' => $entity->id(),
        'batch_category' => $category,
        'status' => $status,
        'processed_time' => $this->time->getRequestTime(),
      ]);

    foreach ($optionalSetters as $metadataKey => $setter) {
      if (!empty($metadata[$metadataKey])) {
        $record->{$setter}($metadata[$metadataKey]);
      }
    }

    $record->save();
  }
  catch (\Exception $e) {
    // Log error but don't fail the batch.
    $this->loggerChannel->error('Failed to create swap record for entity @id: @error', [
      '@id' => $entity->id(),
      '@error' => $e->getMessage(),
    ]);
  }
}
}
