cms_content_sync-3.0.x-dev/src/Plugin/cms_content_sync/field_handler/DefaultFormattedTextHandler.php
src/Plugin/cms_content_sync/field_handler/DefaultFormattedTextHandler.php
<?php
namespace Drupal\cms_content_sync\Plugin\cms_content_sync\field_handler;
use Drupal\gutenberg\Parser\BlockParser;
use Drupal\cms_content_sync\Plugin\FieldHandlerBase;
use Drupal\cms_content_sync\PullIntent;
use Drupal\cms_content_sync\PushIntent;
use Drupal\cms_content_sync\SyncIntent;
use Drupal\Core\Entity\TranslatableInterface;
use Drupal\Core\Field\FieldDefinitionInterface;
use Drupal\Core\StreamWrapper\PublicStream;
use Drupal\prosemirror\Transformation\TransformationHelper;
use EdgeBox\SyncCore\Interfaces\Configuration\IDefineEntityType;
use EdgeBox\SyncCore\Interfaces\Configuration\IDefineObjectProperty;
use Psr\Log\LoggerInterface;
/**
* Provides the default field handler for formatted text fields. Entities that
* are embedded in the text, like files or media entities, are pushed along.
*
* @FieldHandler(
* id = "cms_content_sync_default_formatted_text_handler",
* label = @Translation("Default Formatted Text"),
* weight = 90
* )
*/
class DefaultFormattedTextHandler extends FieldHandlerBase {
/**
 * {@inheritdoc}
 */
public static function supports($entity_type, $bundle, $field_name, FieldDefinitionInterface $field) {
  // Only these two field types are handled. The comparison is strict so no
  // loose type juggling can ever produce a false match.
  return in_array($field->getType(), ['text_with_summary', 'text_long'], TRUE);
}
/**
 * {@inheritdoc}
 */
public function pull(PullIntent $intent) {
  $action = $intent->getAction();
  /** @var \Drupal\Core\Entity\FieldableEntityInterface $entity */
  $entity = $intent->getEntity();

  // Deleting an entity needs no per-field work, and when changes are merged
  // this handler leaves the local value alone.
  if (SyncIntent::ACTION_DELETE == $action || $intent->shouldMergeChanges()) {
    return FALSE;
  }

  $incoming = $intent->getProperty($this->fieldName);
  if (empty($incoming)) {
    $entity->set($this->fieldName, NULL);
    return TRUE;
  }

  $items = [];
  foreach ($incoming as $item) {
    // Items without a value are passed through unchanged.
    if (empty($item['value'])) {
      $items[] = $item;
      continue;
    }

    $format_id = $item['format'] ?? NULL;
    $embedded = $item['embedded'] ?? NULL;
    if ($format_id && $this->isProseMirrorFormat($format_id)) {
      // ProseMirror content is kept as-is; only its dependencies are loaded.
      $this->addProseMirrorDependenciesToPull($intent, $item['value'], $embedded);
    }
    else {
      // Everything else gets its file and node links rewritten for this site.
      $item['value'] = $this->replaceEntityReferenceLinks($intent, $item['value'], $embedded);
    }
    $items[] = $item;
  }

  $entity->set($this->fieldName, $items);
  return TRUE;
}
/**
 * Parses regular HTML to look for dependencies like media, files or nodes.
 *
 * Every reference that can be resolved on this site is registered at the
 * intent through addDependency(). The given text itself is not modified; the
 * replace callbacks are only used to visit each match.
 *
 * @param \Drupal\cms_content_sync\PushIntent $intent
 *   The PushIntent to add the dependencies to.
 * @param string $text
 *   The HTML to search.
 * @param \Psr\Log\LoggerInterface|null $logger
 *   A logger for references that can't be resolved, or NULL to skip logging.
 *
 * @return array
 *   The values returned by addDependency(), one per resolved reference.
 */
public static function extractDependenciesFromHtml(PushIntent $intent, string $text, ?LoggerInterface $logger) {
  $result = [];
  $base_path = PublicStream::basePath();
  // The base path is site configuration and ends up inside the patterns
  // below, so it has to be escaped to be matched literally.
  $base_path_pattern = preg_quote($base_path, '@');

  $add_file_uri_dependency = function ($matches) use ($intent, &$result, $base_path, $logger) {
    $tag = $matches[1];
    $path = $matches[2];
    // PDF files can have a #page=... anchor attached. It is not part of the
    // file URI, so it is stripped for the lookup (the external URL below
    // still uses the full match).
    $path = preg_replace('@#.*$@', '', $path);
    // Other files may have a query argument e.g. to avoid caching.
    $path = preg_replace('@\\?.*$@', '', $path);
    $uri = 'public://' . urldecode($path);
    /** @var \Drupal\file\FileInterface[] $files */
    $files = \Drupal::entityTypeManager()
      ->getStorage('file')
      ->loadByProperties(['uri' => $uri]);
    if (!count($files)) {
      $logger?->error(
        'Failed to push referenced file by URI in the formatted text as the file is missing on this site: @uri<br>Flow: @flow_id | Pool: @pool_id',
        [
          '@uri' => $uri,
          '@flow_id' => $intent->getFlow()->id(),
          '@pool_id' => implode(',', $intent->getPoolIds()),
        ]
      );
      return '';
    }
    $file = reset($files);
    $result[] = $intent->addDependency($file, [
      'tag' => $tag,
      'internal_url' => $uri,
      'external_url' => '/' . $base_path . '/' . $matches[2],
    ]);
    return '';
  };

  $add_entity_reference_dependency = function ($matches) use ($intent, &$result) {
    $tag = $matches[1];
    $type = $matches[2];
    $id = $matches[3];
    $entity = \Drupal::entityTypeManager()
      ->getStorage($type)
      ->load($id);
    if (!$entity) {
      return $matches[0];
    }
    $view_url = $entity->hasLinkTemplate('canonical') ? $entity->toUrl('canonical', [
      'absolute' => TRUE,
      'language' => $entity->language(),
      // Workaround for PathProcessorAlias::processOutbound to explicitly ignore us
      // as we always want the pure, unaliased e.g. /node/:id path because
      // we don't use the URL for end-users but for editors and it has to
      // be reliable (aliases can be removed or change).
      'alias' => TRUE,
    ])->toString() : NULL;
    $result[] = $intent->addDependency($entity, [
      'tag' => $tag,
      'internal_url' => 'entity:' . $type . '/' . $id,
      'external_url' => '/' . $type . '/' . $entity->uuid(),
    ], view_url: $view_url);
    return str_replace('"/' . $type . '/' . $id . '"', '"/' . $type . '/' . $entity->uuid() . '"', $matches[0]);
  };

  // Simple image embedding (default ckeditor + IMCE images)
  preg_replace_callback(
    '@<(img)\s[^>]*src="/' . $base_path_pattern . '/([^"]+)"@',
    $add_file_uri_dependency,
    $text
  );
  // Other file embedding (IMCE files)
  preg_replace_callback(
    '@<(a)\s[^>]*href="/' . $base_path_pattern . '/([^"]+)"@',
    $add_file_uri_dependency,
    $text
  );
  // Node references.
  preg_replace_callback(
    '@<(a)\s[^>]*href="/(node)/([0-9]+)"@',
    $add_entity_reference_dependency,
    $text
  );
  // Entity embedding (especially media)
  preg_replace_callback(
    '@<drupal-(entity|media)[^>]+data-entity-type="([^"]+)"\s+data-entity-uuid="([^"]+)"@',
    function ($matches) use ($intent, &$result, $logger) {
      $tag = $matches[1];
      $type = $matches[2];
      $uuid = $matches[3];
      $entity = \Drupal::service('entity.repository')->loadEntityByUuid($type, $uuid);
      if (!$entity) {
        $logger?->error(
          'Failed to push referenced entity by UUID in the formatted text as the entity is missing on this site: @type @uuid<br>Flow: @flow_id | Pool: @pool_id',
          [
            '@type' => $type,
            '@uuid' => $uuid,
            '@flow_id' => $intent->getFlow()->id(),
            '@pool_id' => implode(',', $intent->getPoolIds()),
          ]
        );
        return '';
      }
      $result[] = $intent->addDependency($entity, [
        'tag' => 'drupal-' . $tag,
        'entity_type' => $type,
        'entity_uuid' => $uuid,
      ]);
      return '';
    },
    $text
  );

  return $result;
}
/**
 * Parses text that uses the 'markdown' filter to look for embedded images.
 *
 * Only image embeds pointing below the public files base path are handled,
 * i.e. "![alt](/<base path>/<file>)". Each file that exists on this site is
 * registered at the intent through addDependency().
 *
 * @param \Drupal\cms_content_sync\PushIntent $intent
 *   The PushIntent to add the dependencies to.
 * @param string $text
 *   The markdown text as it's saved in the field.
 *
 * @return array
 *   The values returned by addDependency(), one per resolved file.
 */
protected function extractDependenciesFromMarkdown(PushIntent $intent, string $text) {
  $result = [];
  $base_path = PublicStream::basePath();

  $add_file_uri_dependency = function ($matches) use ($intent, &$result, $base_path) {
    $path = $matches[1];
    // PDF files can have a #page=... anchor attached. It is not part of the
    // file URI, so it is stripped for the lookup (the external URL below
    // still uses the full match).
    $path = preg_replace('@#.*$@', '', $path);
    // Other files may have a query argument e.g. to avoid caching.
    $path = preg_replace('@\\?.*$@', '', $path);
    $uri = 'public://' . urldecode($path);
    /** @var \Drupal\file\FileInterface[] $files */
    $files = \Drupal::entityTypeManager()
      ->getStorage('file')
      ->loadByProperties(['uri' => $uri]);
    if (!count($files)) {
      $this->logger->error(
        'Failed to push referenced file by URI in the formatted text as the file is missing on this site: @uri<br>Flow: @flow_id | Pool: @pool_id',
        [
          '@uri' => $uri,
          '@flow_id' => $intent->getFlow()->id(),
          '@pool_id' => implode(',', $intent->getPoolIds()),
        ]
      );
      return '';
    }
    $file = reset($files);
    $result[] = $intent->addDependency($file, [
      'tag' => 'image',
      'internal_url' => $uri,
      'external_url' => '/' . $base_path . '/' . $matches[1],
    ]);
    return '';
  };

  // Markdown image syntax. The base path is site configuration, so it is
  // escaped before being embedded into the pattern.
  preg_replace_callback(
    '@\!\[[^\]]*\]\(/' . preg_quote($base_path, '@') . '/([^)]+)\)@',
    $add_file_uri_dependency,
    $text
  );

  return $result;
}
/**
 * Parses text that uses the 'gutenberg' filter to look for dependencies.
 *
 * The text is split into blocks by the Gutenberg module's BlockParser and
 * every top-level block is searched for references like media or files.
 *
 * @param \Drupal\cms_content_sync\PushIntent $intent
 *   The PushIntent to add the dependencies to.
 * @param string $text
 *   The plain text as it's saved in the field.
 *
 * @return array
 *   The dependencies found across all blocks.
 */
protected function extractDependenciesFromGutenberg(PushIntent $intent, string $text) {
  $parser = new BlockParser();
  $dependencies = [];
  foreach ($parser->parse($text) as $parsed_block) {
    $found = $this->extractDependenciesFromGutenbergBlock($intent, $parsed_block);
    $dependencies = array_merge($dependencies, $found);
  }
  return $dependencies;
}
/**
 * Extracts the dependencies from a single Gutenberg block.
 *
 * The block is provided by the Gutenberg module's BlockParser. A string is
 * searched as HTML; an array is searched for media attributes, media entity
 * IDs, its inner HTML and, recursively, its inner blocks. Any other type
 * yields no dependencies.
 *
 * @param \Drupal\cms_content_sync\PushIntent $intent
 *   The PushIntent to add the dependencies to.
 * @param array|string $block
 *   The block to search for references.
 *
 * @return array
 *   The dependencies found in the block and all of its descendants.
 */
protected function extractDependenciesFromGutenbergBlock(PushIntent $intent, mixed $block) {
  if (is_string($block)) {
    return self::extractDependenciesFromHtml($intent, $block, $this->logger);
  }
  if (!is_array($block)) {
    return [];
  }

  $dependencies = [];

  // Adds the entity identified by type and UUID, provided it exists locally.
  $add_media_attrs_dependency = function ($tag, $type, $uuid) use ($intent, &$dependencies) {
    $entity = \Drupal::service('entity.repository')->loadEntityByUuid($type, $uuid);
    if ($entity) {
      $dependencies[] = $intent->addDependency($entity, [
        'tag' => $tag,
        'attribute' => "mediaAttrs",
        'entity-type' => $type,
        'entity-uuid' => $uuid,
      ]);
    }
  };

  if (!empty($block["attrs"]["mediaAttrs"])) {
    $media = $block["attrs"]["mediaAttrs"];
    // core/image, core/file.
    if (!empty($media["data-entity-type"]) && !empty($media["data-entity-uuid"])) {
      $add_media_attrs_dependency('core/file', $media["data-entity-type"], $media["data-entity-uuid"]);
    }
    // core/gallery.
    elseif (is_array($media)) {
      foreach ($media as $gallery_item) {
        if (!empty($gallery_item["data-entity-type"]) && !empty($gallery_item["data-entity-uuid"])) {
          $add_media_attrs_dependency('core/gallery', $gallery_item["data-entity-type"], $gallery_item["data-entity-uuid"]);
        }
      }
    }
  }

  // drupalmedia/drupal-media-entity.
  if (!empty($block["attrs"]["mediaEntityIds"])) {
    foreach ($block["attrs"]["mediaEntityIds"] as $media_id) {
      $media_entity = \Drupal::service('entity.repository')->getActive('media', $media_id);
      if ($media_entity) {
        $dependencies[] = $intent->addDependency($media_entity, [
          'tag' => 'drupalmedia/drupal-media-entity',
          'attribute' => "mediaEntityIds",
          'entity-id' => $media_id,
        ]);
      }
    }
  }

  if (!empty($block['innerHTML'])) {
    $from_html = self::extractDependenciesFromHtml($intent, $block["innerHTML"], $this->logger);
    $dependencies = array_merge($dependencies, $from_html);
  }

  if (!empty($block["innerBlocks"])) {
    foreach ($block["innerBlocks"] as $child_block) {
      $from_child = $this->extractDependenciesFromGutenbergBlock($intent, $child_block);
      $dependencies = array_merge($dependencies, $from_child);
    }
  }

  // Not handling drupalblock/* atm. Maybe useful if customers use custom
  // block content they want to sync with the module as dependencies as well.
  return $dependencies;
}
/**
 * Parses text that uses the 'prosemirror' filter to look for dependencies.
 *
 * The text is decoded as JSON and handed to the ProseMirror module's
 * transformation helper, which reports the referenced entities. Every
 * referenced entity that exists on this site is added as a dependency.
 * Failures are logged and never interrupt the push.
 *
 * @param \Drupal\cms_content_sync\PushIntent $intent
 *   The PushIntent to add the dependencies to.
 * @param string $text
 *   The JSON text as it's saved in the field.
 *
 * @return array
 *   The values returned by addDependency(); empty if the text can't be
 *   parsed or the helper service is unavailable.
 */
protected function extractDependenciesFromProseMirror(PushIntent $intent, string $text) {
  $result = [];

  // \Drupal::service() throws for an unknown service instead of returning
  // NULL, so availability has to be checked before requesting it.
  if (!\Drupal::hasService('prosemirror.transformation_helper')) {
    $this->logger->warning(
      'ProseMirror dependencies could not be loaded: @message<br>Flow: @flow_id | Pool: @pool_id',
      [
        '@message' => "prosemirror.transformation_helper service not available",
        '@flow_id' => $intent->getFlow()->id(),
        '@pool_id' => implode(',', $intent->getPoolIds()),
      ]
    );
    return $result;
  }

  /** @var \Drupal\prosemirror\Transformation\TransformationHelper $transformation_helper */
  $transformation_helper = \Drupal::service('prosemirror.transformation_helper');

  // Try to decode the JSON content.
  $content = json_decode($text, TRUE);
  // Skip invalid JSON. Valid JSON that is not an array/object (e.g. "null"
  // or a bare string) cannot describe any references either.
  // NOTE(review): assumes a ProseMirror document always decodes to an array
  // - confirm against the prosemirror module.
  if (json_last_error() !== JSON_ERROR_NONE || !is_array($content)) {
    return $result;
  }

  try {
    // Use TransformationHelper to validate and get entity references.
    $element_instance = $transformation_helper->validateAndSanitize($content);
    $references = $element_instance->getReferences();

    // Transform the references into the expected format for addDependency.
    foreach ($references as $reference) {
      $entity_type = $reference->getEntityType();
      $entity_uuid = $reference->getEntityUuid();

      // Only entities that exist on this site can be pushed along.
      $entity = \Drupal::service('entity.repository')->loadEntityByUuid($entity_type, $entity_uuid);
      if ($entity) {
        $result[] = $intent->addDependency($entity, [
          'tag' => 'prosemirror',
        ]);
      }
    }
  }
  catch (\Exception $e) {
    // Log the error but don't break the sync.
    if (isset($this->logger)) {
      $this->logger->warning(
        'Failed to parse ProseMirror content for dependencies: @message<br>Flow: @flow_id | Pool: @pool_id',
        [
          '@message' => $e->getMessage(),
          '@flow_id' => $intent->getFlow()->id(),
          '@pool_id' => implode(',', $intent->getPoolIds()),
        ]
      );
    }
  }

  return $result;
}
/**
 * Finds the entities referenced in the text, like media and files.
 *
 * The references are added as dependencies to the sync so they are updated
 * along with the parent entity that contains this field. Which parser is
 * used depends on the filters enabled for the text format: Gutenberg,
 * Markdown, ProseMirror or, by default, plain HTML.
 *
 * @param \Drupal\cms_content_sync\PushIntent $intent
 *   The PushIntent to add the dependencies to.
 * @param string $text
 *   The field's ['value'].
 * @param string|null $format_id
 *   The field's ['format'].
 *
 * @return array|null
 *   The dependencies found, or NULL if the text format doesn't exist on this
 *   site.
 */
protected function extractDependencies(PushIntent $intent, string $text, ?string $format_id) {
  if (!$format_id) {
    return self::extractDependenciesFromHtml($intent, $text, $this->logger);
  }

  $formats = filter_formats();
  if (empty($formats[$format_id])) {
    return NULL;
  }

  $filters = $formats[$format_id]->filters()->getAll();
  $is_enabled = static fn (string $filter_id): bool => !empty($filters[$filter_id]->status);

  if ($is_enabled('gutenberg')) {
    return $this->extractDependenciesFromGutenberg($intent, $text);
  }
  if ($is_enabled('markdown')) {
    return $this->extractDependenciesFromMarkdown($intent, $text);
  }
  // The pull side of this handler looks for the "prosemirror" filter ID while
  // this method used to look for "prosemirror_filter" only. Both are accepted
  // so push and pull agree on what a ProseMirror format is.
  // NOTE(review): confirm the actual plugin ID of the ProseMirror filter and
  // drop the other one.
  if ($is_enabled('prosemirror_filter') || $is_enabled('prosemirror')) {
    return $this->extractDependenciesFromProseMirror($intent, $text);
  }

  return self::extractDependenciesFromHtml($intent, $text, $this->logger);
}
/**
 * {@inheritdoc}
 */
public function push(PushIntent $intent) {
  if (!parent::push($intent)) {
    return FALSE;
  }

  $field_items = $intent->getEntity()->get($this->fieldName);
  $values = $field_items->getValue();

  foreach (array_keys($values) as $delta) {
    // Items without text have nothing to extract or process.
    if (empty($values[$delta]['value'])) {
      continue;
    }

    $values[$delta]['embedded'] = $this->extractDependencies(
      $intent,
      $values[$delta]['value'],
      $values[$delta]['format'] ?? NULL
    );

    // Ship the processed text as a plain string along with the raw value.
    $processed = $field_items->get($delta)->processed;
    if (!empty($processed)) {
      $values[$delta]['processed'] = (string) $processed;
    }
  }

  $intent->setProperty($this->fieldName, $values);
  return TRUE;
}
/**
 * Rewrites file URLs and "/node/..." links so they are valid on this site.
 *
 * File URLs below "/sites/<site>/files/" are replaced with the URL generated
 * for the matching local file. Node links are replaced with the local node
 * ID, resolved either through the data-entity-uuid attribute or through the
 * embedded references that came along with the field. References that can't
 * be resolved are saved as unresolved dependencies and recorded as missing
 * at the entity status.
 *
 * @todo If a new entity is added, we should scan the database for existing
 * references to it that can now be resolved.
 *
 * @param \Drupal\cms_content_sync\PullIntent $intent
 *   The PullIntent providing the entity, its status and the embedded data.
 * @param string $text
 *   The field's ['value'].
 * @param array|null $references
 *   The field's ['embedded'] references, if any.
 *
 * @return string
 *   The text with all resolvable links replaced.
 */
protected function replaceEntityReferenceLinks($intent, $text, $references) {
  $entity_repository = \Drupal::service('entity.repository');
  /** @var \Drupal\Core\Entity\EntityInterface $entity */
  $entity = $intent->getEntity();
  $status = $intent->getEntityStatus();
  $language = $entity instanceof TranslatableInterface ? $entity->language()->getId() : 'und';
  // Missing references are collected from scratch on every pull.
  $status->resetMissingReferences($language, $this->fieldName);

  $replace_uri_callback = function ($matches) use ($intent) {
    $path = $matches[2];
    // PDF files can have a #page=... anchor attached that we want to keep.
    // Split at the first "#" only so the remainder survives unchanged.
    $anchor = NULL;
    if (str_contains($path, '#')) {
      [$path, $anchor] = explode('#', $path, 2);
    }
    // File URIs may include e.g. an ?itok=... from some text editors. Again
    // split at the first "?" only to keep the complete query string.
    $query = NULL;
    if (str_contains($path, '?')) {
      [$path, $query] = explode('?', $path, 2);
    }

    $parts = explode('/', $path);
    // Image style URLs start with "styles/<style name>/"; skip that prefix.
    if ($parts[0] === 'styles') {
      $parts = array_slice($parts, 2);
    }

    // Drop leading path segments one by one until a file matches.
    // NOTE(review): the generated URL below is built from the matched $uri,
    // not from the loaded file's own URI - confirm these can't differ for
    // files resolved through loadEmbeddedFile().
    $file = NULL;
    $uri = NULL;
    $part_count = count($parts);
    for ($i = 0; $i < $part_count; ++$i) {
      $uri = 'public://' . urldecode(implode('/', array_slice($parts, $i)));
      /** @var \Drupal\file\FileInterface[] $files */
      $files = \Drupal::entityTypeManager()
        ->getStorage('file')
        ->loadByProperties(['uri' => $uri]);
      if (count($files)) {
        $file = reset($files);
        break;
      }
      $file = $intent->loadEmbeddedFile($uri);
      if (!empty($file)) {
        break;
      }
    }

    if (!$file) {
      $this->logger->error(
        'Failed to replace file URI in the formatted text as the file is missing on this site: @uri',
        [
          '@uri' => 'public://' . urldecode($path),
        ]
      );
      // Keep the original path, but below this site's public files path.
      $base_path = PublicStream::basePath();
      return $matches[1] . '"/' . $base_path . '/' . $matches[2] . '"';
    }

    $url = \Drupal::service('file_url_generator')->generateString($uri);
    if ($query) {
      $url .= '?' . $query;
    }
    if ($anchor) {
      $url .= '#' . $anchor;
    }
    return $matches[1] . '"' . $url . '"';
  };

  // Simple image embedding (default ckeditor + IMCE images)
  $text = preg_replace_callback(
    '@(<img[^>]+src=)"/sites/[^/]+/files/([^"]+)"@',
    $replace_uri_callback,
    $text
  );
  // Other file embedding (IMCE files)
  $text = preg_replace_callback(
    '@(<a[^>]+href=)"/sites/[^/]+/files/([^"]+)"@',
    $replace_uri_callback,
    $text
  );

  $fieldName = $this->fieldName;

  // References we can resolve by the UUID being provided as an attribute.
  $text = preg_replace_callback(
    '@data-entity-uuid="([0-9a-z-]+)" href="/node/([0-9]+)"@',
    function ($matches) use ($entity_repository, $intent, $references, $fieldName, $status, $language) {
      $uuid = $matches[1];
      $id = $matches[2];
      try {
        $node = $entity_repository->loadEntityByUuid('node', $uuid);
        if ($node) {
          $id = $node->id();
        }
        elseif (!empty($references)) {
          foreach ($references as $candidate) {
            $reference = $intent->loadReference($candidate);
            if ($reference->getUuid() === $uuid) {
              $intent->saveUnresolvedDependency($candidate);
              $status->addMissingReference($language, $fieldName, $candidate);
              break;
            }
          }
        }
      }
      catch (\Exception $e) {
        // Resolving is best-effort: the link is kept as it came in, but the
        // failure is logged instead of being swallowed silently.
        $this->logger->warning(
          'Failed to resolve the node reference @uuid in the formatted text: @message',
          [
            '@uuid' => $uuid,
            '@message' => $e->getMessage(),
          ]
        );
      }
      return 'data-entity-uuid="' . $uuid . '" href="/node/' . $id . '"';
    },
    $text
  );

  // Entity references.
  $text = preg_replace_callback(
    '@href="(/node/[0-9]+)"@',
    function ($matches) use ($intent, $references, $fieldName, $status, $language) {
      $path = $matches[1];
      if (!empty($references)) {
        // Embedded node references carry an "entity:node/<id>" internal URL.
        $internal_url = 'entity:' . substr($path, 1);
        foreach ($references as $candidate) {
          $reference_details = $intent->getEmbeddedEntityData($candidate);
          if (!empty($reference_details['internal_url']) && $reference_details['internal_url'] === $internal_url) {
            $referenced = $intent->loadEmbeddedEntity($candidate);
            if ($referenced) {
              return 'href="/node/' . $referenced->id() . '"';
            }
            else {
              $intent->saveUnresolvedDependency($candidate);
              $status->addMissingReference($language, $fieldName, $candidate);
            }
          }
        }
      }
      // Return as-is to not overwrite values from previous matches by UUID.
      return 'href="' . $path . '"';
    },
    $text
  );

  return $text;
}
/**
 * Checks if a text format uses the ProseMirror filter.
 *
 * @param string $format_id
 *   The text format ID.
 *
 * @return bool
 *   TRUE if the format exists and has the ProseMirror filter enabled, FALSE
 *   otherwise.
 */
protected function isProseMirrorFormat(string $format_id): bool {
  $formats = filter_formats();
  if (empty($formats[$format_id])) {
    return FALSE;
  }

  $filters = $formats[$format_id]->filters()->getAll();
  // The push side of this handler detects ProseMirror through the
  // "prosemirror_filter" filter ID while this method used to look for
  // "prosemirror" only. Both are accepted so push and pull agree.
  // NOTE(review): confirm the actual plugin ID of the ProseMirror filter and
  // drop the other one.
  return !empty($filters['prosemirror_filter']->status)
    || !empty($filters['prosemirror']->status);
}
/**
 * Loads the entities embedded in ProseMirror content during a pull.
 *
 * ProseMirror content is not rewritten; each embedded reference is merely
 * loaded through the pull intent. The text itself is not inspected.
 *
 * @param \Drupal\cms_content_sync\PullIntent $intent
 *   The pull intent.
 * @param string $text
 *   The ProseMirror JSON content.
 * @param array|null $references
 *   The embedded references.
 */
protected function addProseMirrorDependenciesToPull(PullIntent $intent, string $text, ?array $references): void {
  // NULL and an empty list both mean there is nothing to load.
  foreach ($references ?? [] as $embedded_reference) {
    $intent->loadEmbeddedEntity($embedded_reference);
  }
}
/**
 * {@inheritDoc}
 */
public function definePropertyAtType(IDefineEntityType $type_definition) {
  $definition = parent::definePropertyAtType($type_definition);

  // Sub-properties can only be added to object properties; anything else
  // (including no property at all) is handed back untouched.
  if ($definition instanceof IDefineObjectProperty) {
    $definition->addReferenceProperty('embedded', 'Embedded', TRUE, FALSE, 'embedded_entities');
    $definition->addStringProperty('processed', 'Processed', FALSE, FALSE, 'text_long_processed');
  }

  return $definition;
}
}
