cms_content_sync-3.0.x-dev/src/Plugin/cms_content_sync/field_handler/DefaultFormattedTextHandler.php

src/Plugin/cms_content_sync/field_handler/DefaultFormattedTextHandler.php
<?php

namespace Drupal\cms_content_sync\Plugin\cms_content_sync\field_handler;

use Drupal\gutenberg\Parser\BlockParser;
use Drupal\cms_content_sync\Plugin\FieldHandlerBase;
use Drupal\cms_content_sync\PullIntent;
use Drupal\cms_content_sync\PushIntent;
use Drupal\cms_content_sync\SyncIntent;
use Drupal\Core\Entity\TranslatableInterface;
use Drupal\Core\Field\FieldDefinitionInterface;
use Drupal\Core\StreamWrapper\PublicStream;
use Drupal\prosemirror\Transformation\TransformationHelper;
use EdgeBox\SyncCore\Interfaces\Configuration\IDefineEntityType;
use EdgeBox\SyncCore\Interfaces\Configuration\IDefineObjectProperty;
use Psr\Log\LoggerInterface;

/**
 * Providing an implementation for formatted text. This handler pushes embedded
 * entities along like files or media entities in HTML.
 *
 * @FieldHandler(
 *   id = "cms_content_sync_default_formatted_text_handler",
 *   label = @Translation("Default Formatted Text"),
 *   weight = 90
 * )
 */
class DefaultFormattedTextHandler extends FieldHandlerBase {

  /**
   * {@inheritdoc}
   */
  public static function supports($entity_type, $bundle, $field_name, FieldDefinitionInterface $field) {
    // This handler is limited to these two field types. The comparison is
    // strict so a non-string type can never match through loose comparison.
    return in_array($field->getType(), ['text_with_summary', 'text_long'], TRUE);
  }

  /**
   * {@inheritdoc}
   */
  public function pull(PullIntent $intent) {
    $action = $intent->getAction();
    /** @var \Drupal\Core\Entity\FieldableEntityInterface $entity */
    $entity = $intent->getEntity();

    // Deleting an entity needs no per-field work, and the field is left
    // untouched when the intent reports that changes should be merged.
    if (SyncIntent::ACTION_DELETE == $action || $intent->shouldMergeChanges()) {
      return FALSE;
    }

    $data = $intent->getProperty($this->fieldName);

    // No incoming value: clear the field.
    if (empty($data)) {
      $entity->set($this->fieldName, NULL);

      return TRUE;
    }

    $items = [];
    foreach ($data as $item) {
      if (!empty($item['value'])) {
        $format_id = $item['format'] ?? NULL;
        $embedded = $item['embedded'] ?? NULL;

        if ($format_id && $this->isProseMirrorFormat($format_id)) {
          // ProseMirror content is stored unchanged; only its embedded
          // references are loaded through the pull intent.
          $this->addProseMirrorDependenciesToPull($intent, $item['value'], $embedded);
        }
        else {
          // Any other format gets its file and node links rewritten for this
          // site.
          $item['value'] = $this->replaceEntityReferenceLinks($intent, $item['value'], $embedded);
        }
      }

      // Items without a value are passed through as they are.
      $items[] = $item;
    }

    $entity->set($this->fieldName, $items);

    return TRUE;
  }

  /**
   * Parse a text that uses regular HTML to look for dependencies like media or files.
   *
   * Four kinds of references are detected: <img> tags and <a> tags pointing
   * into the public files directory, <a> tags pointing to "/node/{id}" and
   * <drupal-entity> / <drupal-media> embed tags. The text itself is never
   * modified; every match is only registered as a dependency on the intent.
   *
   * @param \Drupal\cms_content_sync\PushIntent $intent
   *   The PushIntent to add the dependencies to.
   * @param string $text
   *   The HTML to search.
   * @param \Psr\Log\LoggerInterface|null $logger
   *   An optional logger to use for logging any errors (e.g. reference can't
   *   be resolved). Nothing is logged when omitted or NULL.
   *
   * @return array
   *   The values returned by PushIntent::addDependency() for every reference
   *   that could be resolved, in the order they were found.
   */
  public static function extractDependenciesFromHtml(PushIntent $intent, string $text, ?LoggerInterface $logger = NULL) {
    $result = [];

    $base_path = PublicStream::basePath();
    // The base path becomes part of the regular expressions below, so any
    // regex meta characters in it (e.g. the dots in "sites/example.com/files")
    // must be escaped to be matched literally.
    $base_path_pattern = preg_quote($base_path, '@');

    // NOTE: The return values of all callbacks below are irrelevant because
    // the results of the preg_replace_callback() calls are discarded; the
    // callbacks are only used to visit every match.
    $add_file_uri_dependency = function ($matches) use ($intent, &$result, $base_path, $logger) {
      $tag = $matches[1];
      $path = $matches[2];
      // PDF files can have a #page=... anchor attached. It's stripped for the
      // file lookup only; the 'external_url' below keeps the original path.
      $path = preg_replace('@#.*$@', '', $path);
      // Other files may have a query argument e.g. to avoid caching.
      $path = preg_replace('@\\?.*$@', '', $path);
      $uri = 'public://' . urldecode($path);

      /** @var \Drupal\file\FileInterface[] $files */
      $files = \Drupal::entityTypeManager()
        ->getStorage('file')
        ->loadByProperties(['uri' => $uri]);

      if (!count($files)) {
        if ($logger) {
          $logger->error(
            'Failed to push referenced file by URI in the formatted text as the file is missing on this site: @uri<br>Flow: @flow_id | Pool: @pool_id',
            [
              '@uri' => $uri,
              '@flow_id' => $intent->getFlow()->id(),
              '@pool_id' => implode(',', $intent->getPoolIds()),
            ]
          );
        }

        return '';
      }

      $file = reset($files);

      $result[] = $intent->addDependency($file, [
        'tag' => $tag,
        'internal_url' => $uri,
        'external_url' => '/' . $base_path . '/' . $matches[2],
      ]);

      return '';
    };

    $add_entity_reference_dependency = function ($matches) use ($intent, &$result) {
      $tag = $matches[1];
      $type = $matches[2];
      $id = $matches[3];

      $entity = \Drupal::entityTypeManager()
        ->getStorage($type)
        ->load($id);

      if (!$entity) {
        return $matches[0];
      }

      $view_url = $entity->hasLinkTemplate('canonical') ? $entity->toUrl('canonical', [
        'absolute' => TRUE,
        'language' => $entity->language(),
        // Workaround for PathProcessorAlias::processOutbound to explicitly ignore us
        // as we always want the pure, unaliased e.g. /node/:id path because
        // we don't use the URL for end-users but for editors and it has to
        // be reliable (aliases can be removed or change).
        'alias' => TRUE,
      ])->toString() : NULL;

      $result[] = $intent->addDependency($entity, [
        'tag' => $tag,
        'internal_url' => 'entity:' . $type . '/' . $id,
        'external_url' => '/' . $type . '/' . $entity->uuid(),
      ], view_url:$view_url);

      return str_replace('"/' . $type . '/' . $id . '"', '"/' . $type . '/' . $entity->uuid() . '"', $matches[0]);
    };

    // Simple image embedding (default ckeditor + IMCE images)
    preg_replace_callback(
      '@<(img)\s[^>]*src="/' . $base_path_pattern . '/([^"]+)"@',
      $add_file_uri_dependency,
      $text
    );

    // Other file embedding (IMCE files)
    preg_replace_callback(
      '@<(a)\s[^>]*href="/' . $base_path_pattern . '/([^"]+)"@',
      $add_file_uri_dependency,
      $text
    );

    // Node references.
    preg_replace_callback(
      '@<(a)\s[^>]*href="/(node)/([0-9]+)"@',
      $add_entity_reference_dependency,
      $text
    );

    // Entity embedding (especially media)
    preg_replace_callback(
      '@<drupal-(entity|media)[^>]+data-entity-type="([^"]+)"\s+data-entity-uuid="([^"]+)"@',
      function ($matches) use ($intent, &$result, $logger) {
        $tag = $matches[1];
        $type = $matches[2];
        $uuid = $matches[3];

        $entity = \Drupal::service('entity.repository')->loadEntityByUuid($type, $uuid);

        if (!$entity) {
          if ($logger) {
            $logger->error(
              'Failed to push referenced entity by UUID in the formatted text as the file is missing on this site: @type @uuid<br>Flow: @flow_id | Pool: @pool_id',
              [
                '@type' => $type,
                '@uuid' => $uuid,
                '@flow_id' => $intent->getFlow()->id(),
                '@pool_id' => implode(',', $intent->getPoolIds()),
              ]
            );
          }

          return '';
        }

        $result[] = $intent->addDependency($entity, [
          'tag' => 'drupal-' . $tag,
          'entity_type' => $type,
          'entity_uuid' => $uuid,
        ]);

        return '';
      },
      $text
    );

    return $result;
  }

  /**
   * Parse a text that uses the 'markdown' filter in its format to look for
   * dependencies like media or files.
   *
   * Only markdown images ("![alt](/<public files path>/...)") that point into
   * the public files directory are detected. The text itself isn't modified.
   *
   * @param \Drupal\cms_content_sync\PushIntent $intent
   *   The PushIntent to add the dependencies to.
   * @param string $text
   *   The markdown text as it's saved in the field.
   *
   * @return array
   *   The values returned by PushIntent::addDependency() for every image file
   *   that could be resolved.
   */
  protected function extractDependenciesFromMarkdown(PushIntent $intent, string $text) {
    $result = [];

    $base_path = PublicStream::basePath();
    // The base path becomes part of the regular expression below, so any
    // regex meta characters in it (e.g. the dots in "sites/example.com/files")
    // must be escaped to be matched literally.
    $base_path_pattern = preg_quote($base_path, '@');

    // The callback's return value is irrelevant: the result of
    // preg_replace_callback() is discarded, it's only used to visit matches.
    $add_file_uri_dependency = function ($matches) use ($intent, &$result, $base_path) {
      $path = $matches[1];
      // PDF files can have a #page=... anchor attached. It's stripped for the
      // file lookup only; the 'external_url' below keeps the original path.
      $path = preg_replace('@#.*$@', '', $path);
      // Other files may have a query argument e.g. to avoid caching.
      $path = preg_replace('@\\?.*$@', '', $path);

      $uri = 'public://' . urldecode($path);

      /** @var \Drupal\file\FileInterface[] $files */
      $files = \Drupal::entityTypeManager()
        ->getStorage('file')
        ->loadByProperties(['uri' => $uri]);

      if (!count($files)) {
        $this->logger->error(
          'Failed to push referenced file by URI in the formatted text as the file is missing on this site: @uri<br>Flow: @flow_id | Pool: @pool_id',
          [
            '@uri' => $uri,
            '@flow_id' => $intent->getFlow()->id(),
            '@pool_id' => implode(',', $intent->getPoolIds()),
          ]
        );

        return '';
      }

      $file = reset($files);

      $result[] = $intent->addDependency($file, [
        'tag' => 'image',
        'internal_url' => $uri,
        'external_url' => '/' . $base_path . '/' . $matches[1],
      ]);

      return '';
    };

    // Markdown image embedding: ![alt text](/<public files path>/<file>)
    preg_replace_callback(
      '@\!\[[^\]]*\]\(/' . $base_path_pattern . '/([^)]+)\)@',
      $add_file_uri_dependency,
      $text
    );

    return $result;
  }

  /**
   * Collects dependencies (media, files, ...) from Gutenberg content.
   *
   * The text is parsed with the Gutenberg module's BlockParser and every
   * top-level block is handed to extractDependenciesFromGutenbergBlock().
   *
   * @param \Drupal\cms_content_sync\PushIntent $intent
   *   The PushIntent to add the dependencies to.
   * @param string $text
   *   The plain text as it's saved in the field.
   *
   * @return array
   *   The merged dependencies of all parsed blocks.
   */
  protected function extractDependenciesFromGutenberg(PushIntent $intent, string $text) {
    $parser = new BlockParser();

    // Gather the dependency list of each block first, then merge them in
    // block order.
    $per_block = [];
    foreach ($parser->parse($text) as $parsed_block) {
      $per_block[] = $this->extractDependenciesFromGutenbergBlock($intent, $parsed_block);
    }

    return array_merge([], ...$per_block);
  }

  /**
   * Collects the dependencies referenced by one parsed Gutenberg block.
   *
   * The block comes from the Gutenberg module's BlockParser and is either a
   * string (treated as regular HTML) or an array with the block definition.
   * For arrays the "mediaAttrs" and "mediaEntityIds" attributes, the
   * "innerHTML" and, recursively, all "innerBlocks" are inspected.
   *
   * @param \Drupal\cms_content_sync\PushIntent $intent
   *   The PushIntent to add the dependencies to.
   * @param array|string $block
   *   The block to search for references.
   *
   * @return array
   *   The dependencies found; empty if the block is neither string nor array.
   */
  protected function extractDependenciesFromGutenbergBlock(PushIntent $intent, mixed $block) {
    if (is_string($block)) {
      return self::extractDependenciesFromHtml($intent, $block, $this->logger);
    }

    if (!is_array($block)) {
      return [];
    }

    $dependencies = [];

    $media_attrs = $block['attrs']['mediaAttrs'] ?? NULL;
    if (!empty($media_attrs)) {
      // Build a list of [tag, attributes] pairs so single media references and
      // galleries share the same lookup code below.
      $candidates = [];
      if (!empty($media_attrs['data-entity-type']) && !empty($media_attrs['data-entity-uuid'])) {
        // core/image, core/file.
        $candidates[] = ['core/file', $media_attrs];
      }
      elseif (is_array($media_attrs)) {
        // core/gallery.
        foreach ($media_attrs as $gallery_item) {
          $candidates[] = ['core/gallery', $gallery_item];
        }
      }

      foreach ($candidates as [$tag, $attributes]) {
        if (empty($attributes['data-entity-type']) || empty($attributes['data-entity-uuid'])) {
          continue;
        }

        $type = $attributes['data-entity-type'];
        $uuid = $attributes['data-entity-uuid'];
        $referenced = \Drupal::service('entity.repository')->loadEntityByUuid($type, $uuid);
        if (!$referenced) {
          continue;
        }

        $dependencies[] = $intent->addDependency($referenced, [
          'tag' => $tag,
          'attribute' => 'mediaAttrs',
          'entity-type' => $type,
          'entity-uuid' => $uuid,
        ]);
      }
    }

    // drupalmedia/drupal-media-entity.
    if (!empty($block['attrs']['mediaEntityIds'])) {
      foreach ($block['attrs']['mediaEntityIds'] as $media_id) {
        $referenced = \Drupal::service('entity.repository')->getActive('media', $media_id);
        if (!$referenced) {
          continue;
        }

        $dependencies[] = $intent->addDependency($referenced, [
          'tag' => 'drupalmedia/drupal-media-entity',
          'attribute' => 'mediaEntityIds',
          'entity-id' => $media_id,
        ]);
      }
    }

    // The block's own markup is scanned like regular HTML.
    if (!empty($block['innerHTML'])) {
      $dependencies = array_merge(
        $dependencies,
        self::extractDependenciesFromHtml($intent, $block['innerHTML'], $this->logger)
      );
    }

    // Nested blocks are processed recursively.
    if (!empty($block['innerBlocks'])) {
      foreach ($block['innerBlocks'] as $child_block) {
        $dependencies = array_merge(
          $dependencies,
          $this->extractDependenciesFromGutenbergBlock($intent, $child_block)
        );
      }
    }

    // Not handling drupalblock/* atm. Maybe useful if customers use custom
    // block content they want to sync with the module as dependencies as well.
    return $dependencies;
  }

  /**
   * Parse a text that uses the 'prosemirror' filter in its format to look for
   * dependencies like media or files.
   *
   * The text is decoded as JSON and handed to the ProseMirror module's
   * transformation helper, which reports the entity references it contains.
   * Every reference that can be loaded by UUID is added as a dependency.
   * Problems are logged as warnings and never interrupt the push.
   *
   * @param \Drupal\cms_content_sync\PushIntent $intent
   *   The PushIntent to add the dependencies to.
   * @param string $text
   *   The JSON text as it's saved in the field.
   *
   * @return array
   *   The values returned by PushIntent::addDependency(); empty if the helper
   *   service is missing, the text isn't a JSON document or parsing failed.
   */
  protected function extractDependenciesFromProseMirror(PushIntent $intent, string $text) {
    $result = [];

    // \Drupal::service() throws if the service doesn't exist, so availability
    // must be checked up-front instead of testing the returned value.
    if (!\Drupal::hasService('prosemirror.transformation_helper')) {
      $this->logger->warning(
        'ProseMirror dependencies could not be loaded: @message<br>Flow: @flow_id | Pool: @pool_id',
        [
          '@message' => "prosemirror.transformation_helper service not available",
          '@flow_id' => $intent->getFlow()->id(),
          '@pool_id' => implode(',', $intent->getPoolIds()),
        ]
      );
      return $result;
    }

    /**
     * @var \Drupal\prosemirror\Transformation\TransformationHelper $transformation_helper
     */
    $transformation_helper = \Drupal::service('prosemirror.transformation_helper');

    // Try to decode the JSON content.
    $content = json_decode($text, TRUE);
    if (json_last_error() !== JSON_ERROR_NONE || !is_array($content)) {
      // Not valid JSON or only a JSON scalar: there's no document to inspect.
      // NOTE(review): assumes the helper expects the decoded document as an
      // array - confirm against the prosemirror module.
      return $result;
    }

    try {
      // Use TransformationHelper to validate and get entity references.
      $element_instance = $transformation_helper->validateAndSanitize($content);
      $references = $element_instance->getReferences();

      // Transform the references into the expected format for addDependency.
      foreach ($references as $reference) {
        $entity_type = $reference->getEntityType();
        $entity_uuid = $reference->getEntityUuid();

        // Load the entity; references that don't exist locally are skipped.
        $entity = \Drupal::service('entity.repository')->loadEntityByUuid($entity_type, $entity_uuid);

        if ($entity) {
          $result[] = $intent->addDependency($entity, [
            'tag' => 'prosemirror',
          ]);
        }
      }
    }
    catch (\Exception $e) {
      // Log the error but don't break the sync.
      if (isset($this->logger)) {
        $this->logger->warning(
          'Failed to parse ProseMirror content for dependencies: @message<br>Flow: @flow_id | Pool: @pool_id',
          [
            '@message' => $e->getMessage(),
            '@flow_id' => $intent->getFlow()->id(),
            '@pool_id' => implode(',', $intent->getPoolIds()),
          ]
        );
      }
    }

    return $result;
  }

  /**
   * Go through the text and check for referenced entities like media and files
   * to add them as dependencies to the sync so they are updated along with the
   * parent entity that contains this field.
   *
   * The parser is picked by the enabled filters of the text format: Gutenberg
   * first, then Markdown, then ProseMirror; everything else (including a
   * missing format ID) is treated as regular HTML.
   *
   * @param \Drupal\cms_content_sync\PushIntent $intent
   *   The PushIntent to add the dependencies to.
   * @param string $text
   *   The field's ['value'].
   * @param string|null $format_id
   *   The field's ['format'].
   *
   * @return array|null
   *   The dependencies found or NULL if the given text format doesn't exist.
   */
  protected function extractDependencies(PushIntent $intent, string $text, ?string $format_id) {
    if (!$format_id) {
      return self::extractDependenciesFromHtml($intent, $text, $this->logger);
    }

    $formats = filter_formats();
    if (empty($formats[$format_id])) {
      return NULL;
    }

    $filters = $formats[$format_id]->filters()->getAll();

    if (!empty($filters['gutenberg']->status)) {
      return $this->extractDependenciesFromGutenberg($intent, $text);
    }

    if (!empty($filters['markdown']->status)) {
      return $this->extractDependenciesFromMarkdown($intent, $text);
    }

    // Both IDs are accepted as this class refers to the ProseMirror filter by
    // 'prosemirror_filter' on push and by 'prosemirror' on pull.
    // NOTE(review): confirm the actual plugin ID of the ProseMirror filter.
    if (!empty($filters['prosemirror_filter']->status) || !empty($filters['prosemirror']->status)) {
      return $this->extractDependenciesFromProseMirror($intent, $text);
    }

    return self::extractDependenciesFromHtml($intent, $text, $this->logger);
  }

  /**
   * {@inheritdoc}
   */
  public function push(PushIntent $intent) {
    if (!parent::push($intent)) {
      return FALSE;
    }

    $field_items = $intent->getEntity()->get($this->fieldName);
    $value = $field_items->getValue();

    // Write through the index instead of iterating by reference so no
    // dangling reference to the last item is left behind after the loop.
    foreach ($value as $index => $item) {
      if (empty($item['value'])) {
        continue;
      }

      // Register everything the text references (files, media, nodes, ...)
      // as a dependency and ship the list along with the field value.
      $value[$index]['embedded'] = $this->extractDependencies($intent, $item['value'], $item['format'] ?? NULL);

      // Also provide the processed text, cast to a plain string.
      $processed = $field_items->get($index)->processed;
      if (!empty($processed)) {
        $value[$index]['processed'] = (string) $processed;
      }
    }

    $intent->setProperty($this->fieldName, $value);

    return TRUE;
  }

  /**
   * Replace all "/node/..." links with their correct ID for the current site.
   *
   * File URLs pointing to "/sites/{site}/files/..." in <img src> and <a href>
   * attributes are rewritten to the URL of the matching local file as well.
   * References that can't be resolved yet are stored as unresolved
   * dependencies and as missing references at the entity status.
   *
   * @todo If a new entity is added, we should scan the database for existing
   * references to it that can now be resolved.
   *
   * @param \Drupal\cms_content_sync\PullIntent $intent
   *   The PullIntent that provides the embedded entities.
   * @param string $text
   *   The field's ['value'].
   * @param array|null $references
   *   The field's ['embedded'] references, if any.
   *
   * @return string
   *   The text with all resolvable links replaced.
   */
  protected function replaceEntityReferenceLinks($intent, $text, $references) {
    $entity_repository = \Drupal::service('entity.repository');

    /**
     * @var \Drupal\Core\Entity\EntityInterface $entity
     */
    $entity = $intent->getEntity();
    $status = $intent->getEntityStatus();
    $language = $entity instanceof TranslatableInterface ? $entity->language()->getId() : 'und';

    // Start from a clean slate; missing references are re-added below.
    $status->resetMissingReferences($language, $this->fieldName);

    $replace_uri_callback = function ($matches) use ($intent) {
      $path = $matches[2];

      // PDF files can have a #page=... anchor attached that we want to keep.
      // Split at the first "#" only so the remainder is kept in one piece.
      $anchor = NULL;
      if (str_contains($path, '#')) {
        [$path, $anchor] = explode('#', $path, 2);
      }

      // File URIs may include e.g. an ?itok=... from some text editors.
      $query = NULL;
      if (str_contains($path, '?')) {
        [$path, $query] = explode('?', $path, 2);
      }

      $parts = explode('/', $path);
      // Skip the first two segments of paths starting with "styles/".
      if ($parts[0] === 'styles') {
        $parts = array_slice($parts, 2);
      }

      // Drop leading path segments one by one until a file matches.
      $file = NULL;
      $uri = NULL;
      $part_count = count($parts);
      for ($i = 0; $i < $part_count; ++$i) {
        $uri = 'public://' . urldecode(implode('/', array_slice($parts, $i)));

        /** @var \Drupal\file\FileInterface[] $files */
        $files = \Drupal::entityTypeManager()
          ->getStorage('file')
          ->loadByProperties(['uri' => $uri]);

        if (count($files)) {
          $file = reset($files);

          break;
        }

        $file = $intent->loadEmbeddedFile($uri);
        if (!empty($file)) {
          break;
        }
      }

      if (!$file) {
        $this->logger->error(
              'Failed to replace file URI in the formatted text as the file is missing on this site: @uri',
              [
                '@uri' => 'public://' . urldecode($path),
              ]
          );

        $base_path = PublicStream::basePath();

        // Keep the original file path but below this site's public files path.
        return $matches[1] . '"/' . $base_path . '/' . $matches[2] . '"';
      }

      $url = \Drupal::service('file_url_generator')->generateString($uri);

      if ($query) {
        $url .= '?' . $query;
      }
      if ($anchor) {
        $url .= '#' . $anchor;
      }

      return $matches[1] . '"' . $url . '"';
    };

    // Simple image embedding (default ckeditor + IMCE images)
    $text = preg_replace_callback(
          '@(<img[^>]+src=)"/sites/[^/]+/files/([^"]+)"@',
          $replace_uri_callback,
          $text
      );

    // Other file embedding (IMCE files)
    $text = preg_replace_callback(
          '@(<a[^>]+href=)"/sites/[^/]+/files/([^"]+)"@',
          $replace_uri_callback,
          $text
      );

    $fieldName = $this->fieldName;

    // References we can resolve by the UUID being provided as an attribute.
    $text = preg_replace_callback(
      '@data-entity-uuid="([0-9a-z-]+)" href="/node/([0-9]+)"@',
      function ($matches) use ($entity_repository, $intent, $references, $fieldName, $status, $language) {
        $uuid = $matches[1];
        $id = $matches[2];

        try {
          $node = $entity_repository->loadEntityByUuid('node', $uuid);
          if ($node) {
            $id = $node->id();
          }
          elseif (!empty($references)) {
            // The node doesn't exist here (yet), so remember the reference to
            // be able to resolve it later.
            foreach ($references as $candidate) {
              $reference = $intent->loadReference($candidate);
              if ($reference->getUuid() === $uuid) {
                $intent->saveUnresolvedDependency($candidate);
                $status->addMissingReference($language, $fieldName, $candidate);
                break;
              }
            }
          }
        }
        catch (\Exception $e) {
          // Best effort: keep the link as-is, but don't hide the failure.
          $this->logger->warning(
            'Failed to resolve node reference by UUID in the formatted text: @uuid<br>@message',
            [
              '@uuid' => $uuid,
              '@message' => $e->getMessage(),
            ]
          );
        }

        return 'data-entity-uuid="' . $uuid . '" href="/node/' . $id . '"';
      },
      $text
    );

    // Entity references.
    $text = preg_replace_callback(
          '@href="(/node/[0-9]+)"@',
          function ($matches) use ($intent, $references, $fieldName, $status, $language) {
            $path = $matches[1];

            if (!empty($references)) {
              // Embedded entities carry the ID of the source site as
              // "entity:node/{id}" in their 'internal_url'.
              $internal_url = 'entity:' . substr($path, 1);
              foreach ($references as $candidate) {
                $reference_details = $intent->getEmbeddedEntityData($candidate);
                if (!empty($reference_details['internal_url']) && $reference_details['internal_url'] === $internal_url) {
                  $referenced = $intent->loadEmbeddedEntity($candidate);
                  if ($referenced) {
                    return 'href="/node/' . $referenced->id() . '"';
                  }
                  else {
                    $intent->saveUnresolvedDependency($candidate);
                    $status->addMissingReference($language, $fieldName, $candidate);
                  }
                }
              }
            }

            // Return as-is to not overwrite values from previous matches by UUID.
            return 'href="' . $path . '"';
          },
          $text
      );

    return $text;
  }

  /**
   * Checks if a text format uses the ProseMirror filter.
   *
   * @param string $format_id
   *   The text format ID.
   *
   * @return bool
   *   TRUE if the format uses ProseMirror filter, FALSE otherwise (including
   *   when the text format doesn't exist).
   */
  protected function isProseMirrorFormat(string $format_id): bool {
    $formats = filter_formats();
    if (empty($formats[$format_id])) {
      return FALSE;
    }

    $filters = $formats[$format_id]->filters()->getAll();

    // Both IDs are accepted as this class refers to the ProseMirror filter by
    // 'prosemirror_filter' on push and by 'prosemirror' on pull.
    // NOTE(review): confirm the actual plugin ID of the ProseMirror filter.
    return !empty($filters['prosemirror_filter']->status)
      || !empty($filters['prosemirror']->status);
  }

  /**
   * Loads the embedded references of ProseMirror content during a pull.
   *
   * Each given reference is passed to PullIntent::loadEmbeddedEntity(); the
   * loaded entities themselves aren't used here and the text isn't inspected
   * or changed.
   *
   * @param \Drupal\cms_content_sync\PullIntent $intent
   *   The pull intent.
   * @param string $text
   *   The ProseMirror JSON content (currently unused).
   * @param array|null $references
   *   The embedded references.
   */
  protected function addProseMirrorDependenciesToPull(PullIntent $intent, string $text, ?array $references): void {
    // NULL and an empty list both result in no iteration at all.
    foreach ($references ?? [] as $embedded_reference) {
      $intent->loadEmbeddedEntity($embedded_reference);
    }
  }

  /**
   * {@inheritDoc}
   */
  public function definePropertyAtType(IDefineEntityType $type_definition) {
    $property = parent::definePropertyAtType($type_definition);

    // Only object properties can take the additional sub-properties; anything
    // else (including an empty result) is passed on untouched.
    if ($property instanceof IDefineObjectProperty) {
      // The list of entities referenced from within the text.
      $property->addReferenceProperty('embedded', 'Embedded', TRUE, FALSE, 'embedded_entities');
      // The processed text provided alongside the raw value.
      $property->addStringProperty('processed', 'Processed', FALSE, FALSE, 'text_long_processed');
    }

    return $property;
  }

}

Главная | Обратная связь

drupal hosting | друпал хостинг | it patrol .inc