media_migration-8.x-1.x-dev/src/Plugin/migrate/process/ImgTagToEmbedFilter.php

src/Plugin/migrate/process/ImgTagToEmbedFilter.php
<?php

namespace Drupal\media_migration\Plugin\migrate\process;

use Drupal\Component\Utility\Variable;
use Drupal\Core\Database\Connection;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Logger\LoggerChannelInterface;
use Drupal\Core\Logger\RfcLogLevel;
use Drupal\media_migration\MediaMigration;
use Drupal\media_migration\MediaMigrationUuidOracleInterface;
use Drupal\media_migration\Utility\MigrationPluginTool;
use Drupal\migrate\MigrateExecutableInterface;
use Drupal\migrate\MigrateLookupInterface;
use Drupal\migrate\Plugin\migrate\source\SqlBase;
use Drupal\migrate\Plugin\MigrationInterface;
use Drupal\migrate\Row;
use Masterminds\HTML5;
use Masterminds\HTML5\Parser\StringInputStream;
use Symfony\Component\DependencyInjection\ContainerInterface;

/**
 * Transforms <img src="/files/cat.png"> tags to <drupal-media …>.
 *
 * @MigrateProcessPlugin(
 *   id = "img_tag_to_embed"
 * )
 */
class ImgTagToEmbedFilter extends EmbedFilterBase {

  /**
   * The logger.
   *
   * @var \Drupal\Core\Logger\LoggerChannelInterface
   */
  protected $logger;

  /**
   * The plugin ID of the filter which processes the embed code on destination.
   *
   * @var string
   */
  protected $destinationFilterPluginId;

  /**
   * Constructs a new ImgTagToEmbedFilter object.
   *
   * @param array $configuration
   *   A configuration array containing information about the plugin instance.
   * @param string $plugin_id
   *   The plugin_id for the plugin instance.
   * @param mixed $plugin_definition
   *   The plugin implementation definition.
   * @param \Drupal\migrate\Plugin\MigrationInterface $migration
   *   The migration entity.
   * @param \Drupal\media_migration\MediaMigrationUuidOracleInterface $media_uuid_oracle
   *   The media UUID oracle.
   * @param \Drupal\Core\Logger\LoggerChannelInterface $logger
   *   The logger.
   * @param \Drupal\entity_embed\EntityEmbedDisplay\EntityEmbedDisplayManager|null $entity_embed_display_manager
   *   The entity embed display plugin manager service, if available.
   * @param \Drupal\migrate\MigrateLookupInterface $migrate_lookup
   *   The migration lookup service.
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity_type_manager
   *   The entity type manager.
   */
  public function __construct(array $configuration, $plugin_id, $plugin_definition, MigrationInterface $migration, MediaMigrationUuidOracleInterface $media_uuid_oracle, LoggerChannelInterface $logger, $entity_embed_display_manager, MigrateLookupInterface $migrate_lookup, EntityTypeManagerInterface $entity_type_manager) {
    parent::__construct($configuration, $plugin_id, $plugin_definition, $migration, $media_uuid_oracle, $entity_embed_display_manager, $migrate_lookup, $entity_type_manager);

    $this->logger = $logger;
    $this->destinationFilterPluginId = MediaMigration::getEmbedTokenDestinationFilterPlugin();
    $this->migrateLookup = $migrate_lookup;
    $this->mediaStorage = $entity_type_manager->getStorage('media');
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition, MigrationInterface $migration = NULL) {
    return new static(
      $configuration,
      $plugin_id,
      $plugin_definition,
      $migration,
      $container->get('media_migration.media_uuid_oracle'),
      $container->get('logger.channel.media_migration'),
      $container->get('plugin.manager.entity_embed.display', ContainerInterface::NULL_ON_INVALID_REFERENCE),
      $container->get('migrate.lookup'),
      $container->get('entity_type.manager')
    );
  }

  /**
   * {@inheritdoc}
   */
  public function transform($value, MigrateExecutableInterface $migrate_executable, Row $row, $destination_property) {
    $value_is_array = is_array($value);
    $text = (string) ($value_is_array ? $value['value'] : $value);
    if (strpos($text, '<img ') === FALSE) {
      return $value;
    }

    if (!MediaMigration::embedTokenDestinationFilterPluginIsValid($this->destinationFilterPluginId)) {
      return $value;
    }

    $source_plugin = $this->migration->getSourcePlugin();
    if (!$source_plugin instanceof SqlBase) {
      return $value;
    }

    $probable_domain_names = $this->getProbableDomainNames($source_plugin->getDatabase());

    // Document why HTML5 instead of DomDocument.
    $html5 = new HTML5(['disable_html_ns' => TRUE]);

    // Compatibility for older HTML5 versions (e.g. in Drupal core 8.9.x).
    $dom_text = '<html><body>' . $text . '</body></html>';
    try {
      $dom = $html5->parse($dom_text);
    }
    catch (\TypeError $e) {
      $text_stream = new StringInputStream($dom_text);
      $dom = $html5->parse($text_stream);
    }

    $d7_file_public_path = $this->variableGet($source_plugin->getDatabase(), 'file_public_path', 'sites/default/files');
    $source_connection = $source_plugin->getDatabase();

    $images = $dom->getElementsByTagName('img');
    $images_count = $images->length;
    $skipped_images_count = 0;

    for ($i = 0; $i < $images_count; $i++) {
      $image = $images->item($skipped_images_count);
      $src = rawurldecode($image->getAttribute('src'));
      $url_parts = parse_url($src);
      $path = $url_parts['path'];

      // Support transforming absolute image URLs without knowing the source
      // site's domain name: validate that the correct public files path is
      // present in file URLs, and then look up the file by using the filename.
      if (strpos($path, '/' . $d7_file_public_path . '/') !== 0) {
        $skipped_images_count++;
        continue;
      }

      // Support transforming absolute image URLs without knowing the source
      // site's domain, but do not attempt to transform absolute URLs if we were
      // able to deduce probable domain names from watchdog log entries.
      if (isset($url_parts['host']) && !empty($probable_domain_names) && !in_array($url_parts['host'], $probable_domain_names)) {
        $skipped_images_count++;
        continue;
      }

      $escaped_file_path = preg_quote($d7_file_public_path, '/');
      $filesystem_location = preg_replace('/^\/' . $escaped_file_path . '\/(.*)$/', 'public://$1', $path);
      $file_id = FALSE;
      try {
        if ($source_connection->schema()->tableExists('file_managed')) {
          $file_id = $source_connection
            ->select('file_managed', 'fm')
            ->fields('fm', ['fid'])
            ->condition('fm.uri', $filesystem_location)
            ->execute()
            ->fetchField();
        }
      }
      catch (\Exception $e) {
      }

      if ($file_id === FALSE) {
        // If no file was found, distinguish between absolute URLs and relative
        // URLs. The latter are definitely errors on the source site. The former
        // may be hotlinking or not; this is impossible to know without knowing
        // the source site's domain(s).
        $row_source_id_string = preg_replace(
          '/\s+/',
          ' ',
          Variable::export($row->getSourceIdValues())
        );

        if (strpos($src, 'http') === 0 || strpos($src, '//') === 0) {
          $this->logger->log(RfcLogLevel::INFO, sprintf("No file found for the absolute image URL in tag '%s' used in the '%s' migration's source row with source ID %s while processing the destination property '%s'.", $html5->saveHTML($image), $this->migration->id(), $row_source_id_string, $destination_property));
        }
        else {
          $this->logger->log(RfcLogLevel::WARNING, sprintf("No file found for the relative image URL in tag '%s' used in the '%s' migration's source row with source ID %s while processing the destination property '%s'.", $html5->saveHTML($image), $this->migration->id(), $row_source_id_string, $destination_property));
        }

        $skipped_images_count++;
        continue;
      }

      // Delete the consumed attribute.
      $image->removeAttribute('src');

      // Generate the <drupal-media> tag that will replace the <img> tag.
      $replacement_node = $this->createEmbedNode($dom, $file_id);

      // Best-effort support for data-align.
      // @see \Drupal\filter\Plugin\Filter\FilterAlign
      // @see https://developer.mozilla.org/en-US/docs/Web/HTML/Element/Img#attr-align
      if ($image->hasAttribute('align')) {
        $replacement_node->setAttribute('data-align', $image->getAttribute('align'));
        // Delete the consumed attribute.
        $image->removeAttribute('align');
      }
      if ($image->hasAttribute('style')) {
        $styles = explode(';', $image->getAttribute('style'));
        foreach ($styles as $index => $style) {
          // We have to get the last value of a float style property definition,
          // so we must not have a break here, after the first match.
          if (preg_match('/;float\s*\:\s*(left|right);/', ';' . trim($style) . ';', $matches)) {
            $replacement_node->setAttribute('data-align', $matches[1]);
            unset($styles[$index]);
            $image->setAttribute('style', implode(';', $styles));
          }
        }
      }

      // Best-effort support for data-caption.
      // @see \Drupal\filter\Plugin\Filter\FilterCaption
      // @see https://developer.mozilla.org/en-US/docs/Web/HTML/Element/figcaption
      $target_node = $image;
      if ($image->parentNode->tagName === 'figure') {
        $target_node = $image->parentNode;
        foreach ($image->parentNode->childNodes as $child) {
          if ($child instanceof \DOMElement && $child->tagName === 'figcaption') {
            $caption_html = $html5->saveHTML($child->childNodes);
            $replacement_node->setAttribute('data-caption', $caption_html);
            break;
          }
        }
      }

      // Retain all other attributes. Currently the media_embed filter
      // explicitly supports the `alt` and `title` attributes, but it may
      // support more attributes in the future. We avoid data loss and allow
      // contrib modules to add more filtering.
      // @see \Drupal\media\Plugin\Filter\MediaEmbed::applyPerEmbedMediaOverrides()
      foreach ($image->attributes as $attribute) {
        if ($attribute->name === 'style' && empty($attribute->value)) {
          continue;
        }
        $replacement_node->setAttribute($attribute->name, $attribute->value);
      }

      $target_node->parentNode->insertBefore($replacement_node, $target_node);
      $target_node->parentNode->removeChild($target_node);
    }

    $result = $html5->saveHTML($dom->documentElement->firstChild->childNodes);
    if ($value_is_array) {
      $value['value'] = $result;
    }
    else {
      $value = $result;
    }
    return $value;
  }

  /**
   * Reads a variable from a source Drupal database.
   *
   * @param \Drupal\Core\Database\Connection $connection
   *   The source database connection.
   * @param string $name
   *   Name of the variable.
   * @param mixed $default
   *   The default value.
   *
   * @return mixed
   *   The unserialized value of the Drupal 7 variable, of the given default.
   */
  protected function variableGet(Connection $connection, string $name, $default) {
    try {
      $result = $connection->select('variable', 'v')
        ->fields('v', ['value'])
        ->condition('name', $name)
        ->execute()
        ->fetchField();
    }
    // The table might not exist.
    catch (\Exception $e) {
      $result = FALSE;
    }
    return $result !== FALSE ? unserialize($result) : $default;
  }

  /**
   * Gets the probable domain names by inspecting the watchdog table.
   *
   * @param \Drupal\Core\Database\Connection $connection
   *   The source database connection.
   *
   * @return string[]
   *   The probable domain names.
   */
  protected function getProbableDomainNames(Connection $connection) : array {
    try {
      $query = $connection->select('watchdog', 'w');
      $query->addExpression('DISTINCT (SUBSTR(SUBSTR(location, INSTR(location, \'//\') + 2), 1, INSTR(SUBSTR(location, INSTR(location, \'//\') + 2), \'/\') - 1))');
      $result = $query->execute()
        ->fetchAll();
    }
    // The table might not exist.
    catch (\Exception $e) {
      return [];
    }
    $domain_names = [];
    foreach ($result as $row) {
      $domain_names[] = $row->expression;
    }
    return $domain_names;
  }

  /**
   * Creates a DOM element representing an embed media on the destination.
   *
   * @param \DOMDocument $dom
   *   The \DOMDocument in which the embed \DOMElement is being created.
   * @param string|int $file_id
   *   The ID of the file which should be represented by the new embed tag.
   *
   * @return \DOMElement
   *   The new embed tag as a writable \DOMElement.
   */
  protected function createEmbedNode(\DOMDocument $dom, $file_id) {
    $filter_destination_is_entity_embed = $this->destinationFilterPluginId === MediaMigration::MEDIA_TOKEN_DESTINATION_FILTER_ENTITY_EMBED;
    $tag = $filter_destination_is_entity_embed ?
      'drupal-entity' :
      'drupal-media';
    $display_mode_attribute = $filter_destination_is_entity_embed ?
      'data-entity-embed-display' :
      'data-view-mode';

    $embed_node = $dom->createElement($tag);
    $embed_node->setAttribute('data-entity-type', 'media');
    $migrations = $this->configuration['migrations'] ?? MigrationPluginTool::getMediaEntityMigrationIds();
    if (MediaMigration::getEmbedMediaReferenceMethod() === MediaMigration::EMBED_MEDIA_REFERENCE_METHOD_ID) {
      $destination_id = $this->getMigratedMediaId($file_id, $migrations);
      $embed_node->setAttribute('data-entity-id', $destination_id);
    }
    else {
      $uuid = $this->getExistingMediaUuid($file_id, $migrations) ??
        $this->mediaUuidOracle->getMediaUuid((int) $file_id);
      $embed_node->setAttribute('data-entity-uuid', $uuid);
    }
    $embed_node->setAttribute($display_mode_attribute, 'default');
    if ($filter_destination_is_entity_embed) {
      $embed_node->setAttribute('data-embed-button', 'media');
    }
    $embed_node->setAttribute($display_mode_attribute, $this->getDisplayPluginId('default', $this->destinationFilterPluginId));

    return $embed_node;
  }

}

Главная | Обратная связь

drupal hosting | друпал хостинг | it patrol .inc