drupalorg_migrate-1.0.x-dev/src/Drush/Commands/DrushCommands.php

src/Drush/Commands/DrushCommands.php
<?php

namespace Drupal\drupalorg_migrate\Drush\Commands;

use Drupal\Core\Cache\MemoryCache\MemoryCache;
use Drupal\Core\Config\ConfigFactory;
use Drupal\Core\Database\Database;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Language\LanguageInterface;
use Drupal\Core\Language\LanguageManagerInterface;
use Drupal\Core\Queue\QueueFactory;
use Drupal\drupalorg\ProjectService;
use Drupal\node\Entity\Node;
use Drupal\paragraphs\ParagraphInterface;
use Drupal\path_alias\AliasManager;
use Drupal\path_alias\AliasRepositoryInterface;
use Drupal\path_alias\Entity\PathAlias;
use Drupal\redirect\Entity\Redirect;
use Drupal\user\Entity\User;
use Drupal\user\UserInterface;
use Drush\Attributes as CLI;
use Drush\Commands\AutowireTrait;
use Drush\Commands\DrushCommands as BaseDrushCommands;
use Symfony\Component\DependencyInjection\Attribute\Autowire;

/**
 * A drush command file.
 *
 * @package Drupal\drupalorg_migrate\Commands
 */
final class DrushCommands extends BaseDrushCommands {

  use AutowireTrait;

  /**
   * Number of entity ids requested per batch when comparing two databases.
   *
   * Memory usage around 60MB for 100000.
   *
   * @var int
   */
  const BATCH_LENGTH = 50000;

  /**
   * Node types to check in addition to project types.
   *
   * The list is every entry of ProjectService::PROJECT_TYPES followed by the
   * extra bundles named here.
   *
   * @var array
   */
  const NODE_TYPES_TO_CHECK = [
    ...ProjectService::PROJECT_TYPES,
    'project_release',
    // 'casestudy',
  ];

  /**
   * Constructs the command file with its autowired services.
   *
   * Every promoted parameter is bound to the service id named in the Autowire
   * attribute placed right above it.
   */
  public function __construct(
    #[Autowire(service: 'entity_type.manager')]
    protected EntityTypeManagerInterface $entityTypeManager,
    #[Autowire(service: 'entity.memory_cache')]
    protected MemoryCache $memoryCache,
    #[Autowire(service: 'queue')]
    protected QueueFactory $queueFactory,
    #[Autowire(service: 'path_alias.manager')]
    protected AliasManager $aliasManager,
    #[Autowire(service: 'path_alias.repository')]
    protected AliasRepositoryInterface $aliasRepository,
    #[Autowire(service: 'config.factory')]
    protected ConfigFactory $configFactory,
    #[Autowire(service: 'language_manager')]
    protected LanguageManagerInterface $languageManager,
  ) {
    parent::__construct();
  }

  /**
   * Syncs the D7 redirects, then the D7 aliases, into this site.
   *
   * Nothing is done unless the operator accepts the confirmation prompt.
   */
  #[CLI\Command(name: 'drupalorg_migrate:sync-aliases-and-redirects')]
  public function syncAliasesAndRedirects() {
    $confirmed = $this->io()->confirm(\dt('Do you want to sync aliases and redirects?'));
    if (!$confirmed) {
      return;
    }

    // Redirects are processed before aliases.
    $this->syncRedirects();
    $this->syncAliases();
  }

  /**
   * Looks up redirects on this site by their source path.
   *
   * Leading and trailing slashes are stripped from the path before matching
   * it against the "redirect_source__path" column.
   *
   * @param string $source_path
   *   Path to search.
   * @param bool $count
   *   TRUE (default) to return the number of matches, FALSE to return the
   *   first matching row.
   * @param bool $only_enabled
   *   Include only enabled redirects.
   *
   * @return mixed
   *   In count mode, the value of the count query. Otherwise the first
   *   matching row of the "redirect" table, or FALSE if there is none.
   */
  protected function getRedirectBySourcePath(string $source_path, bool $count = TRUE, bool $only_enabled = FALSE) {
    $select = Database::getConnection()->select('redirect', 'r');
    $select->condition('redirect_source__path', trim($source_path, '/'));
    if ($only_enabled) {
      $select->condition('enabled', 1);
    }

    if (!$count) {
      // Row mode: fetch every column of the first match.
      $select->fields('r');
      return $select->execute()->fetch();
    }

    return $select->countQuery()->execute()->fetchField();
  }

  /**
   * Sync redirects from D7.
   *
   * Pages through the D7 "redirect" table and creates a redirect on this site
   * for every source path that has no redirect yet. A redirect that fails to
   * save is recorded in the error stats instead of aborting the whole run. A
   * summary (created / duplicated / errors) is printed at the end.
   */
  protected function syncRedirects() {
    $d7_connection = Database::getConnection('default', 'migrate');
    $query = $d7_connection->select('redirect', 'r');
    $query->fields('r', [
      'source',
      'redirect',
      'source_options',
      'redirect_options',
      'language',
      'status_code',
      'status',
    ]);

    $count = $query->countQuery()->execute()->fetchField();
    $this->io()->note(\dt('@count redirects will be synced from D7.', [
      '@count' => $count,
    ]));

    // Paging with range() needs a stable order, otherwise rows can be skipped
    // or repeated between pages.
    $query->orderBy('r.rid');

    // Stats.
    $stats = [
      'created' => 0,
      'duplicated' => 0,
      'errors' => [],
    ];
    $default_status_code = $this->configFactory->get('redirect.settings')->get('default_status_code');

    // First batch.
    $offset = 0;
    $limit = 500;
    $batch = $query->range($offset, $limit)->execute()->fetchAll();
    $this->io()->progressStart($count);
    while (!empty($batch)) {
      foreach ($batch as $item) {
        // Massage the data. The option columns only hold plain arrays, so
        // object instantiation is disabled while unserializing.
        $redirect_url = str_starts_with($item->redirect, 'http') ? $item->redirect : '/' . $item->redirect;
        $source_options = unserialize($item->source_options, ['allowed_classes' => FALSE]);
        $redirect_options = unserialize($item->redirect_options, ['allowed_classes' => FALSE]);
        $source_query = $source_options['query'] ?? [];
        $redirect_query = $redirect_options['query'] ?? [];
        // The fragment belongs to the redirect target, so it is read from
        // the redirect options.
        $redirect_fragment = (!empty($redirect_options['fragment'])) ?
          ['fragment' => $redirect_options['fragment']] :
          [];

        $source_path = trim($item->source, '/');
        $existing_redirect = $this->getRedirectBySourcePath($source_path);
        if ($existing_redirect) {
          $stats['duplicated']++;
          continue;
        }

        try {
          // Create the redirect.
          $redirect = Redirect::create();
          $redirect->setSource($source_path, $source_query);
          $redirect->setRedirect($redirect_url, $redirect_query, $redirect_fragment);
          $redirect->setStatusCode($item->status_code ?: $default_status_code);
          if ($item->status) {
            $redirect->setPublished();
          }
          else {
            // @todo Should we create disabled redirects?
            $redirect->setUnpublished();
          }
          $redirect->save();
          $stats['created']++;
        }
        catch (\Exception $exception) {
          // Keep going: one bad row must not stop the remaining redirects.
          $stats['errors'][] = $source_path . ': ' . $exception->getMessage();
        }
      }

      // Next batch. Remember how many rows were handled before the batch
      // variable is overwritten, so the progress bar stays accurate.
      $processed = count($batch);
      $offset += $limit;
      $batch = $query->range($offset, $limit)->execute()->fetchAll();
      $this->io()->progressAdvance($processed);

      // Try to reduce memory accumulating.
      $this->memoryCache->deleteAll();
    }
    $this->io()->progressFinish();

    if (!empty($stats['errors'])) {
      $this->io()->warning(\dt("There were @count error redirects. Showing them below (up to 50):\n@errors\n\n", [
        '@count' => count($stats['errors']),
        '@errors' => print_r(array_slice($stats['errors'], 0, 50), TRUE),
      ]));
    }

    $this->io()->info(\dt("\n- Created: @created\n- Duplicated: @duplicated\n- Errors: @errors\n", [
      '@created' => $stats['created'],
      '@duplicated' => $stats['duplicated'],
      '@errors' => count($stats['errors']),
    ]));
  }

  /**
   * Sync the aliases from D7.
   *
   * Aliases can be modified and there is no timestamp, so we need to go
   * through all of them. Also, aliases can be set in D7 and modern
   * Drupal, so we cannot compare between instances with a diff query.
   *
   * For every D7 user/node alias whose entity exists on this site:
   * - no alias here yet: the D7 alias is created;
   * - a different alias here: it is overwritten with the D7 one, unless an
   *   enabled redirect already uses the current alias as its source;
   * - the same alias here: it is counted as identical.
   */
  protected function syncAliases() {
    $log_not_found = FALSE;
    $d7_connection = Database::getConnection('default', 'migrate');

    $query = $d7_connection->select('url_alias', 'a');
    $orGroup = $query->orConditionGroup();
    $orGroup->condition('source', 'node/%', 'LIKE');
    $orGroup->condition('source', 'user/%', 'LIKE');
    $query->condition($orGroup);
    // Issue aliases will be redirects.
    $query->condition('alias', 'project/%/issues/%', 'NOT LIKE');
    // Ignore other aliases we are not processing yet.
    $query->condition('alias', '%/releases', 'NOT LIKE');
    $query->condition('alias', '%/testing-status', 'NOT LIKE');
    $query->condition('alias', '%/cvs-instructions', 'NOT LIKE');
    $query->condition('alias', '%/git-instructions', 'NOT LIKE');
    $query->fields('a', ['source', 'alias']);

    $count = $query->countQuery()->execute()->fetchField();
    $this->io()->note(\dt('@count (user and node) aliases will be synced from D7.', [
      '@count' => $count,
    ]));

    // Paging with range() needs a stable order, otherwise rows can be skipped
    // or repeated between pages. With ascending pid, when a page holds several
    // aliases for one source, the row with the highest pid is the one kept.
    $query->orderBy('a.pid');

    // Stats.
    $stats = [
      'created' => 0,
      'not_found' => 0,
      'conflict' => 0,
      'identical' => 0,
      'ignored' => [],
    ];
    $conflicts = [];

    // The language used for alias lookups does not change during the run.
    $langcode = $this->languageManager->getCurrentLanguage(LanguageInterface::TYPE_URL)->getId();
    $alias_storage = $this->entityTypeManager->getStorage('path_alias');

    // First batch.
    $offset = 0;
    $limit = 5000;
    $batch = $query->range($offset, $limit)->execute()->fetchAll();
    $this->io()->progressStart($count);
    while (!empty($batch)) {
      // Process the batch loading the entities with a matching id.
      $batch_data = $this->loadEntitiesFromBatch($batch);
      $stats['ignored'] += $batch_data['ignored'];
      foreach (['user', 'node'] as $entity_type) {
        // Fill up the not found stats.
        if (!empty($batch_data['not_found'][$entity_type])) {
          $stats['not_found'] += count($batch_data['not_found'][$entity_type]);
          $log_not_found && $this->logger()->warning('@type aliases not found on the new system: @ids', [
            '@ids' => implode(',', $batch_data['not_found'][$entity_type]),
            '@type' => $entity_type,
          ]);
        }

        // Loop through the ones that had a matching entity.
        foreach ($batch_data[$entity_type] as $entity) {
          /** @var \Drupal\Core\Entity\EntityInterface $entity */
          $source = $entity_type . '/' . $entity->id();
          $alias = $batch_data['alias_map'][$entity_type][$entity->id()];
          $modern_alias = $this->aliasManager->getAliasByPath('/' . $source);
          if ($modern_alias === '/' . $source) {
            // We recognize the route in the system, create the alias.
            $path_alias = PathAlias::create([
              'path' => '/' . $source,
              'alias' => '/' . $alias,
            ]);
            $path_alias->save();
            $stats['created']++;
            continue;
          }

          if ($modern_alias === '/' . $alias) {
            // We have it and it is already identical.
            $stats['identical']++;
            continue;
          }

          // We have it and it's different.
          $stats['conflict']++;

          // Find the stored alias so it can be changed to match the old one.
          $existing_alias = $this->aliasRepository->lookupByAlias($modern_alias, $langcode);
          if (!$existing_alias) {
            // The alias manager returned an alias the repository cannot
            // find; there is nothing to overwrite, so only report it.
            $conflicts[] = [
              'modern' => $modern_alias,
              'old' => '/' . $alias,
              'action' => 'none',
            ];
            continue;
          }

          // Check if there is a redirect like the one that would be created
          // automatically to avoid a "Duplicate entry" error.
          $modern_path_redirect = $this->getRedirectBySourcePath($modern_alias, FALSE);
          if ($modern_path_redirect && $modern_path_redirect->enabled) {
            // If the existing redirect points at the same path, we'll
            // consider that a duplicate instead of a conflict.
            if ($modern_path_redirect->redirect_redirect__uri === 'internal:' . $existing_alias['path']) {
              $stats['conflict']--;
              $stats['identical']++;
            }
            else {
              // Redirect is enabled, we cannot override the alias.
              // Continue reporting the conflict and do nothing else.
              $conflicts[] = [
                'modern' => $modern_alias,
                'old' => '/' . $alias,
                'action' => 'none',
              ];
            }
            continue;
          }

          /** @var \Drupal\path_alias\PathAliasInterface|null $alias_entity */
          $alias_entity = $alias_storage->load($existing_alias['id']);
          if (!$alias_entity) {
            // The alias row vanished between lookup and load; report only.
            $conflicts[] = [
              'modern' => $modern_alias,
              'old' => '/' . $alias,
              'action' => 'none',
            ];
            continue;
          }

          if ($modern_path_redirect) {
            // Redirect disabled: remove it so we can create the new one.
            redirect_repository()->load($modern_path_redirect->rid)->delete();
          }

          // Check if we have redirects linked to the D7 alias matching
          // the same node.
          $existing_redirect = $this->getRedirectBySourcePath($alias, FALSE);
          if (
            $existing_redirect &&
            $existing_redirect->redirect_redirect__uri === 'internal:' . $existing_alias['path']
          ) {
            // Remove the redirect to avoid a redirect loop.
            redirect_repository()->load($existing_redirect->rid)->delete();
          }

          $alias_entity->setAlias('/' . $alias);
          // Saving the new alias will create a redirect from old to new.
          $alias_entity->save();

          $conflicts[] = [
            'modern' => $modern_alias,
            'old' => '/' . $alias,
            'action' => 'overwritten',
          ];
        }
      }

      // Next batch. Remember how many rows were handled before the batch
      // variable is overwritten, so the progress bar stays accurate.
      $processed = count($batch);
      $offset += $limit;
      $batch = $query->range($offset, $limit)->execute()->fetchAll();
      $this->io()->progressAdvance($processed);

      // Try to reduce memory accumulating.
      $this->memoryCache->deleteAll();
      $this->aliasManager->cacheClear();
    }
    $this->io()->progressFinish();

    if (!empty($conflicts)) {
      $this->io()->warning(\dt("There were @count conflicts. Showing them below (up to 50):\n@conflicts\n\n", [
        '@count' => count($conflicts),
        '@conflicts' => print_r(array_slice($conflicts, 0, 50), TRUE),
      ]));
    }

    if (!empty($stats['ignored'])) {
      $this->io()->warning(\dt("There were @count ignored aliases. Showing them below (up to 50):\n@ignored\n\n", [
        '@count' => count($stats['ignored']),
        '@ignored' => print_r(array_slice($stats['ignored'], 0, 50), TRUE),
      ]));
    }

    $this->io()->info(\dt("\n- Created: @created\n- Not found: @not_found\n- Conflict: @conflict\n- Identical: @identical\n- Ignored: @ignored\n", [
      '@created' => $stats['created'],
      '@not_found' => $stats['not_found'],
      '@conflict' => $stats['conflict'],
      '@identical' => $stats['identical'],
      '@ignored' => count($stats['ignored']),
    ]));
  }

  /**
   * Do a quick pass to load the entities in bulk.
   *
   * This is done so we don't need to load one by one when processing the batch.
   *
   * Only sources of the form "user/<id>" or "node/<id>" with a positive,
   * purely numeric id are mapped to entities. Anything else (for example
   * "user/login", which an integer cast would turn into id 0) is returned in
   * the "ignored" list.
   *
   * @param array $batch
   *   Rows from the D7 alias table, each with "source" and "alias" properties.
   *
   * @return array
   *   Array with the keys:
   *   - user / node: loaded entities keyed as returned by loadMultiple().
   *   - alias_map: per entity type, D7 alias keyed by entity id.
   *   - ignored: D7 alias keyed by source for rows that were not mapped.
   *   - not_found: per entity type, requested ids that could not be loaded.
   */
  protected function loadEntitiesFromBatch(array $batch): array {
    $entities = [
      'user' => [],
      'node' => [],
    ];
    $aliases = [
      'user' => [],
      'node' => [],
    ];
    $ignored = [];
    // This loop is really quick and barely has any processing.
    foreach ($batch as $d7_alias) {
      $parts = explode('/', $d7_alias->source);
      $is_entity_path = count($parts) === 2
        && isset($entities[$parts[0]])
        && ctype_digit($parts[1])
        && (int) $parts[1] > 0;
      if (!$is_entity_path) {
        $ignored[$d7_alias->source] = $d7_alias->alias;
        continue;
      }

      $id = (int) $parts[1];
      $entity_type = $parts[0];
      $entities[$entity_type][] = $id;
      // A later row for the same entity replaces the earlier alias.
      $aliases[$entity_type][$id] = $d7_alias->alias;
    }

    // Format the data.
    $data = [
      'user' => $this->entityTypeManager->getStorage('user')->loadMultiple($entities['user']),
      'node' => $this->entityTypeManager->getStorage('node')->loadMultiple($entities['node']),
      'alias_map' => $aliases,
      'ignored' => $ignored,
      'not_found' => [
        'user' => [],
        'node' => [],
      ],
    ];
    // Check the ones that are not found.
    foreach (['node', 'user'] as $entity_type) {
      $data['not_found'][$entity_type] = array_diff($entities[$entity_type], array_keys($data[$entity_type]));
    }

    return $data;
  }

  /**
   * Remove user redirects in modern Drupal programmatically.
   *
   * Deletes, after confirmation, every redirect whose source starts with
   * "u/" and whose target starts with "internal:/user/". Aliases are stored
   * on a different table and are not touched here.
   */
  #[CLI\Command(name: 'drupalorg_migrate:remove-user-redirects')]
  public function removeUserRedirects() {
    // Remove all redirects that start with `/u/` for users.
    $database = Database::getConnection();
    $base_query = $database->select('redirect')
      ->condition('redirect_source__path', 'u/%', 'LIKE')
      ->condition('redirect_redirect__uri', 'internal:/user/%', 'LIKE');
    $count = $base_query
      ->countQuery()
      ->execute()
      ->fetchField();
    if ($count && $this->io()->confirm(\dt('Do you want to remove @count redirects?', [
      '@count' => $count,
    ]))) {
      // See "redirect_delete_by_path".
      $base_query->addField('redirect', 'rid');
      $redirect_rids = $base_query
        ->execute()
        ->fetchCol();

      if ($redirect_rids) {
        // Count the real chunks so the total is right when the number of
        // redirects is an exact multiple of the chunk size.
        $chunks = array_chunk($redirect_rids, 100);
        $total_chunks = count($chunks);
        $done = 0;
        foreach ($chunks as $rids) {
          foreach (redirect_repository()->loadMultiple($rids) as $redirect) {
            $redirect->delete();
          }
          $done++;
          $this->io()->note(\dt('Deleted @done / @total chunks.', [
            '@done' => $done,
            '@total' => $total_chunks,
          ]));
        }
      }
    }
  }

  /**
   * Sync user organizations bypassing migrations.
   *
   * Only the users listed in the "uids" option (comma separated) are
   * processed. Whitespace around each id is ignored and empty entries are
   * dropped. Without any id the command only prints a notice.
   */
  #[CLI\Command(name: 'drupalorg_migrate:sync-user-organizations')]
  #[CLI\Option(name: 'uids', description: 'User IDs to sync.')]
  #[CLI\Option(name: 'force', description: 'Force a full sync.')]
  public function syncUserOrganizations(
    $options = [
      'uids' => NULL,
      'force' => NULL,
    ],
  ) {
    $force = (bool) $options['force'];
    $uids = [];
    if (!empty($options['uids'])) {
      // Accept input such as "1, 2,,3" by trimming and dropping blanks.
      $uids = array_filter(
        array_map(static fn ($uid): string => trim((string) $uid), explode(',', (string) $options['uids'])),
        static fn (string $uid): bool => $uid !== '',
      );
    }
    if (empty($uids)) {
      $this->io()->note(\dt('No UIDs given, nothing to sync.'));
      return;
    }

    foreach ($uids as $uid) {
      $this->io()->info(\dt('Processing UID @uid.', [
        '@uid' => $uid,
      ]));

      $user = User::load($uid);
      if ($user instanceof UserInterface) {
        $results = $this->syncUserOrganizationsFromD7($user, $force);
        // Only print the stats when something actually changed.
        if (
          $results['updated'] ||
          $results['added'] ||
          $results['deleted']
        ) {
          $this->io()->note(print_r($results, TRUE));
        }
      }
      else {
        $this->io()->warning(\dt('UID @uid not found on the new system.', [
          '@uid' => $uid,
        ]));
      }

      // Try to reduce memory accumulating.
      $this->memoryCache->deleteAll();
    }
  }

  /**
   * Sync the user organizations linked to the given user to match those on D7.
   *
   * Without $force, paragraphs whose organization name also exists in D7 are
   * kept, the others are deleted, and D7 organizations missing here are
   * created. With $force every existing paragraph is deleted and one
   * paragraph per D7 row is created.
   *
   * The user is saved whenever paragraphs were added or removed, so the
   * field never keeps pointing at paragraphs deleted by this method.
   *
   * @todo Once the users are managed via modern Drupal we will not need this.
   *
   * @param \Drupal\user\UserInterface $user
   *   User to sync.
   * @param bool $force
   *   Force a full reset of the entries.
   *
   * @return array
   *   Stats with the keys: deleted, added, d7_total, before_total,
   *   after_total and updated.
   */
  protected function syncUserOrganizationsFromD7(UserInterface $user, bool $force = FALSE) {
    // Fetch info from D7.
    $d7_connection = Database::getConnection('default', 'migrate');
    $query = $d7_connection->select('users', 'u');
    $query->leftJoin('field_data_field_organizations', 'fdf_o', 'fdf_o.entity_id = u.uid');
    $query->leftJoin('field_data_field_organization_reference', 'fdf_or', 'fdf_or.entity_id = fdf_o.field_organizations_value');
    $query->leftJoin('node', 'n', 'fdf_or.field_organization_reference_target_id = n.nid AND n.type = :type', [':type' => 'organization']);
    $query->leftJoin('field_data_field_current', 'fdf_c', 'fdf_c.entity_id = fdf_o.field_organizations_value');
    $query->leftJoin('field_data_field_job_title', 'fdf_jt', 'fdf_jt.entity_id = fdf_o.field_organizations_value');
    $query->leftJoin('field_data_field_organization_name', 'fdf_on', 'fdf_on.entity_id = fdf_o.field_organizations_value');
    $query->condition('u.uid', $user->id());
    $query->fields('u', ['uid', 'name']);
    $query->fields('fdf_on', ['field_organization_name_value']);
    $query->fields('n', ['nid']);
    $query->fields('fdf_c', ['field_current_value']);
    $query->fields('fdf_jt', ['field_job_title_value']);

    // Because of the left joins, a user without organizations still yields
    // one row whose organization columns are NULL. Those rows do not describe
    // an organization and are skipped.
    $d7_user_organizations = array_filter(
      $query->execute()->fetchAll(),
      static fn ($row): bool => $row->field_organization_name_value !== NULL,
    );

    // Organize by Org name.
    $user_orgs = [];
    foreach ($d7_user_organizations as $d7_user_organization) {
      $user_orgs[$d7_user_organization->field_organization_name_value] = $d7_user_organization;
    }

    // Get D10 information.
    $user_organizations = $user->get('field_user_organizations')->referencedEntities();

    // Find the differences.
    $to_stay = [];
    $to_delete = [];

    // D10 paragraphs loop. Check the ones that need to stay or go.
    foreach ($user_organizations as $user_organization) {
      /** @var \Drupal\Paragraphs\Entity\Paragraph $user_organization */
      if ($user_organization->bundle() !== 'organizations') {
        // Fix the data mismatch.
        $to_delete[] = $user_organization;
        continue;
      }

      $org_name = $user_organization->get('field_organization_name')->getValue()[0]['value'] ?? NULL;
      if (!$force && $org_name && isset($user_orgs[$org_name])) {
        $to_stay[$org_name] = $user_organization;
      }
      else {
        // Linked but shouldn't be there.
        $to_delete[] = $user_organization;
      }
    }

    $added = 0;
    if ($force) {
      // Add the D7 ones.
      foreach ($d7_user_organizations as $data) {
        $to_stay[] = $this->createParagraph($data);
        $added++;
      }
    }
    else {
      // Is there any missing from D7. Keys are used for the lookup, so
      // numeric organization names (stored as integer keys) work as well.
      $missing = array_diff(
        array_keys($user_orgs),
        array_keys($to_stay),
      );
      foreach ($missing as $org_name) {
        $to_stay[] = $this->createParagraph($user_orgs[$org_name]);
        $added++;
      }
    }

    // Update record. Removals need a save too, otherwise the field would keep
    // referencing the paragraphs deleted below.
    $updated = FALSE;
    if ($added > 0 || !empty($to_delete)) {
      $user->set('field_user_organizations', array_values($to_stay));
      $user->save();
      $updated = TRUE;
    }

    // Final clean up.
    foreach ($to_delete as $item) {
      /** @var \Drupal\Paragraphs\Entity\Paragraph $item */
      $item->delete();
    }

    return [
      'deleted' => count($to_delete),
      'added' => $added,
      'd7_total' => count($user_orgs),
      'before_total' => count($user_organizations),
      'after_total' => count($to_stay),
      'updated' => $updated,
    ];
  }

  /**
   * Removes users that exist on this site but are missing from D7.
   *
   * Hands the "users" table over to deleteEntitiesNotComingFromMigration().
   */
  #[CLI\Command(name: 'drupalorg_migrate:sync-users')]
  public function syncUsers() {
    $table = 'users';
    $this->deleteEntitiesNotComingFromMigration($table);
  }

  /**
   * Removes nodes that exist on this site but are missing from D7.
   *
   * Hands the "node" table over to deleteEntitiesNotComingFromMigration(),
   * which goes through the bundles in self::NODE_TYPES_TO_CHECK.
   */
  #[CLI\Command(name: 'drupalorg_migrate:sync-projects')]
  public function syncProjects() {
    $table = 'node';
    $this->deleteEntitiesNotComingFromMigration($table);
  }

  /**
   * Reports users and nodes present in D7 that are missing on this site.
   *
   * Users are checked first, nodes second.
   */
  #[CLI\Command(name: 'drupalorg_migrate:check-entities')]
  public function checkEntities() {
    foreach (['users', 'node'] as $table) {
      $this->checkEntitiesNotMigrated($table);
    }
  }

  /**
   * Checks for records removed in D7 and propagates the removal to D10.
   *
   * The diff file comes from "check-project-maintainers.sh".
   *
   * Only diff rows with a positive first column (present here, absent in D7)
   * are considered. Each one is re-checked against the D7 "project_maintainer"
   * table before it is deleted from "drupalorg_project_maintainer". Without
   * the "force" option the command refuses to act on 50 rows or more.
   */
  #[CLI\Command(name: 'drupalorg_migrate:sync-projects-maintainers')]
  #[CLI\Option(name: 'diff-file', description: 'Diff file with mismatches.')]
  #[CLI\Option(name: 'force', description: 'Force a full sync.')]
  public function syncProjectMaintainers(
    $options = [
      'diff-file' => NULL,
      'force' => NULL,
    ],
  ) {
    $force = (bool) $options['force'];
    $diff_file = $options['diff-file'];
    // The option is optional: never hand NULL to file_exists().
    if (!is_string($diff_file) || $diff_file === '' || !file_exists($diff_file)) {
      $this->io()->note('Diff file not found, nothing to process');
      return;
    }

    $this->io()->note('Checking diff file...');
    $handle = @fopen($diff_file, 'r');
    if (!$handle) {
      $this->io()->error('Failed to open diff file');
      return;
    }

    // Process file first.
    $reported_rows = [];
    while (($current_row = fgetcsv($handle, 0, "\t", '"', "\\")) !== FALSE) {
      if ($this->discardDiffRow($current_row, 6)) {
        continue;
      }

      // Only keep the ones on modern Drupal (+) not on old Drupal (-).
      $int_value = (int) $current_row[0];
      if ($int_value > 0) {
        $current_row[0] = $int_value;
        $reported_rows[] = $current_row;
      }
    }
    fclose($handle);

    // And then remove the entries that shouldn't be there.
    if (!empty($reported_rows)) {
      $count = count($reported_rows);
      if (!$force && $count >= 50) {
        // Only complain about the size when it is the reason for stopping,
        // not when a forced run was declined at the prompt.
        $this->io()->warning(\dt('Too many rows (@count - max expected 50), review the migration first.', [
          '@count' => $count,
        ]));
      }
      elseif ($this->io()->confirm(dt('Do you want to delete @count entries from drupalorg_project_maintainer?', [
        '@count' => $count,
      ]))) {
        $connection = Database::getConnection();
        $d7_connection = Database::getConnection('default', 'migrate');
        foreach ($reported_rows as $reported_row) {
          $nid = (int) $reported_row[0];
          $uid = (int) $reported_row[1];

          // Check first to see that there is no record on D7.
          $d7_matches = $d7_connection->select('project_maintainer')
            ->condition('nid', $nid)
            ->condition('uid', $uid)
            ->countQuery()
            ->execute()
            ->fetchField();
          if ($d7_matches) {
            // The record should be here, but maybe the flags are different.
            // The next migration will fix it.
            $this->io()->note(\dt('Ignoring NID: @nid / UID: @uid from drupalorg_project_maintainer', [
              '@nid' => $nid,
              '@uid' => $uid,
            ]));
          }
          else {
            $this->io()->note(\dt('Removing NID: @nid / UID: @uid from drupalorg_project_maintainer', [
              '@nid' => $nid,
              '@uid' => $uid,
            ]));
            $connection->delete('drupalorg_project_maintainer')
              ->condition('nid', $nid)
              ->condition('uid', $uid)
              ->execute();
          }
        }
      }
    }
    $this->io()->note('Diff file processed');
  }

  /**
   * Tells whether a parsed diff-file row should be skipped.
   *
   * A row is skipped when, joined with single spaces, it starts with one of
   * the diff markers "---", "+++", "@@" or a space, or when it does not have
   * exactly the expected number of columns.
   *
   * @param array $row
   *   Row from the diff file.
   * @param int $columns
   *   Number of columns in the row.
   *
   * @return bool
   *   Whether we should discard this row or not.
   */
  protected function discardDiffRow($row, $columns) {
    $line = implode(' ', $row);

    // Non-diff lines: file headers, hunk headers and unchanged context.
    if (preg_match('/^(?:-{3}|\+{3}|@@| )/', $line) === 1) {
      return TRUE;
    }

    $has_expected_width = count($row) === $columns;
    return !$has_expected_width;
  }

  /**
   * Reports entities found in the D7 database but not on this site.
   *
   * For "node" every bundle in self::NODE_TYPES_TO_CHECK is checked; for
   * "users" a single pass without bundle is done. Any other type does
   * nothing. A warning is only printed when more than 5 entities are missing.
   *
   * @param string $type
   *   Type of entities to check.
   */
  protected function checkEntitiesNotMigrated($type) {
    $bundles = match ($type) {
      'node' => self::NODE_TYPES_TO_CHECK,
      // Special case as the user has no bundles.
      'users' => [NULL],
      default => [],
    };

    foreach ($bundles as $bundle) {
      $labels = [
        '@type' => $type,
        '@entity_type' => $bundle,
      ];
      $this->io()->info(\dt('Checking @type (@entity_type) entities migration status.', $labels));

      $missing_ids = $this->entitiesFromFirstDatabaseNotInSecondDatabase('migrate', 'default', $type, $bundle);
      if (empty($missing_ids)) {
        $this->io()->success(\dt('Could not find any @type (@entity_type) entity not migrated.', $labels));
        continue;
      }

      $missing_total = count($missing_ids);
      $missing_list = $this->arrayToStringTruncated($missing_ids);
      // Up to 5 is normally ok as some migrations can take hours.
      if ($missing_total <= 5) {
        continue;
      }
      $this->io()->warning(\dt('@count @type (@entity_type) entities are not migrated, with the following IDs: @ids', [
        '@count' => $missing_total,
        '@ids' => $missing_list,
      ] + $labels));
    }
  }

  /**
   * Remove entities that are not coming from migrations.
   *
   * Looks for entities that exist on this site but not in the D7 database
   * and, after confirmation, cancels the accounts ("users") or deletes the
   * nodes ("node"). As a safety net nothing is deleted when more than 200
   * entities are affected; in that case the operator is not prompted at all.
   *
   * @param string $type
   *   Type of entities to check.
   */
  protected function deleteEntitiesNotComingFromMigration($type) {
    // Above this number something is assumed to be wrong with the data.
    $max_deletions = 200;

    $bundles = [];
    if ($type === 'node') {
      $bundles = self::NODE_TYPES_TO_CHECK;
    }
    elseif ($type === 'users') {
      // Special case as the user has no bundles.
      $bundles = [NULL];
    }
    foreach ($bundles as $entity_type) {
      $this->io()->info(\dt('Checking @type (@entity_type) that are not coming from a migration.', [
        '@type' => $type,
        '@entity_type' => $entity_type,
      ]));
      $extra_entities = $this->entitiesFromFirstDatabaseNotInSecondDatabase('default', 'migrate', $type, $entity_type);
      if (empty($extra_entities)) {
        $this->io()->success(\dt('Could not find any @type (@entity_type) to delete.', [
          '@type' => $type,
          '@entity_type' => $entity_type,
        ]));
        continue;
      }

      $count_entities_to_remove = count($extra_entities);
      $entities_to_remove = $this->arrayToStringTruncated($extra_entities);
      $warning = \dt('@count @type (@entity_type) entities will be deleted, with the following IDs: @ids', [
        '@type' => $type,
        '@entity_type' => $entity_type,
        '@count' => $count_entities_to_remove,
        '@ids' => $entities_to_remove,
      ]);
      $this->io()->warning($warning);

      if ($count_entities_to_remove > $max_deletions) {
        // Do not delete: there is something very wrong with the data.
        // Checked before prompting so the operator is not asked to confirm
        // an action that would be refused anyway.
        $this->logger()->warning($warning);
        $error = dt('Number of entities to be deleted (@count) exceeded the limit (@limit)', [
          '@count' => $count_entities_to_remove,
          '@limit' => $max_deletions,
        ]);
        $this->io()->error($error);
        $this->logger()->error($error);
        continue;
      }

      $confirmed = $this->io()->confirm(dt('Do you want to delete the @type (@entity_type) entities?', [
        '@type' => $type,
        '@entity_type' => $entity_type,
      ]));
      if (!$confirmed) {
        continue;
      }

      $this->logger()->warning($warning);
      foreach ($extra_entities as $extra_entity_id) {
        if ($type === 'users') {
          $this->cancelAccount($extra_entity_id);
        }
        elseif ($type === 'node') {
          $this->deleteNode($extra_entity_id);
        }
      }
    }
  }

  /**
   * Joins array values with ", ", keeping at most the first 100 of them.
   *
   * When more than 100 values are given, the output ends with
   * "... (truncated)".
   *
   * @param array $array
   *   Array to convert to string.
   *
   * @return string
   *   String representation of the array.
   */
  protected function arrayToStringTruncated($array) {
    if (count($array) <= 100) {
      return implode(', ', $array);
    }

    $head = array_slice($array, 0, 100);
    return implode(', ', $head) . '... (truncated)';
  }

  /**
   * Collects the entity ids that exist in one database but not in another.
   *
   * The ids of the first database are read in batches of self::BATCH_LENGTH
   * and each batch is checked against the second database, with a progress
   * bar shown while doing so.
   *
   * @param string $first_database
   *   Key of the database.
   * @param string $second_database
   *   Key of the database.
   * @param string $entity_type_id
   *   Type of the entity.
   * @param string $bundle
   *   (Optional) Bundle of the entity.
   *
   * @return array
   *   List of entity ids.
   */
  protected function entitiesFromFirstDatabaseNotInSecondDatabase($first_database, $second_database, $entity_type_id, $bundle = NULL) {
    $total = $this->getEntityCounts($first_database, $entity_type_id, $bundle);
    $batch_size = self::BATCH_LENGTH;
    $missing = [];

    $ids = $this->getNextEntityBatch(0, $batch_size, $first_database, $entity_type_id, $bundle);
    $this->io()->progressStart($total);
    // $offset always points at the batch that is fetched next.
    for ($offset = $batch_size; !empty($ids); $offset += $batch_size) {
      $absent = $this->entityIdsNotInDatabase($ids, $second_database, $entity_type_id, $bundle);
      $missing = array_merge($missing, $absent);

      $this->io()->progressAdvance($batch_size);

      $ids = $this->getNextEntityBatch($offset, $batch_size, $first_database, $entity_type_id, $bundle);
    }
    $this->io()->progressFinish();

    return $missing;
  }

  /**
   * Deletes a node.
   *
   * @param int $nid
   *   The node id.
   */
  protected function deleteNode($nid) {
    $node = Node::load($nid);
    // Nothing to delete (or report) when the node cannot be loaded.
    if (!$node) {
      return;
    }

    $node->delete();
    $message = dt('Node @nid deleted.', ['@nid' => $nid]);
    $this->io()->info($message);
  }

  /**
   * Cancels a user account.
   *
   * @param int $uid
   *   User id.
   */
  protected function cancelAccount($uid) {
    // No need for extra logging as the user_cancel function does it.
    // NOTE(review): 'user_cancel_reassign' is presumably the cancellation
    // method that removes the account and reassigns its content - confirm.
    user_cancel([], $uid, 'user_cancel_reassign');
    // Grab the current batch by reference (presumably the one queued by
    // user_cancel() above) and flag it as non-progressive before processing.
    $batch =& batch_get();
    $batch['progressive'] = FALSE;
    // Batch it the drush way, instead of batch_process().
    drush_backend_batch_process();

    // Recorded at warning level after batch processing has been invoked.
    $this->logger()->warning(\dt('Deleting user @uid via sync-users drush command.', [
      '@uid' => $uid,
    ]));
  }

  /**
   * Gets a count of entity table on the given database for a given type.
   *
   * @param string $database
   *   Database to check.
   * @param string $entity_type_id
   *   Type of the entity.
   * @param string $bundle
   *   (Optional) Type of the node.
   *
   * @return int
   *   Count of entities.
   */
  protected function getEntityCounts($database, $entity_type_id, $bundle = NULL) {
    // The entity type id doubles as the name of the table being counted.
    $connection = Database::getConnection('default', $database);
    $query = $connection->select($entity_type_id, 'e');
    // Restrict the count to a single bundle only when one was requested.
    if (!is_null($bundle)) {
      $query->condition('e.type', $bundle);
    }
    // fetchField() may hand the count back as a numeric string (or FALSE when
    // no row is returned); cast so callers get the documented integer.
    return (int) $query
      ->countQuery()
      ->execute()
      ->fetchField();
  }

  /**
   * Get a batch of entity ids to check.
   *
   * @param int $offset
   *   Offset of the records to get.
   * @param int $length
   *   Length of the records to get.
   * @param string $database
   *   Database to check.
   * @param string $entity_type_id
   *   Type of the entity.
   * @param string $bundle
   *   (Optional) Bundle types to bring.
   *
   * @return array
   *   Array of entity ids.
   */
  protected function getNextEntityBatch($offset, $length, $database, $entity_type_id, $bundle = NULL) {
    $key = $this->getIdColumn($entity_type_id);

    // Page through the table in id order so consecutive offsets do not
    // overlap.
    $select = Database::getConnection('default', $database)->select($entity_type_id, 'e');
    $select->fields('e', [$key]);
    $select->range($offset, $length);
    $select->orderBy("e.$key");

    // Restrict to a single bundle only when one was requested.
    if ($bundle !== NULL) {
      $select->condition('e.type', $bundle);
    }

    $statement = $select->execute();
    return $statement->fetchCol();
  }

  /**
   * Return the ID column depending on the entity type.
   *
   * @param string $type
   *   Type of the entity.
   *
   * @return string
   *   ID column.
   */
  protected function getIdColumn($type) {
    // Users and nodes have dedicated key names; anything else falls back to
    // the generic 'id' column.
    return match ($type) {
      'users' => 'uid',
      'node' => 'nid',
      default => 'id',
    };
  }

  /**
   * Check which entity ids are not in a database.
   *
   * @param array $ids
   *   List of entity ids to check.
   * @param string $database
   *   Database to check.
   * @param string $entity_type_id
   *   Type of the entities.
   * @param string $bundle
   *   (Optional) Bundle of the entity type.
   *
   * @return array
   *   Array of entity ids not present in the given database.
   */
  protected function entityIdsNotInDatabase($ids, $database, $entity_type_id, $bundle = NULL) {
    // Drupal's query builder rejects an empty IN () list with an exception,
    // and an empty input cannot have any missing ids anyway.
    if (empty($ids)) {
      return [];
    }

    $connection = Database::getConnection('default', $database);
    $id_column = $this->getIdColumn($entity_type_id);
    // No ORDER BY here: the matches are only used as a set for array_diff(),
    // so sorting them would be wasted work.
    $query = $connection
      ->select($entity_type_id, 'e')
      ->fields('e', [$id_column])
      ->condition("e.$id_column", $ids, 'IN');
    // Restrict the lookup to a single bundle only when one was requested.
    if (!is_null($bundle)) {
      $query->condition('e.type', $bundle);
    }
    $matched_ids = $query
      ->execute()
      ->fetchCol();

    // Whatever was asked for but not matched is missing from this database.
    // array_diff() keeps the original keys of $ids.
    return array_diff($ids, $matched_ids);
  }

  /**
   * Creates a new paragraph with the org data.
   *
   * @param mixed $data
   *   Data to add.
   *
   * @return \Drupal\paragraphs\ParagraphInterface
   *   Created paragraph.
   */
  protected function createParagraph(mixed $data): ParagraphInterface {
    // Every column is read null-safely so a row lacking one of them does not
    // raise "undefined property" warnings. The organization name falls back
    // to NULL (not '') so the value handed to the field is unchanged.
    $paragraph_data = [
      'type' => 'organizations',
      'field_current' => (bool) ($data->field_current_value ?? FALSE),
      'field_job_title' => $data->field_job_title_value ?? '',
      'field_organization_name' => $data->field_organization_name_value ?? NULL,
    ];

    $org_nid = $data->nid ?? NULL;
    if ($org_nid) {
      $org_node = Node::load($org_nid);
      // Only reference the node when it exists and really is an organization.
      if ($org_node && $org_node->bundle() === 'organization') {
        $paragraph_data['field_organization_reference'] = ['target_id' => $org_nid];
      }
    }

    // The paragraph is saved before being returned.
    $paragraph = $this->entityTypeManager->getStorage('paragraph')->create($paragraph_data);
    $paragraph->save();

    return $paragraph;
  }

}

Главная | Обратная связь

drupal hosting | друпал хостинг | it patrol .inc