drupalorg_migrate-1.0.x-dev/src/Drush/Commands/DrushCommands.php
src/Drush/Commands/DrushCommands.php
<?php
namespace Drupal\drupalorg_migrate\Drush\Commands;
use Drupal\Core\Cache\MemoryCache\MemoryCache;
use Drupal\Core\Cache\MemoryCache\MemoryCacheInterface;
use Drupal\Core\Config\ConfigFactory;
use Drupal\Core\Config\ConfigFactoryInterface;
use Drupal\Core\Database\Database;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Language\LanguageInterface;
use Drupal\Core\Language\LanguageManagerInterface;
use Drupal\Core\Queue\QueueFactory;
use Drupal\drupalorg\ProjectService;
use Drupal\node\Entity\Node;
use Drupal\paragraphs\ParagraphInterface;
use Drupal\path_alias\AliasManager;
use Drupal\path_alias\AliasManagerInterface;
use Drupal\path_alias\AliasRepositoryInterface;
use Drupal\path_alias\Entity\PathAlias;
use Drupal\redirect\Entity\Redirect;
use Drupal\user\Entity\User;
use Drupal\user\UserInterface;
use Drush\Attributes as CLI;
use Drush\Commands\AutowireTrait;
use Drush\Commands\DrushCommands as BaseDrushCommands;
use Symfony\Component\DependencyInjection\Attribute\Autowire;
/**
 * Drush commands that reconcile modern Drupal data with the D7 database.
 *
 * @package Drupal\drupalorg_migrate\Drush\Commands
 */
final class DrushCommands extends BaseDrushCommands {
use AutowireTrait;
/**
 * Number of entity IDs fetched per query when comparing two databases.
 *
 * Memory usage around 60MB for 100000.
 *
 * @var int
 */
const BATCH_LENGTH = 50000;
/**
 * Node bundles that are compared between the two databases.
 *
 * Contains every bundle from ProjectService::PROJECT_TYPES plus the extra
 * bundles listed explicitly below.
 *
 * @var array
 */
const NODE_TYPES_TO_CHECK = [
...ProjectService::PROJECT_TYPES,
'project_release',
// 'casestudy' is currently left out of the checks.
];
/**
 * Constructs the command file with its autowired services.
 *
 * Services are type-hinted against their interfaces so that decorated or
 * swapped service implementations can still be injected.
 *
 * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entityTypeManager
 *   The entity type manager.
 * @param \Drupal\Core\Cache\MemoryCache\MemoryCacheInterface $memoryCache
 *   The entity memory cache, cleared between batches.
 * @param \Drupal\Core\Queue\QueueFactory $queueFactory
 *   The queue factory.
 * @param \Drupal\path_alias\AliasManagerInterface $aliasManager
 *   The path alias manager.
 * @param \Drupal\path_alias\AliasRepositoryInterface $aliasRepository
 *   The path alias repository.
 * @param \Drupal\Core\Config\ConfigFactoryInterface $configFactory
 *   The config factory.
 * @param \Drupal\Core\Language\LanguageManagerInterface $languageManager
 *   The language manager.
 */
public function __construct(
  #[Autowire(service: 'entity_type.manager')]
  protected EntityTypeManagerInterface $entityTypeManager,
  #[Autowire(service: 'entity.memory_cache')]
  protected MemoryCacheInterface $memoryCache,
  #[Autowire(service: 'queue')]
  protected QueueFactory $queueFactory,
  #[Autowire(service: 'path_alias.manager')]
  protected AliasManagerInterface $aliasManager,
  #[Autowire(service: 'path_alias.repository')]
  protected AliasRepositoryInterface $aliasRepository,
  #[Autowire(service: 'config.factory')]
  protected ConfigFactoryInterface $configFactory,
  #[Autowire(service: 'language_manager')]
  protected LanguageManagerInterface $languageManager,
) {
  parent::__construct();
}
/**
 * Imports the D7 redirects and then the D7 aliases into modern Drupal.
 *
 * Nothing happens unless the operator confirms the prompt.
 */
#[CLI\Command(name: 'drupalorg_migrate:sync-aliases-and-redirects')]
public function syncAliasesAndRedirects() {
  $confirmed = $this->io()->confirm(\dt('Do you want to sync aliases and redirects?'));
  if (!$confirmed) {
    return;
  }
  // Redirects go first, aliases second.
  $this->syncRedirects();
  $this->syncAliases();
}
/**
 * Looks up rows of the "redirect" table by their source path.
 *
 * @param string $source_path
 *   Path to search. Leading and trailing slashes are stripped first.
 * @param bool $count
 *   TRUE (default) to return the number of matches, FALSE to return the
 *   first matching row.
 * @param bool $only_enabled
 *   Include only enabled redirects.
 *
 * @return mixed
 *   With $count: the result of the count query. Without it: the first
 *   matching row with all its columns, or FALSE if there is no record.
 */
protected function getRedirectBySourcePath(string $source_path, bool $count = TRUE, bool $only_enabled = FALSE) {
  $select = Database::getConnection()->select('redirect', 'r');
  $select->condition('redirect_source__path', trim($source_path, '/'));
  if ($only_enabled) {
    $select->condition('enabled', 1);
  }
  if (!$count) {
    $select->fields('r');
    return $select->execute()->fetch();
  }
  return $select->countQuery()->execute()->fetchField();
}
/**
 * Sync redirects from D7.
 *
 * Reads the D7 "redirect" table in batches and creates a redirect entity for
 * every source path that has no redirect on this site yet. Source paths that
 * already have one are counted as duplicates. A redirect that fails to save
 * is recorded under "errors" and does not stop the run.
 */
protected function syncRedirects() {
  $d7_connection = Database::getConnection('default', 'migrate');
  $query = $d7_connection->select('redirect', 'r');
  $query->fields('r', [
    'source',
    'redirect',
    'source_options',
    'redirect_options',
    'language',
    'status_code',
    'status',
  ]);
  $count = $query->countQuery()->execute()->fetchField();
  $this->io()->note(\dt('@count redirects will be synced from D7.', [
    '@count' => $count,
  ]));
  // Offset pagination is only reliable with a stable order, otherwise rows
  // can be skipped or visited twice between batches.
  $query->orderBy('r.rid');
  // Stats.
  $stats = [
    'created' => 0,
    'duplicated' => 0,
    'errors' => [],
  ];
  $default_status_code = $this->configFactory->get('redirect.settings')->get('default_status_code');
  // First batch.
  $offset = 0;
  $limit = 500;
  $batch = $query->range($offset, $limit)->execute()->fetchAll();
  $this->io()->progressStart($count);
  while (!empty($batch)) {
    foreach ($batch as $item) {
      // Massage the data.
      $redirect_url = str_starts_with($item->redirect, 'http') ? $item->redirect : '/' . $item->redirect;
      $source_options = $this->decodeRedirectOptions($item->source_options);
      $redirect_options = $this->decodeRedirectOptions($item->redirect_options);
      $source_query = $source_options['query'] ?? [];
      $redirect_query = $redirect_options['query'] ?? [];
      // The fragment is part of the redirect target, so it is read from the
      // redirect options and not from the source options.
      $redirect_fragment = (!empty($redirect_options['fragment'])) ?
        ['fragment' => $redirect_options['fragment']] :
        [];
      $source_path = trim($item->source, '/');
      if ($this->getRedirectBySourcePath($source_path)) {
        $stats['duplicated']++;
        continue;
      }
      try {
        // Create the redirect.
        $redirect = Redirect::create();
        $redirect->setSource($source_path, $source_query);
        $redirect->setRedirect($redirect_url, $redirect_query, $redirect_fragment);
        $redirect->setStatusCode($item->status_code ?: $default_status_code);
        if ($item->status) {
          $redirect->setPublished();
        }
        else {
          // @todo Should we create disabled redirects?
          $redirect->setUnpublished();
        }
        $redirect->save();
        $stats['created']++;
      }
      catch (\Exception $exception) {
        // Keep going: one broken row must not abort the whole sync.
        $stats['errors'][$source_path] = $exception->getMessage();
      }
    }
    // Advance by what was really processed so the bar ends at the total.
    $this->io()->progressAdvance(count($batch));
    // Next batch.
    $offset += $limit;
    $batch = $query->range($offset, $limit)->execute()->fetchAll();
    // Try to reduce memory accumulating.
    $this->memoryCache->deleteAll();
  }
  $this->io()->progressFinish();
  if (!empty($stats['errors'])) {
    $this->io()->warning(\dt("There were @count error redirects. Showing them below (up to 50):\n@errors\n\n", [
      '@count' => count($stats['errors']),
      '@errors' => print_r(array_slice($stats['errors'], 0, 50), TRUE),
    ]));
  }
  $this->io()->info(\dt("\n- Created: @created\n- Duplicated: @duplicated\n- Errors: @errors\n", [
    '@created' => $stats['created'],
    '@duplicated' => $stats['duplicated'],
    '@errors' => count($stats['errors']),
  ]));
}

/**
 * Decodes a serialized options column of the D7 redirect table.
 *
 * @param mixed $serialized
 *   Raw column value.
 *
 * @return array
 *   The decoded options, or an empty array when the value is empty or does
 *   not unserialize to an array.
 */
private function decodeRedirectOptions($serialized): array {
  if (!is_string($serialized) || $serialized === '') {
    return [];
  }
  // The options only need plain arrays, so never instantiate objects.
  $options = unserialize($serialized, ['allowed_classes' => FALSE]);
  return is_array($options) ? $options : [];
}
/**
 * Sync the aliases from D7.
 *
 * Aliases can be modified and there is no timestamp, so we need to go
 * through all of them. Also, aliases can be set in D7 and modern
 * Drupal, so we cannot compare between instances with a diff query.
 *
 * For every D7 alias that points at a user or node existing on this site:
 * - no alias here yet: the D7 alias is created;
 * - same alias here: counted as identical;
 * - different alias here: the alias is overwritten with the D7 one, unless
 *   an enabled redirect already uses the current alias as its source.
 */
protected function syncAliases() {
  // Set to TRUE to log the IDs of entities that are missing on this site.
  $log_not_found = FALSE;
  $d7_connection = Database::getConnection('default', 'migrate');
  $query = $d7_connection->select('url_alias', 'a');
  $orGroup = $query->orConditionGroup();
  $orGroup->condition('source', 'node/%', 'LIKE');
  $orGroup->condition('source', 'user/%', 'LIKE');
  $query->condition($orGroup);
  // Issue aliases will be redirects.
  $query->condition('alias', 'project/%/issues/%', 'NOT LIKE');
  // Ignore other aliases we are not processing yet.
  $query->condition('alias', '%/releases', 'NOT LIKE');
  $query->condition('alias', '%/testing-status', 'NOT LIKE');
  $query->condition('alias', '%/cvs-instructions', 'NOT LIKE');
  $query->condition('alias', '%/git-instructions', 'NOT LIKE');
  $query->fields('a', ['source', 'alias']);
  $count = $query->countQuery()->execute()->fetchField();
  $this->io()->note(\dt('@count (user and node) aliases will be synced from D7.', [
    '@count' => $count,
  ]));
  // Offset pagination is only reliable with a stable order, otherwise rows
  // can be skipped or visited twice between batches.
  $query->orderBy('a.pid');
  // Stats.
  $stats = [
    'created' => 0,
    'not_found' => 0,
    'conflict' => 0,
    'identical' => 0,
    'ignored' => [],
  ];
  $conflicts = [];
  // First batch.
  $offset = 0;
  $limit = 5000;
  $batch = $query->range($offset, $limit)->execute()->fetchAll();
  $this->io()->progressStart($count);
  while (!empty($batch)) {
    // Process the batch loading the entities with a matching id.
    $batch_data = $this->loadEntitiesFromBatch($batch);
    $stats['ignored'] += $batch_data['ignored'];
    foreach (['user', 'node'] as $entity_type) {
      // Fill up the not found stats.
      if (!empty($batch_data['not_found'][$entity_type])) {
        $stats['not_found'] += count($batch_data['not_found'][$entity_type]);
        if ($log_not_found) {
          $this->logger()->warning('@type aliases not found on the new system: @ids', [
            '@ids' => implode(',', $batch_data['not_found'][$entity_type]),
            '@type' => $entity_type,
          ]);
        }
      }
      // Loop through the ones that had a matching entity.
      foreach ($batch_data[$entity_type] as $entity) {
        /** @var \Drupal\Core\Entity\EntityInterface $entity */
        $source = $entity_type . '/' . $entity->id();
        $alias = $batch_data['alias_map'][$entity_type][$entity->id()];
        $modern_alias = $this->aliasManager->getAliasByPath('/' . $source);
        if ($modern_alias === '/' . $source) {
          // The path came back unchanged, so there is no alias yet: create it.
          $path_alias = PathAlias::create([
            'path' => '/' . $source,
            'alias' => '/' . $alias,
          ]);
          $path_alias->save();
          $stats['created']++;
          continue;
        }
        if ($modern_alias === '/' . $alias) {
          // We have it and it is already identical.
          $stats['identical']++;
          continue;
        }
        // We have it and it's different.
        $stats['conflict']++;
        $langcode = $this->languageManager->getCurrentLanguage(LanguageInterface::TYPE_URL)->getId();
        $existing_alias = $this->aliasRepository->lookupByAlias($modern_alias, $langcode);
        if (!$existing_alias) {
          // lookupByAlias() found nothing for the current URL language, so
          // there is no alias record to rewrite. Report it and move on
          // instead of failing on the missing record further down.
          $conflicts[] = [
            'modern' => $modern_alias,
            'old' => '/' . $alias,
            'action' => 'none',
          ];
          continue;
        }
        // Check if there is a redirect like the one that would be created
        // automatically to avoid a "Duplicate entry" error.
        $modern_path_redirect = $this->getRedirectBySourcePath($modern_alias, FALSE);
        if ($modern_path_redirect && $modern_path_redirect->enabled) {
          if ($modern_path_redirect->redirect_redirect__uri === 'internal:' . $existing_alias['path']) {
            // The existing redirect points at the same path: consider that
            // a duplicate instead of a conflict.
            $stats['conflict']--;
            $stats['identical']++;
          }
          else {
            // Redirect is enabled, we cannot override the alias.
            // Continue reporting the conflict and do nothing else.
            $conflicts[] = [
              'modern' => $modern_alias,
              'old' => '/' . $alias,
              'action' => 'none',
            ];
          }
          continue;
        }
        if ($modern_path_redirect) {
          // Redirect disabled: remove it so we can create the new one.
          redirect_repository()->load($modern_path_redirect->rid)->delete();
        }
        // Check if we have redirects linked to the D7 alias matching the
        // same entity path.
        $existing_redirect = $this->getRedirectBySourcePath($alias, FALSE);
        if (
          $existing_redirect &&
          $existing_redirect->redirect_redirect__uri === 'internal:' . $existing_alias['path']
        ) {
          // Remove the redirect to avoid a redirect loop.
          redirect_repository()->load($existing_redirect->rid)->delete();
        }
        /** @var \Drupal\path_alias\PathAliasInterface $alias_entity */
        $alias_entity = $this->entityTypeManager->getStorage('path_alias')->load($existing_alias['id']);
        $alias_entity->setAlias('/' . $alias);
        // Saving the new alias will create a redirect from old to new.
        $alias_entity->save();
        $conflicts[] = [
          'modern' => $modern_alias,
          'old' => '/' . $alias,
          'action' => 'overwritten',
        ];
      }
    }
    // Advance by what was really processed so the bar ends at the total.
    $this->io()->progressAdvance(count($batch));
    // Next batch.
    $offset += $limit;
    $batch = $query->range($offset, $limit)->execute()->fetchAll();
    // Try to reduce memory accumulating.
    $this->memoryCache->deleteAll();
    $this->aliasManager->cacheClear();
  }
  $this->io()->progressFinish();
  if (!empty($conflicts)) {
    $this->io()->warning(\dt("There were @count conflicts. Showing them below (up to 50):\n@conflicts\n\n", [
      '@count' => count($conflicts),
      '@conflicts' => print_r(array_slice($conflicts, 0, 50), TRUE),
    ]));
  }
  if (!empty($stats['ignored'])) {
    $this->io()->warning(\dt("There were @count ignored aliases. Showing them below (up to 50):\n@ignored\n\n", [
      '@count' => count($stats['ignored']),
      '@ignored' => print_r(array_slice($stats['ignored'], 0, 50), TRUE),
    ]));
  }
  $this->io()->info(\dt("\n- Created: @created\n- Not found: @not_found\n- Conflict: @conflict\n- Identical: @identical\n- Ignored: @ignored\n", [
    '@created' => $stats['created'],
    '@not_found' => $stats['not_found'],
    '@conflict' => $stats['conflict'],
    '@identical' => $stats['identical'],
    '@ignored' => count($stats['ignored']),
  ]));
}
/**
 * Do a quick pass to load the entities in bulk.
 *
 * This is done so we don't need to load one by one when processing the batch.
 *
 * @param array $batch
 *   Rows of the D7 url_alias table, each with "source" and "alias".
 *
 * @return array
 *   Array with the keys:
 *   - user, node: loaded entities keyed by ID.
 *   - alias_map: D7 alias per entity type and ID.
 *   - ignored: aliases keyed by source that do not map to a user or node ID.
 *   - not_found: per entity type, the requested IDs that could not be loaded.
 */
protected function loadEntitiesFromBatch(array $batch): array {
  $entities = [
    'user' => [],
    'node' => [],
  ];
  $aliases = [
    'user' => [],
    'node' => [],
  ];
  $ignored = [];
  // This loop is really quick and barely has any processing.
  foreach ($batch as $d7_alias) {
    $parts = explode('/', $d7_alias->source);
    // Only "<type>/<numeric id>" maps to an entity. Without the digit check
    // a source such as "user/login" would be cast to ID 0.
    if (
      count($parts) === 2 &&
      isset($entities[$parts[0]]) &&
      ctype_digit($parts[1])
    ) {
      $entity_type = $parts[0];
      $id = (int) $parts[1];
      $entities[$entity_type][] = $id;
      $aliases[$entity_type][$id] = $d7_alias->alias;
    }
    else {
      $ignored[$d7_alias->source] = $d7_alias->alias;
    }
  }
  // Format the data.
  $data = [
    'user' => $this->entityTypeManager->getStorage('user')->loadMultiple($entities['user']),
    'node' => $this->entityTypeManager->getStorage('node')->loadMultiple($entities['node']),
    'alias_map' => $aliases,
    'ignored' => $ignored,
    'not_found' => [
      'user' => [],
      'node' => [],
    ],
  ];
  // Check the ones that are not found.
  foreach (['node', 'user'] as $entity_type) {
    $data['not_found'][$entity_type] = array_diff($entities[$entity_type], array_keys($data[$entity_type]));
  }
  return $data;
}
/**
 * Remove the "/u/*" user redirects in modern Drupal programmatically.
 *
 * Only redirects whose source starts with "u/" and whose target is an
 * internal "/user/*" path are removed. Aliases are stored on a different
 * table and are not touched.
 */
#[CLI\Command(name: 'drupalorg_migrate:remove-user-redirects')]
public function removeUserRedirects() {
  $database = Database::getConnection();
  $base_query = $database->select('redirect')
    ->condition('redirect_source__path', 'u/%', 'LIKE')
    ->condition('redirect_redirect__uri', 'internal:/user/%', 'LIKE');
  $count = $base_query
    ->countQuery()
    ->execute()
    ->fetchField();
  if (!$count || !$this->io()->confirm(\dt('Do you want to remove @count redirects?', [
    '@count' => $count,
  ]))) {
    return;
  }
  // See "redirect_delete_by_path".
  $base_query->addField('redirect', 'rid');
  $redirect_rids = $base_query
    ->execute()
    ->fetchCol();
  // Count the real chunks: dividing and adding one reports one chunk too
  // many whenever the total is an exact multiple of the chunk size.
  $chunks = array_chunk($redirect_rids, 100);
  $total_chunks = count($chunks);
  foreach ($chunks as $index => $rids) {
    foreach (redirect_repository()->loadMultiple($rids) as $redirect) {
      $redirect->delete();
    }
    $this->io()->note(\dt('Deleted @done / @total chunks.', [
      '@done' => $index + 1,
      '@total' => $total_chunks,
    ]));
  }
}
/**
 * Sync user organizations bypassing migrations.
 *
 * Processes the users given in --uids (comma separated) one by one and
 * prints the sync results for those that changed.
 *
 * @param array $options
 *   Command options: "uids" and "force".
 */
#[CLI\Command(name: 'drupalorg_migrate:sync-user-organizations')]
#[CLI\Option(name: 'uids', description: 'User IDs to sync.')]
#[CLI\Option(name: 'force', description: 'Force a full sync.')]
public function syncUserOrganizations(
  $options = [
    'uids' => NULL,
    'force' => NULL,
  ],
) {
  $force = (bool) $options['force'];
  $uids = [];
  if (!empty($options['uids'])) {
    // Tolerate spaces and stray commas, e.g. "1, 2,,3".
    $uids = array_filter(
      array_map(
        static fn (string $uid): string => trim($uid),
        explode(',', (string) $options['uids']),
      ),
      static fn (string $uid): bool => $uid !== '',
    );
  }
  if (empty($uids)) {
    // Say so explicitly instead of exiting without any output.
    $this->io()->warning(\dt('No user IDs given, nothing to sync. Use the --uids option.'));
    return;
  }
  foreach ($uids as $uid) {
    $this->io()->info(\dt('Processing UID @uid.', [
      '@uid' => $uid,
    ]));
    $user = User::load($uid);
    if ($user instanceof UserInterface) {
      $results = $this->syncUserOrganizationsFromD7($user, $force);
      if (
        $results['updated'] ||
        $results['added'] ||
        $results['deleted']
      ) {
        $this->io()->note(print_r($results, TRUE));
      }
    }
    else {
      $this->io()->warning(\dt('UID @uid not found on the new system.', [
        '@uid' => $uid,
      ]));
    }
    // Try to reduce memory accumulating.
    $this->memoryCache->deleteAll();
  }
}
/**
 * Sync the user organizations linked to the given user to match those on D7.
 *
 * Existing "organizations" paragraphs are matched to the D7 rows by
 * organization name. Paragraphs without a D7 match (or all of them when
 * $force is set) are deleted, and D7 organizations without a paragraph are
 * created.
 *
 * @todo Once the users are managed via modern Drupal we will not need this.
 *
 * @param \Drupal\user\UserInterface $user
 *   User to sync.
 * @param bool $force
 *   Force a full reset of the entries.
 *
 * @return array
 *   Stats with the keys "deleted", "added", "d7_total", "before_total",
 *   "after_total" and "updated".
 */
protected function syncUserOrganizationsFromD7(UserInterface $user, bool $force = FALSE) {
  // Fetch info from D7.
  $d7_connection = Database::getConnection('default', 'migrate');
  $query = $d7_connection->select('users', 'u');
  $query->leftJoin('field_data_field_organizations', 'fdf_o', 'fdf_o.entity_id = u.uid');
  $query->leftJoin('field_data_field_organization_reference', 'fdf_or', 'fdf_or.entity_id = fdf_o.field_organizations_value');
  $query->leftJoin('node', 'n', 'fdf_or.field_organization_reference_target_id = n.nid AND n.type = :type', [':type' => 'organization']);
  $query->leftJoin('field_data_field_current', 'fdf_c', 'fdf_c.entity_id = fdf_o.field_organizations_value');
  $query->leftJoin('field_data_field_job_title', 'fdf_jt', 'fdf_jt.entity_id = fdf_o.field_organizations_value');
  $query->leftJoin('field_data_field_organization_name', 'fdf_on', 'fdf_on.entity_id = fdf_o.field_organizations_value');
  $query->condition('u.uid', $user->id());
  $query->fields('u', ['uid', 'name']);
  $query->fields('fdf_on', ['field_organization_name_value']);
  $query->fields('n', ['nid']);
  $query->fields('fdf_c', ['field_current_value']);
  $query->fields('fdf_jt', ['field_job_title_value']);
  // Because of the LEFT JOINs, a user without organizations still yields a
  // row, with a NULL name. That row is not an organization: drop it so it is
  // neither counted nor turned into an empty paragraph.
  $d7_user_organizations = array_filter(
    $query->execute()->fetchAll(),
    static fn ($row): bool => $row->field_organization_name_value !== NULL,
  );
  // Organize by Org name.
  $user_orgs = [];
  foreach ($d7_user_organizations as $d7_user_organization) {
    $user_orgs[$d7_user_organization->field_organization_name_value] = $d7_user_organization;
  }
  // Get D10 information.
  $user_organizations = $user->get('field_user_organizations')->referencedEntities();
  // Find the differences.
  $to_stay = [];
  $to_delete = [];
  // D10 paragraphs loop. Check the ones that need to stay or go.
  foreach ($user_organizations as $user_organization) {
    /** @var \Drupal\Paragraphs\Entity\Paragraph $user_organization */
    if ($user_organization->bundle() !== 'organizations') {
      // Fix the data mismatch.
      $to_delete[] = $user_organization;
      continue;
    }
    $org_name = $user_organization->get('field_organization_name')->getValue()[0]['value'] ?? NULL;
    if (!$force && $org_name && isset($user_orgs[$org_name])) {
      $to_stay[$org_name] = $user_organization;
    }
    else {
      // Linked but shouldn't be there.
      $to_delete[] = $user_organization;
    }
  }
  $added = 0;
  if ($force) {
    // Add the D7 ones.
    foreach ($d7_user_organizations as $data) {
      $to_stay[] = $this->createParagraph($data);
      $added++;
    }
  }
  else {
    // Is there any missing from D7.
    $diff = array_diff(
      array_keys($user_orgs),
      array_keys($to_stay),
    );
    foreach ($diff as $org_name) {
      // Numeric names come back from array_keys() as integers, so the row
      // is looked up by key rather than compared strictly to its name.
      $to_stay[] = $this->createParagraph($user_orgs[$org_name]);
      $added++;
    }
  }
  // Update record. Removals must be saved too, otherwise the field keeps
  // pointing at the paragraphs deleted below.
  $updated = FALSE;
  if ($added > 0 || !empty($to_delete)) {
    $user->set('field_user_organizations', array_values($to_stay));
    $user->save();
    $updated = TRUE;
  }
  // Final clean up.
  foreach ($to_delete as $item) {
    /** @var \Drupal\Paragraphs\Entity\Paragraph $item */
    $item->delete();
  }
  return [
    'deleted' => count($to_delete),
    'added' => $added,
    'd7_total' => count($user_orgs),
    'before_total' => count($user_organizations),
    'after_total' => count($to_stay),
    'updated' => $updated,
  ];
}
/**
 * Cancels the accounts found in the "users" table here but not in D7.
 *
 * The work is delegated to deleteEntitiesNotComingFromMigration().
 */
#[CLI\Command(name: 'drupalorg_migrate:sync-users')]
public function syncUsers() {
  $type = 'users';
  $this->deleteEntitiesNotComingFromMigration($type);
}
/**
 * Deletes the checked node bundles found here but not in D7.
 *
 * The work is delegated to deleteEntitiesNotComingFromMigration(), which
 * walks the bundles listed in self::NODE_TYPES_TO_CHECK.
 */
#[CLI\Command(name: 'drupalorg_migrate:sync-projects')]
public function syncProjects() {
  $type = 'node';
  $this->deleteEntitiesNotComingFromMigration($type);
}
/**
 * Reports users and nodes that exist in D7 but not on this site.
 */
#[CLI\Command(name: 'drupalorg_migrate:check-entities')]
public function checkEntities() {
  // Users first, then nodes.
  foreach (['users', 'node'] as $type) {
    $this->checkEntitiesNotMigrated($type);
  }
}
/**
 * Checks for records removed in D7 and propagates the removal to D10.
 *
 * The diff file comes from "check-project-maintainers.sh". Rows present on
 * this site only are collected from it and, after confirmation, each one
 * is deleted from drupalorg_project_maintainer unless D7 still has a record
 * for the same node and user. More than 50 rows require --force.
 *
 * @param array $options
 *   Command options: "diff-file" and "force".
 */
#[CLI\Command(name: 'drupalorg_migrate:sync-projects-maintainers')]
#[CLI\Option(name: 'diff-file', description: 'Diff file with mismatches.')]
#[CLI\Option(name: 'force', description: 'Force a full sync.')]
public function syncProjectMaintainers(
  $options = [
    'diff-file' => NULL,
    'force' => NULL,
  ],
) {
  $force = (bool) $options['force'];
  $diff_file = $options['diff-file'];
  // Passing NULL to file_exists() is deprecated; report a missing file
  // instead of ending without any output.
  if (!is_string($diff_file) || $diff_file === '' || !file_exists($diff_file)) {
    $this->io()->warning(\dt('Diff file "@file" not found, nothing to process.', [
      '@file' => is_string($diff_file) ? $diff_file : '',
    ]));
    return;
  }
  $this->io()->note('Checking diff file...');
  $handle = fopen($diff_file, 'r');
  if (!$handle) {
    $this->io()->error('Failed to open diff file');
    return;
  }
  // Process file first.
  $reported_rows = [];
  while (($current_row = fgetcsv($handle, 0, "\t", '"', "\\")) !== FALSE) {
    if ($this->discardDiffRow($current_row, 6)) {
      continue;
    }
    // Only keep the ones on modern Drupal (+) not on old Drupal (-).
    $int_value = (int) $current_row[0];
    if ($int_value > 0) {
      // Store the first column as a plain integer, without the "+" marker.
      $current_row[0] = $int_value;
      $reported_rows[] = $current_row;
    }
  }
  fclose($handle);
  // And then remove the entries that shouldn't be there.
  if (!empty($reported_rows)) {
    $count = count($reported_rows);
    if (
      ($count < 50 || $force) &&
      $this->io()->confirm(\dt('Do you want to delete @count entries from drupalorg_project_maintainer?', [
        '@count' => $count,
      ]))
    ) {
      $connection = Database::getConnection();
      $d7_connection = Database::getConnection('default', 'migrate');
      foreach ($reported_rows as $reported_row) {
        $nid = (int) $reported_row[0];
        $uid = (int) $reported_row[1];
        // Check first to see that there is no record on D7.
        $d7_matches = $d7_connection->select('project_maintainer')
          ->condition('nid', $nid)
          ->condition('uid', $uid)
          ->countQuery()
          ->execute()
          ->fetchField();
        if ($d7_matches) {
          // The record should be here, but maybe the flags are different.
          // The next migration will fix it.
          $this->io()->note(\dt('Ignoring NID: @nid / UID: @uid from drupalorg_project_maintainer', [
            '@nid' => $nid,
            '@uid' => $uid,
          ]));
        }
        else {
          $this->io()->note(\dt('Removing NID: @nid / UID: @uid from drupalorg_project_maintainer', [
            '@nid' => $nid,
            '@uid' => $uid,
          ]));
          $connection->delete('drupalorg_project_maintainer')
            ->condition('nid', $nid)
            ->condition('uid', $uid)
            ->execute();
        }
      }
    }
    elseif ($count >= 50 && !$force) {
      // Only shown when the limit blocked the run, not when a forced run
      // was declined at the prompt.
      $this->io()->warning(\dt('Too many rows (@count - max expected 50), review the migration first.', [
        '@count' => $count,
      ]));
    }
  }
  $this->io()->note('Diff file processed');
}
/**
 * Tells whether a row of the diff file has to be skipped.
 *
 * A row is skipped when, joined with spaces, it starts with one of the
 * markers "---", "+++", "@@" or a single space, or when it does not have
 * exactly the expected number of columns.
 *
 * @param array $row
 *   Row from the diff file.
 * @param int $columns
 *   Number of columns a usable row must have.
 *
 * @return bool
 *   TRUE to discard the row, FALSE to process it.
 */
protected function discardDiffRow($row, $columns) {
  $line = implode(' ', $row);
  // Non-diff lines: file headers, hunk headers and unchanged context.
  foreach (['---', '+++', '@@', ' '] as $marker) {
    if (str_starts_with($line, $marker)) {
      return TRUE;
    }
  }
  $has_expected_columns = (count($row) === $columns);
  return !$has_expected_columns;
}
/**
 * Reports entities that exist in D7 but are missing on this site.
 *
 * Nodes are checked per bundle of self::NODE_TYPES_TO_CHECK; users are
 * checked in a single pass. A warning is only printed when more than five
 * entities are missing.
 *
 * @param string $type
 *   Type of entities to check: "node" or "users".
 */
protected function checkEntitiesNotMigrated($type) {
  $bundles = match ($type) {
    'node' => self::NODE_TYPES_TO_CHECK,
    // Special case as the user has no bundles.
    'users' => [NULL],
    default => [],
  };
  foreach ($bundles as $bundle) {
    $placeholders = [
      '@type' => $type,
      '@entity_type' => $bundle,
    ];
    $this->io()->info(\dt('Checking @type (@entity_type) entities migration status.', $placeholders));
    $missing = $this->entitiesFromFirstDatabaseNotInSecondDatabase('migrate', 'default', $type, $bundle);
    if (empty($missing)) {
      $this->io()->success(\dt('Could not find any @type (@entity_type) entity not migrated.', $placeholders));
      continue;
    }
    $missing_total = count($missing);
    $missing_ids = $this->arrayToStringTruncated($missing);
    // Up to 5 is normally ok as some migrations can take hours.
    if ($missing_total <= 5) {
      continue;
    }
    $this->io()->warning(\dt('@count @type (@entity_type) entities are not migrated, with the following IDs: @ids', $placeholders + [
      '@count' => $missing_total,
      '@ids' => $missing_ids,
    ]));
  }
}
/**
 * Removes entities that exist on this site but not in D7.
 *
 * For each bundle the extra IDs are listed, the operator is asked to
 * confirm, and then users are cancelled and nodes deleted. Nothing is
 * removed when more than 200 entities are affected.
 *
 * @param string $type
 *   Type of entities to check: "node" or "users".
 */
protected function deleteEntitiesNotComingFromMigration($type) {
  $bundles = match ($type) {
    'node' => self::NODE_TYPES_TO_CHECK,
    // Special case as the user has no bundles.
    'users' => [NULL],
    default => [],
  };
  foreach ($bundles as $bundle) {
    $placeholders = [
      '@type' => $type,
      '@entity_type' => $bundle,
    ];
    $this->io()->info(\dt('Checking @type (@entity_type) that are not coming from a migration.', $placeholders));
    $extra_ids = $this->entitiesFromFirstDatabaseNotInSecondDatabase('default', 'migrate', $type, $bundle);
    if (empty($extra_ids)) {
      $this->io()->success(\dt('Could not find any @type (@entity_type) to delete.', $placeholders));
      continue;
    }
    $extra_total = count($extra_ids);
    $warning = \dt('@count @type (@entity_type) entities will be deleted, with the following IDs: @ids', $placeholders + [
      '@count' => $extra_total,
      '@ids' => $this->arrayToStringTruncated($extra_ids),
    ]);
    $this->io()->warning($warning);
    $confirmed = $extra_total && $this->io()->confirm(dt('Do you want to delete the @type (@entity_type) entities?', $placeholders));
    if (!$confirmed) {
      continue;
    }
    $this->logger()->warning($warning);
    if ($extra_total > 200) {
      // Do not delete: there is something very wrong with the data.
      $error = dt('Number of entities to be deleted (@count) exceeded the limit (@limit)', [
        '@count' => $extra_total,
        '@limit' => 200,
      ]);
      $this->io()->error($error);
      $this->logger()->error($error);
      continue;
    }
    foreach ($extra_ids as $extra_id) {
      match ($type) {
        'users' => $this->cancelAccount($extra_id),
        'node' => $this->deleteNode($extra_id),
        default => NULL,
      };
    }
  }
}
/**
 * Joins the values with ", " and keeps only the first 100 of them.
 *
 * @param array $array
 *   Values to print.
 *
 * @return string
 *   The comma separated values, followed by "... (truncated)" when the
 *   array has more than 100 values.
 */
protected function arrayToStringTruncated($array) {
  $max_items = 100;
  if (count($array) <= $max_items) {
    return implode(', ', $array);
  }
  $visible = array_slice($array, 0, $max_items);
  return implode(', ', $visible) . '... (truncated)';
}
/**
 * Collects the entity IDs that one database has and another one lacks.
 *
 * The first database is read in batches of self::BATCH_LENGTH IDs and each
 * batch is checked against the second database.
 *
 * @param string $first_database
 *   Key of the database to read the IDs from.
 * @param string $second_database
 *   Key of the database the IDs are looked up in.
 * @param string $entity_type_id
 *   Type of the entity.
 * @param string $bundle
 *   (Optional) Bundle of the entity.
 *
 * @return array
 *   List of entity IDs missing from the second database.
 */
protected function entitiesFromFirstDatabaseNotInSecondDatabase($first_database, $second_database, $entity_type_id, $bundle = NULL) {
  $total = $this->getEntityCounts($first_database, $entity_type_id, $bundle);
  $batch_size = self::BATCH_LENGTH;
  $missing = [];
  $position = 0;
  $batch_ids = $this->getNextEntityBatch($position, $batch_size, $first_database, $entity_type_id, $bundle);
  $this->io()->progressStart($total);
  while ($batch_ids) {
    $not_found = $this->entityIdsNotInDatabase($batch_ids, $second_database, $entity_type_id, $bundle);
    // Both lists have integer keys, so unpacking appends and renumbers.
    $missing = [...$missing, ...$not_found];
    $this->io()->progressAdvance($batch_size);
    $position += $batch_size;
    $batch_ids = $this->getNextEntityBatch($position, $batch_size, $first_database, $entity_type_id, $bundle);
  }
  $this->io()->progressFinish();
  return $missing;
}
/**
 * Deletes the node with the given ID and reports it.
 *
 * Does nothing when the node cannot be loaded.
 *
 * @param int $nid
 *   The node id.
 */
protected function deleteNode($nid) {
  $node = Node::load($nid);
  if (!$node) {
    return;
  }
  $node->delete();
  $message = dt('Node @nid deleted.', [
    '@nid' => $nid,
  ]);
  $this->io()->info($message);
}
/**
 * Cancels a user account through the user_cancel() batch.
 *
 * The statement order matters: user_cancel() is called first, the batch it
 * leaves behind is then switched to non-progressive, and only after that is
 * the batch run through Drush.
 *
 * @param int $uid
 *   User id.
 */
protected function cancelAccount($uid) {
// No need for extra logging as the user_cancel function does it.
// NOTE(review): 'user_cancel_reassign' presumably deletes the account and
// reassigns its content; confirm against the user module documentation.
user_cancel([], $uid, 'user_cancel_reassign');
// Taken by reference so the flag below changes the batch itself, not a copy.
$batch =& batch_get();
$batch['progressive'] = FALSE;
// Batch it the drush way, instead of batch_process().
drush_backend_batch_process();
// Logged after the batch has been processed, despite the "Deleting" wording.
$this->logger()->warning(\dt('Deleting user @uid via sync-users drush command.', [
'@uid' => $uid,
]));
}
/**
 * Gets a count of entity table on the given database for a given type.
 *
 * @param string $database
 *   Key of the database to check.
 * @param string $entity_type_id
 *   Type of the entity. It is used directly as the name of the table.
 * @param string $bundle
 *   (Optional) Bundle to restrict the count to, matched on the "type"
 *   column.
 *
 * @return int
 *   Count of entities.
 */
protected function getEntityCounts($database, $entity_type_id, $bundle = NULL) {
  $connection = Database::getConnection('default', $database);
  $query = $connection->select($entity_type_id, 'e');
  if (!is_null($bundle)) {
    $query->condition('e.type', $bundle);
  }
  // fetchField() hands the count back as a string; cast it so the method
  // returns the integer it documents.
  return (int) $query
    ->countQuery()
    ->execute()
    ->fetchField();
}
/**
 * Reads one page of entity IDs, ordered by ID.
 *
 * @param int $offset
 *   Offset of the records to get.
 * @param int $length
 *   Length of the records to get.
 * @param string $database
 *   Key of the database to check.
 * @param string $entity_type_id
 *   Type of the entity. It is used directly as the name of the table.
 * @param string $bundle
 *   (Optional) Bundle to restrict to, matched on the "type" column.
 *
 * @return array
 *   Array of entity ids.
 */
protected function getNextEntityBatch($offset, $length, $database, $entity_type_id, $bundle = NULL) {
  $id_column = $this->getIdColumn($entity_type_id);
  $select = Database::getConnection('default', $database)->select($entity_type_id, 'e');
  $select->fields('e', [$id_column]);
  $select->range($offset, $length);
  $select->orderBy("e.$id_column");
  if ($bundle !== NULL) {
    $select->condition('e.type', $bundle);
  }
  return $select->execute()->fetchCol();
}
/**
 * Maps an entity type to the name of its ID column.
 *
 * @param string $type
 *   Type of the entity.
 *
 * @return string
 *   "uid" for "users", "nid" for "node" and "id" for anything else.
 */
protected function getIdColumn($type) {
  return match ($type) {
    'users' => 'uid',
    'node' => 'nid',
    default => 'id',
  };
}
/**
 * Filters a list of entity IDs down to those missing from a database.
 *
 * @param array $ids
 *   List of entity ids to check.
 * @param string $database
 *   Key of the database to check.
 * @param string $entity_type_id
 *   Type of the entities. It is used directly as the name of the table.
 * @param string $bundle
 *   (Optional) Bundle to restrict to, matched on the "type" column.
 *
 * @return array
 *   The entries of $ids that were not found, with their original keys.
 */
protected function entityIdsNotInDatabase($ids, $database, $entity_type_id, $bundle = NULL) {
  $id_column = $this->getIdColumn($entity_type_id);
  $select = Database::getConnection('default', $database)->select($entity_type_id, 'e');
  $select->fields('e', [$id_column]);
  $select->condition("e.$id_column", $ids, 'IN');
  $select->orderBy("e.$id_column");
  if ($bundle !== NULL) {
    $select->condition('e.type', $bundle);
  }
  $found = $select->execute()->fetchCol();
  return array_diff($ids, $found);
}
/**
 * Creates and saves an "organizations" paragraph from a D7 row.
 *
 * The organization reference is only set when the row has a node ID and
 * that node loads here as an "organization" node.
 *
 * @param mixed $data
 *   Row with the properties field_current_value, field_job_title_value,
 *   field_organization_name_value and nid.
 *
 * @return \Drupal\paragraphs\ParagraphInterface
 *   Created paragraph.
 */
protected function createParagraph(mixed $data): ParagraphInterface {
  $values = [
    'type' => 'organizations',
    'field_current' => (bool) ($data->field_current_value ?? FALSE),
    'field_job_title' => $data->field_job_title_value ?? '',
    'field_organization_name' => $data->field_organization_name_value,
  ];
  $organization = $data->nid ? Node::load($data->nid) : NULL;
  if ($organization && $organization->bundle() === 'organization') {
    $values['field_organization_reference'] = ['target_id' => $data->nid];
  }
  $paragraph = $this->entityTypeManager->getStorage('paragraph')->create($values);
  $paragraph->save();
  return $paragraph;
}
}
