media_duplicate_check-1.0.0/src/Service/MediaDuplicateChecker.php

src/Service/MediaDuplicateChecker.php
<?php

declare(strict_types=1);

namespace Drupal\media_duplicate_check\Service;

use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\File\FileSystemInterface;
use Drupal\Core\Database\Connection;
use Drupal\media\MediaInterface;
use Drupal\Core\StringTranslation\ByteSizeMarkup;

/**
 * Service for checking duplicate media files.
 *
 * Candidate files are looked up in the {file_managed} table by filename
 * (ignoring a trailing "_N" suffix) and optionally narrowed down by comparing
 * MD5 content hashes. The media entities referencing those files are then
 * returned.
 */
class MediaDuplicateChecker {

  /**
   * Media fields that are inspected for references to managed files.
   *
   * Fields that do not exist on the site are skipped at query time.
   *
   * NOTE(review): 'field_media_oembed_video' looks like a URL field rather
   * than a file reference; it is kept here to preserve the existing lookup
   * behavior - confirm whether it belongs in this list.
   */
  private const MEDIA_FILE_FIELDS = [
    'field_media_image',
    'field_media_document',
    'field_media_svg_image',
    'field_media_oembed_video',
  ];

  /**
   * The entity type manager.
   *
   * @var \Drupal\Core\Entity\EntityTypeManagerInterface
   */
  protected $entityTypeManager;

  /**
   * The file system service.
   *
   * @var \Drupal\Core\File\FileSystemInterface
   */
  protected $fileSystem;

  /**
   * The database connection.
   *
   * @var \Drupal\Core\Database\Connection
   */
  protected $database;

  /**
   * Constructs a MediaDuplicateChecker object.
   *
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity_type_manager
   *   The entity type manager.
   * @param \Drupal\Core\File\FileSystemInterface $file_system
   *   The file system service.
   * @param \Drupal\Core\Database\Connection $database
   *   The database connection.
   */
  public function __construct(
    EntityTypeManagerInterface $entity_type_manager,
    FileSystemInterface $file_system,
    Connection $database
  ) {
    $this->entityTypeManager = $entity_type_manager;
    $this->fileSystem = $file_system;
    $this->database = $database;
  }

  /**
   * Check for duplicates using the original filename before upload.
   *
   * @param string $original_filename
   *   The original filename before any Drupal processing.
   * @param string|null $media_type
   *   (optional) The media bundle to restrict the search to.
   *
   * @return array
   *   Media entities (keyed by ID) whose files have a similar filename.
   */
  public function findDuplicatesByOriginalFilename(string $original_filename, ?string $media_type = NULL): array {
    $file_ids = array_column($this->findFilesByName($original_filename), 'fid');
    return $this->loadMediaReferencingFiles($file_ids, $media_type);
  }

  /**
   * Find existing media entities with the same filename or file hash.
   *
   * When a readable file URI is given and the 'check_by_hash' setting is
   * enabled, only files with a similar name AND identical MD5 hash count as
   * duplicates. Otherwise files are matched by name only, honoring the
   * 'case_sensitive' setting.
   *
   * @param string $filename
   *   The filename to check.
   * @param string|null $media_type
   *   (optional) The media bundle to restrict the search to.
   * @param string|null $file_uri
   *   (optional) URI of the file whose content hash should be compared.
   *
   * @return array
   *   Media entities (keyed by ID) with the same filename or hash.
   */
  public function findDuplicatesByFilename(string $filename, ?string $media_type = NULL, ?string $file_uri = NULL): array {
    $config = \Drupal::config('media_duplicate_check.settings');
    $case_sensitive = (bool) ($config->get('case_sensitive') ?? FALSE);
    $check_by_hash = (bool) ($config->get('check_by_hash') ?? TRUE);

    // md5_file() returns FALSE for unreadable files. Treat that as "no hash
    // available" so that two unreadable files are never reported as equal.
    $file_hash = NULL;
    if ($file_uri && $check_by_hash && file_exists($file_uri)) {
      $computed = md5_file($file_uri);
      $file_hash = is_string($computed) ? $computed : NULL;
    }

    if ($file_hash !== NULL) {
      // Only files with a similar name are hashed, to keep the check cheap.
      $file_results = $this->filterFilesByHash($this->findFilesByName($filename), $file_hash);
    }
    else {
      $file_results = $this->findFilesByName($filename, $case_sensitive);
    }

    return $this->loadMediaReferencingFiles(array_column($file_results, 'fid'), $media_type);
  }

  /**
   * Extract the base filename without Drupal's automatic numbering.
   *
   * Only the last "_N" suffix is removed. If stripping the suffix would leave
   * nothing, the name is returned unchanged so it can never degrade into an
   * empty, match-everything search prefix.
   *
   * Examples:
   * - profile-image-3_0_13.png -> profile-image-3_0
   * - document_5.pdf -> document
   * - image.jpg -> image
   * - _5.pdf -> _5
   *
   * @param string $filename
   *   The filename to process.
   *
   * @return string
   *   The filename without extension and without the automatic suffix.
   */
  public function getBaseFilename(string $filename): string {
    $name_without_ext = pathinfo($filename, PATHINFO_FILENAME);

    // Matches _[number] at the very end of the name.
    $base_name = preg_replace('/_\d+$/', '', $name_without_ext);

    // preg_replace() yields NULL on a PCRE error; an empty result means the
    // whole name was just a suffix. Fall back to the original in both cases.
    if ($base_name === NULL || $base_name === '') {
      return $name_without_ext;
    }

    return $base_name;
  }

  /**
   * Get media preview data for AJAX response.
   *
   * @param \Drupal\media\MediaInterface $media
   *   The media entity.
   *
   * @return array
   *   Preview data with the keys id, name, created, created_formatted, type,
   *   thumbnail (URL string or NULL) and url, plus file_url and file_size
   *   when the bundle's file field holds a file.
   */
  public function getMediaPreviewData(MediaInterface $media): array {
    $created = $media->getCreatedTime();
    $data = [
      'id' => $media->id(),
      'name' => $media->getName(),
      'created' => $created,
      'created_formatted' => \Drupal::service('date.formatter')->format($created, 'medium'),
      'type' => $media->bundle(),
      'thumbnail' => NULL,
      'url' => $media->toUrl()->toString(),
    ];

    $url_generator = \Drupal::service('file_url_generator');

    // Try to get thumbnail.
    if ($media->hasField('thumbnail') && !$media->get('thumbnail')->isEmpty()) {
      $thumbnail = $media->get('thumbnail')->entity;
      if ($thumbnail) {
        $data['thumbnail'] = $url_generator->generateString($thumbnail->getFileUri());
      }
    }

    // Get the actual file URL.
    $file_field_name = $this->getFileFieldName($media);
    if ($file_field_name && $media->hasField($file_field_name) && !$media->get($file_field_name)->isEmpty()) {
      $file = $media->get($file_field_name)->entity;
      if ($file) {
        $data['file_url'] = $url_generator->generateString($file->getFileUri());
        // The size may be unset; default to 0 instead of passing NULL on.
        $data['file_size'] = ByteSizeMarkup::create($file->getSize() ?? 0);
      }
    }

    return $data;
  }

  /**
   * Get the file field name for a media entity.
   *
   * @param \Drupal\media\MediaInterface $media
   *   The media entity.
   *
   * @return string|null
   *   The field name or NULL if the bundle is not in the known mapping.
   */
  protected function getFileFieldName(MediaInterface $media): ?string {
    // Map media types to their file fields.
    $field_map = [
      'image' => 'field_media_image',
      'document' => 'field_media_document',
      'svg_image' => 'field_media_svg_image',
      'remote_video' => 'field_media_oembed_video',
    ];

    return $field_map[$media->bundle()] ?? NULL;
  }

  /**
   * Finds managed files whose name starts with the given file's base name.
   *
   * @param string $filename
   *   The filename to derive the base name and extension from.
   * @param bool $case_sensitive
   *   (optional) Whether the base name must match case-sensitively. The
   *   extension is always matched case-insensitively.
   *
   * @return object[]
   *   Rows from {file_managed} with the properties fid, filename, uri and
   *   created.
   */
  private function findFilesByName(string $filename, bool $case_sensitive = FALSE): array {
    $base_filename = $this->getBaseFilename($filename);
    if ($base_filename === '') {
      // An empty prefix would turn the pattern into '%' and match every file.
      return [];
    }
    $extension = pathinfo($filename, PATHINFO_EXTENSION);

    $query = $this->database->select('file_managed', 'f')
      ->fields('f', ['fid', 'filename', 'uri', 'created']);

    // Escape the name so that '_' and '%' inside it are matched literally
    // instead of acting as LIKE wildcards. Drupal's 'LIKE' operator is
    // case-insensitive across drivers; 'LIKE BINARY' is the case-sensitive
    // variant.
    $query->condition(
      'f.filename',
      $this->database->escapeLike($base_filename) . '%',
      $case_sensitive ? 'LIKE BINARY' : 'LIKE'
    );

    // Ensure the extension matches.
    if ($extension !== '') {
      $query->condition('f.filename', '%.' . $this->database->escapeLike($extension), 'LIKE');
    }

    return $query->execute()->fetchAll();
  }

  /**
   * Keeps only the file records whose content matches the given MD5 hash.
   *
   * @param object[] $file_records
   *   Rows from {file_managed}; each must expose a 'uri' property.
   * @param string $file_hash
   *   The MD5 hash to compare against.
   *
   * @return object[]
   *   The matching records, re-indexed from zero.
   */
  private function filterFilesByHash(array $file_records, string $file_hash): array {
    $matches = array_filter(
      $file_records,
      // A missing or unreadable file makes md5_file() return FALSE, which
      // never equals the (string) reference hash.
      static fn ($record): bool => file_exists($record->uri) && md5_file($record->uri) === $file_hash
    );
    return array_values($matches);
  }

  /**
   * Loads the media entities that reference any of the given files.
   *
   * @param array $file_ids
   *   The managed file IDs to look for.
   * @param string|null $media_type
   *   (optional) The media bundle to restrict the search to.
   *
   * @return array
   *   Media entities keyed by ID; empty if nothing references the files or
   *   none of the known file fields exist.
   */
  private function loadMediaReferencingFiles(array $file_ids, ?string $media_type = NULL): array {
    if ($file_ids === []) {
      return [];
    }

    // Only query fields that actually exist on this site.
    $field_definitions = \Drupal::service('entity_field.manager')->getFieldStorageDefinitions('media');
    $existing_fields = array_filter(
      self::MEDIA_FILE_FIELDS,
      static fn (string $field_name): bool => isset($field_definitions[$field_name])
    );
    if ($existing_fields === []) {
      return [];
    }

    $media_storage = $this->entityTypeManager->getStorage('media');
    $query = $media_storage->getQuery()->accessCheck(TRUE);

    if ($media_type) {
      $query->condition('bundle', $media_type);
    }

    // A media entity matches when any of its file fields points at one of
    // the candidate files.
    $field_conditions = $query->orConditionGroup();
    foreach ($existing_fields as $field_name) {
      $field_conditions->condition($field_name, $file_ids, 'IN');
    }
    $query->condition($field_conditions);

    $media_ids = $query->execute();
    if (empty($media_ids)) {
      return [];
    }

    return $media_storage->loadMultiple($media_ids);
  }

}

Главная | Обратная связь

drupal hosting | друпал хостинг | it patrol .inc