foldershare-8.x-1.2/src/Entity/FolderShareTraits/OperationUnarchiveTrait.php

src/Entity/FolderShareTraits/OperationUnarchiveTrait.php
<?php

namespace Drupal\foldershare\Entity\FolderShareTraits;

use Drupal\foldershare\ManageFilenameExtensions;
use Drupal\foldershare\ManageFileSystem;
use Drupal\foldershare\Settings;
use Drupal\foldershare\Utilities\FileUtilities;
use Drupal\foldershare\Utilities\FormatUtilities;
use Drupal\foldershare\Entity\Exception\LockException;
use Drupal\foldershare\Entity\Exception\SystemException;
use Drupal\foldershare\Entity\Exception\ValidationException;

/**
 * Unarchive FolderShare entities into multiple FolderShare entities.
 *
 * This trait includes methods to unarchive a FolderShare entity for
 * a ZIP archive, saving the contents as new separate FolderShare entities.
 *
 * <B>Internal trait</B>
 * This trait is internal to the FolderShare module and used to define
 * features of the FolderShare entity class. It is a mechanism to group
 * functionality to improve code management.
 *
 * @ingroup foldershare
 */
trait OperationUnarchiveTrait {

  /*---------------------------------------------------------------------
   *
   * Unarchive.
   *
   *---------------------------------------------------------------------*/

  /**
   * {@inheritdoc}
   */
  public function unarchiveFromZip() {
    //
    // Implementation note:
    //
    // A ZIP archive includes a list of files and folders. Each entry has a
    // relative path, a modification date, a size, and assorted internal
    // attributes. Path components are always separated by '/' characters,
    // regardless of the source or current OS.
    //
    // The task here is to extract everything from a ZIP archive and create
    // new FolderShare files and folders for that content, retaining the
    // original names from the archive. ZipArchive's extractTo() is not used
    // because it can alter names to suit the local OS and file system.
    //
    // The work is done in steps:
    //
    // 1. Extract the archive into a temporary directory, giving each
    //    extracted item a generic numeric name and recording the original
    //    ZIP path, the temporary location, and the modification time.
    //
    // 2. Loop through the extracted items and create corresponding
    //    FolderShare entities with the original ZIP names and modification
    //    dates. For files, also create a File entity for the stored file.
    //
    // 3. Delete the temporary directory.
    //
    // On errors:
    //
    // - If extraction fails (not a ZIP file, corrupted, out of space, etc.),
    //   delete anything extracted so far and rethrow.
    //
    // - If entity creation fails, keep whatever entities have been created
    //   so far, delete the temporary directory, and rethrow.
    //
    // Validate
    // --------
    // This item must be a FolderShare file. Validating the ZIP content is
    // left to the extraction step below.
    if ($this->isFile() === FALSE) {
      throw new ValidationException(FormatUtilities::createFormattedMessage(
        t(
          '@method was called with an entity that is not a file.',
          [
            '@method' => __METHOD__,
          ])));
    }

    //
    // Extract to local directory
    // --------------------------
    // Get the local path of the stored archive file, then extract everything
    // into a temporary directory while holding the root operation lock.
    //
    // LOCK FILE ROOT'S FOLDER TREE.
    $rootId = $this->getRootItemId();
    if (self::acquireRootOperationLock($rootId) === FALSE) {
      throw new LockException(
        self::getStandardLockExceptionMessage(
          t('uncompressed'),
          $this->getName()));
    }

    // Everything done while the lock is held is inside the try block so
    // that the lock is always released, even if creating the temporary
    // directory or looking up the archive's file path throws.
    $tempDirUri = NULL;
    try {
      // Create a temporary directory for the archive's contents.
      $tempDirUri = ManageFileSystem::createLocalTempDirectory();

      // Get the local file path to the archive.
      $archivePath = FileUtilities::realpath($this->getFile()->getFileUri());

      // Extract into the temp directory.
      $entries = self::extractLocalZipFileToLocalDirectory(
        $archivePath,
        $tempDirUri);
    }
    catch (\Exception $e) {
      // A problem occurred while trying to unarchive the file. This
      // could be because the ZIP file is corrupted, or because there
      // is insufficient disk space to store the unarchived contents.
      // There could also be assorted system errors, like a file system
      // going off line or a permissions problem. Remove anything that
      // was extracted.
      if (empty($tempDirUri) === FALSE) {
        FileUtilities::rrmdir($tempDirUri);
      }

      throw $e;
    }
    finally {
      // UNLOCK FILE ROOT'S FOLDER TREE.
      self::releaseRootOperationLock($rootId);
    }

    // From here on, the temporary directory is always deleted on the way
    // out, whether the method completes or throws. Entities that have
    // already been added to FolderShare remain.
    try {
      if (empty($entries) === TRUE) {
        // The archive contained nothing to create.
        return;
      }

      //
      // Decide if content should go into a subfolder
      // --------------------------------------------
      // A ZIP file may contain any number of files and folders in an
      // arbitrary hierarchy. There are two common behaviors:
      //
      // - Unarchive everything into the current folder.
      //
      // - Unarchive a single highest-level item into the current folder,
      //   but unarchive multiple highest-level items into a subfolder named
      //   after the archive. This avoids dumping a large number of items
      //   into the current folder. This is the behavior of macOS.
      //
      // The parent folder is NULL when the archive is in a root list.
      $unzipIntoFolder = $this->getParentFolder();

      if (Settings::getZipUnarchiveMultipleToSubfolder() === TRUE) {
        // Count the highest-level items.
        $nTop = 0;
        foreach ($entries as $entry) {
          if ($entry['isTop'] === TRUE) {
            ++$nTop;
          }
        }

        if ($nTop > 1) {
          // There are multiple highest-level items. We need a subfolder
          // named after the archive, without its last name extension.
          $subFolderName = $this->getName();
          $lastDotIndex = mb_strrpos($subFolderName, '.');
          if ($lastDotIndex !== FALSE) {
            $subFolderName = mb_substr($subFolderName, 0, $lastDotIndex);
          }

          // Hereafter, treat the new subfolder as the parent folder for
          // the unarchiving.
          if ($unzipIntoFolder === NULL) {
            // The ZIP file is at the root level and there is no parent.
            // Create a root folder to contain the unZIPed archive.
            $unzipIntoFolder = self::createRootFolder($subFolderName, TRUE);
          }
          else {
            // The ZIP file is in a folder. Create a subfolder to
            // contain the unZIPed archive.
            $unzipIntoFolder = $unzipIntoFolder->createFolder(
              $subFolderName,
              TRUE);
          }
        }
      }

      //
      // DO NOT lock the root folder tree that will contain the unZIPed
      // items. Each of the operations below that create folders or add
      // files do their own locking of the root folder tree. If we lock it
      // now, those operations will fail.
      //
      // Create files and folders
      // ------------------------
      // Loop through the list of files and folders in the archive and
      // create corresponding FolderShare entities for folders, and
      // FolderShare and File entities for files.
      $mapPathToEntity = [];

      foreach ($entries as $entry) {
        // Get a few values from the entry.
        $isDirectory = $entry['isDirectory'];
        $zipPath     = $entry['zipPath'];
        $localUri    = $entry['localUri'];
        $zipTime     = $entry['time'];

        // Split the original ZIP path into the parent folder path and
        // the new child's name. Directory paths are normally recorded
        // without an ending '/', but strip one if present so that the
        // split below finds the directory's own name. Note, again, that
        // '/' is the ZIP directory separator, regardless of the separator
        // used by the current OS.
        $entryPath = ($isDirectory === TRUE) ? rtrim($zipPath, '/') : $zipPath;
        $slashIndex = mb_strrpos($entryPath, '/');

        if ($slashIndex === FALSE) {
          // There is no slash. This entry has no parent directory, so
          // use the unzip-into folder (which will be NULL if we're unZIPing
          // into the user's root list). This entry may be a directory or
          // a file.
          $parentEntity = $unzipIntoFolder;
          $zipName      = $entryPath;
        }
        else {
          // There is a slash. Get the last name and the parent path.
          $parentZipPath = mb_substr($entryPath, 0, $slashIndex);
          $zipName       = mb_substr($entryPath, ($slashIndex + 1));

          // Find the parent entity by looking up the path in the map
          // of previously created entities. Directory entries are listed
          // before the items they contain, so the parent entity has
          // already been created by the time a child is reached.
          $parentEntity = $mapPathToEntity[$parentZipPath];
        }

        if ($isDirectory === TRUE) {
          // The ZIP entry is for a directory.
          //
          // Create a new folder in the appropriate parent. This will lock
          // a root folder tree as needed.
          if ($parentEntity === NULL) {
            $childFolder = self::createRootFolder($zipName);
          }
          else {
            $childFolder = $parentEntity->createFolder($zipName);
          }

          if ($zipTime !== 0) {
            $childFolder->setCreatedTime($zipTime);
            $childFolder->setChangedTime($zipTime);
            $childFolder->save();
          }

          // Save that new folder entity into the map for its children.
          $mapPathToEntity[$entryPath] = $childFolder;
          continue;
        }

        // The ZIP entry is for a file.
        //
        // Move the temporary file to FolderShare's directory tree and
        // wrap it with a new File entity.
        $childFile = self::createFileEntityFromLocalFile(
          $localUri,
          $zipName);

        if ($zipTime !== 0) {
          $childFile->setChangedTime($zipTime);
          $childFile->save();
        }

        try {
          // Add the file to the parent folder, which will be NULL if
          // we're adding the file to the user's root list.
          //
          // This will lock the root folder tree as needed.
          self::addFilesInternal(
            $parentEntity,
            [$childFile],
            (-1),
            TRUE,
            TRUE,
            FALSE);
        }
        catch (\Exception $e) {
          // On any error, we cannot continue. Delete the orphaned File.
          $childFile->delete();
          throw $e;
        }
      }
    }
    finally {
      // Delete the temporary directory that contained the extracted
      // archive, both when we are done and when an error stopped us.
      FileUtilities::rrmdir($tempDirUri);
    }
  }

  /*---------------------------------------------------------------------
   *
   * Implementation.
   *
   *---------------------------------------------------------------------*/

  /**
   * Extracts a local ZIP archive into a local directory.
   *
   * The indicated ZIP archive is un-zipped to extract all of its files
   * and directories into the given temporary directory. Extracted items
   * are given simple numeric names, instead of their names in the archive,
   * in order to avoid name changes that result from the current OS not
   * supporting the same name length and character sets used within the
   * ZIP archive.
   *
   * An array is returned that maps each ZIP path to a description of the
   * numerically-named temporary file or directory created for it.
   * Directory entries are added before file entries, and a directory is
   * always added before the directories it contains.
   *
   * @param string $archivePath
   *   The local file system path to the ZIP archive to extract.
   * @param string $directoryUri
   *   The URI for a local temp directory into which to extract the ZIP
   *   archive's files and directories.
   *
   * @return array|null
   *   Returns NULL if either argument is empty. Otherwise returns an array
   *   containing one entry for each ZIP archive file or folder, keyed by
   *   the UTF-8 ZIP path (directory paths have no ending '/'). Entries are
   *   associative arrays with the following keys:
   *   - 'isDirectory' - TRUE if the entry is a directory.
   *   - 'isTop' - TRUE if the entry has no parent directory in the archive.
   *   - 'zipPath' - the file or directory path in the ZIP file.
   *   - 'localUri' - the file or directory URI in local storage.
   *   - 'time' - the last-modified time in the ZIP file, or 0 if unknown.
   *
   * @throws \Drupal\foldershare\Entity\Exception\SystemException
   *   Thrown if a file or directory cannot be created, if the archive
   *   contains a file with a name extension that is not allowed, or if
   *   the ZIP archive is corrupted.
   */
  private static function extractLocalZipFileToLocalDirectory(
    string $archivePath,
    string $directoryUri) {

    //
    // Implementation note:
    //
    // ZipArchive's extractTo() can extract a whole archive, or a single
    // named entry, into a directory. In both cases the local names are
    // derived from the ZIP paths and may be adjusted to suit the local
    // OS and file system, and extractTo() does not report the names it
    // actually used. Since we need to know where each item landed, we
    // cannot use extractTo().
    //
    // Instead, we get a stream from ZipArchive for each file and copy
    // the stream's content into a local file that we create and name
    // ourselves.
    //
    // ZIP paths always use '/', regardless of the local OS or file system
    // conventions. So, as we parse ZIP paths, we use '/', and not the
    // current OS's DIRECTORY_SEPARATOR.
    //
    if (empty($archivePath) === TRUE || empty($directoryUri) === TRUE) {
      return NULL;
    }

    // Builds the exception used whenever the archive's content cannot
    // be read or does not make sense.
    $invalidArchive = static function () {
      return new SystemException(FormatUtilities::createFormattedMessage(
        t('The file does not appear to be a valid ZIP archive.'),
        t('The file may be corrupted or it may not be a ZIP archive.')));
    };

    //
    // Open archive
    // ------------
    // Create the ZipArchive object and open the archive. The CHECKCONS
    // flag asks that the open perform consistency checks.
    $archive = new \ZipArchive();

    if ($archive->open($archivePath, \ZipArchive::CHECKCONS) !== TRUE) {
      throw $invalidArchive();
    }

    // Reads one entry and returns its stored name, its path converted to
    // UTF-8, and its modification time (0 if the archive has none).
    //
    // The stored name is in the original file's character encoding, which
    // we don't know and which may not match that of the current OS. We
    // therefore attempt to detect the encoding and convert to UTF-8. The
    // stored name is returned too because it is what the archive must be
    // asked for when the entry's content is read.
    $readEntry = static function (\ZipArchive $zip, int $index) use ($invalidArchive) {
      $stat = $zip->statIndex($index, \ZipArchive::FL_UNCHANGED);
      if ($stat === FALSE) {
        throw $invalidArchive();
      }

      $storedName = $stat['name'];
      $encoding   = mb_detect_encoding($storedName, NULL, TRUE);
      if ($encoding === FALSE) {
        // Detection failed. Keep the name if it is already valid UTF-8.
        // Otherwise, as a last resort, treat it as ISO-8859-1, which
        // accepts every byte and so always yields valid UTF-8.
        $encoding = (mb_check_encoding($storedName, 'UTF-8') === TRUE) ?
          'UTF-8' : 'ISO-8859-1';
      }

      $path = $storedName;
      if ($encoding !== 'UTF-8') {
        $path = mb_convert_encoding($storedName, 'UTF-8', $encoding);
      }

      return [
        'name' => $storedName,
        'path' => $path,
        'time' => (isset($stat['mtime']) === FALSE) ? 0 : $stat['mtime'],
      ];
    };

    $entries = [];

    // Everything that uses the open archive is inside this try block so
    // that the archive is closed if anything goes wrong.
    try {
      $numFiles = $archive->numFiles;

      //
      // Check file name extensions
      // --------------------------
      // If the site is restricting file name extensions, check everything
      // in the archive first to insure that all files are supported. If
      // any are not, stop and report an error.
      $extensionsString = ManageFilenameExtensions::getAllowedNameExtensions();
      if (empty($extensionsString) === FALSE) {
        // Extensions are limited.
        $extensions = mb_split(' ', $extensionsString);
        for ($i = 0; $i < $numFiles; $i++) {
          $path = $archive->getNameIndex($i);
          if (ManageFilenameExtensions::isNameExtensionAllowed($path, $extensions) === FALSE) {
            throw new SystemException(t(
              "The file type used by '@name' in the archive is not allowed.\nThe archive cannot be uncompressed. Please see the site's documentation for a list of approved file types.",
              [
                '@name' => $path,
              ]));
          }
        }
      }

      //
      // Create temp directories
      // -----------------------
      // Sweep through the archive and make a list of directories. For each
      // one, create a corresponding temp directory. To avoid local file
      // system naming problems, use simple numbers (e.g. 0, 1, 2, 3).
      //
      // Each ZIP entry can have its own relative path. That path may
      // include parent directories that do not have their own ZIP entries.
      // So we need to parse out the parent directory path for EVERY entry
      // and insure we create a temp directory for all of them.
      //
      // We do not support encryption, and we rely upon ZipArchive to
      // handle CRC checking and decompression. So the only values needed
      // for each entry are its name and its modification time.
      $counter = 0;

      for ($i = 0; $i < $numFiles; $i++) {
        // Get the next entry's info.
        $info    = $readEntry($archive, $i);
        $zipPath = $info['path'];
        $zipTime = $info['time'];

        // Split on the ZIP directory separator, which is always '/'.
        $zipDirs = mb_split('/', $zipPath);

        // For a directory entry, the last character in the name is '/' and
        // the last name in $zipDirs is empty.
        //
        // For a file entry, the last character in the name is not '/' and
        // the last name in $zipDirs is the file name.
        //
        // In both cases, we don't need the last entry since we are only
        // interested in all of the parent directories.
        unset($zipDirs[(count($zipDirs) - 1)]);

        // Loop through the directories on the ZIP file's path and create
        // any we haven't encountered before.
        $zipPathSoFar = '';
        $dirUriSoFar = $directoryUri;

        foreach ($zipDirs as $dir) {
          // Append the next dir to our ZIP path so far.
          if ($zipPathSoFar === '') {
            $zipPathSoFar = $dir;
            $isTop = TRUE;
          }
          else {
            $zipPathSoFar .= '/' . $dir;
            $isTop = FALSE;
          }

          if (isset($entries[$zipPathSoFar]) === TRUE) {
            // We've encountered this path before. Update its saved
            // modification time if it is newer.
            $dirUriSoFar = $entries[$zipPathSoFar]['localUri'];

            if ($zipTime > $entries[$zipPathSoFar]['time']) {
              $entries[$zipPathSoFar]['time'] = $zipTime;
            }
          }
          else {
            // Create the local URI.
            $localUri = $dirUriSoFar . '/' . $counter;
            ++$counter;

            $entries[$zipPathSoFar] = [
              'isDirectory' => TRUE,
              'isTop'       => $isTop,
              'zipPath'     => $zipPathSoFar,
              'localUri'    => $localUri,
              'time'        => $zipTime,
            ];

            FileUtilities::mkdir($localUri);
          }
        }
      }

      //
      // Extract files
      // -------------
      // Sweep through the archive again. Ignore directory entries since
      // we have already handled them above.
      //
      // For each file, open a stream on the archive entry and copy from
      // the stream into a file we create here with a name we control.
      for ($i = 0; $i < $numFiles; $i++) {
        // Get the next entry's info.
        $info    = $readEntry($archive, $i);
        $zipPath = $info['path'];
        $zipTime = $info['time'];

        // Paths that end in '/' are directories. Already handled. The
        // last CHARACTER is checked, not the byte at the character count,
        // so that multi-byte names are handled properly.
        if (mb_substr($zipPath, -1) === '/') {
          continue;
        }

        // Get the parent ZIP directory path.
        $parentZipPath = FileUtilities::dirname($zipPath);

        // Get the local temp directory URI for this path, which we have
        // created earlier.
        if ($parentZipPath === '.') {
          // The $zipPath had no parent directories. Drop the file into
          // the target directory.
          $parentLocalUri = $directoryUri;
          $isTop = TRUE;
        }
        elseif (isset($entries[$parentZipPath]) === TRUE) {
          // Get the name of the temp directory we created earlier for
          // this parent path.
          $parentLocalUri = $entries[$parentZipPath]['localUri'];
          $isTop = FALSE;
        }
        else {
          // The parent path does not match any directory found in the
          // sweep above (e.g. an absolute or otherwise unusual path).
          // Without a known parent there is no safe place for the file.
          throw $invalidArchive();
        }

        // Create a name for a local file in the parent directory. We'll
        // be writing the ZIP archive's uncompressed file here.
        $localUri = $parentLocalUri . '/' . $counter;
        ++$counter;

        // Get an uncompressed byte stream for the file. Ask for it using
        // the name as stored in the archive, not the converted UTF-8 path,
        // which may no longer match the archive's entry.
        $stream = $archive->getStream($info['name']);
        if ($stream === FALSE) {
          throw $invalidArchive();
        }

        // Create a local file. We can use the local URI because fopen()
        // is stream-aware and will track the scheme down to Drupal and
        // its installed stream wrappers.
        $fp = fopen($localUri, 'w');
        if ($fp === FALSE) {
          fclose($stream);
          throw new SystemException(t(
            "System error. A file at '@path' could not be written.\nThere may be a problem with permissions. Please report this to the site administrator.",
            [
              '@path' => $localUri,
            ]));
        }

        // Copy the entry's content and notice if the copy fails (such as
        // when storage space runs out) instead of silently leaving a
        // truncated file.
        $copied = stream_copy_to_stream($stream, $fp);

        fclose($fp);
        fclose($stream);

        if ($copied === FALSE) {
          throw new SystemException(t(
            "System error. A file at '@path' could not be written.\nThere may be a problem with permissions. Please report this to the site administrator.",
            [
              '@path' => $localUri,
            ]));
        }

        // Give the new file appropriate permissions.
        FileUtilities::chmod($localUri);

        // Set the new file's modification time.
        if ($zipTime !== 0) {
          FileUtilities::touch($localUri, $zipTime);
        }

        $entries[$zipPath] = [
          'isDirectory' => FALSE,
          'isTop'       => $isTop,
          'zipPath'     => $zipPath,
          'localUri'    => $localUri,
          'time'        => $zipTime,
        ];
      }
    }
    catch (\Exception $e) {
      // Something went wrong. Close the archive before reporting it.
      // The caller is responsible for removing the temp directory.
      $archive->close();
      throw $e;
    }

    if ($archive->close() === FALSE) {
      throw new SystemException(t(
        "System error. A file at '@path' could not be written.\nThere may be a problem with permissions. Please report this to the site administrator.",
        [
          '@path' => $archivePath,
        ]));
    }

    return $entries;
  }

}

Главная | Обратная связь

drupal hosting | друпал хостинг | it patrol .inc