foldershare-8.x-1.2/src/ManageFilenameExtensions.php
src/ManageFilenameExtensions.php
<?php
namespace Drupal\foldershare;
use Drupal\foldershare\Entity\FolderShare;
/**
* Manages file name extensions for the FolderShare module.
*
* This class provides static methods to manage lists of filename extensions
* and queries to check them. Supported operations include:
* - Getting topical groups of known extensions.
* - Parsing a file name or path to get the extension.
* - Checking if an extension is allowed for a FolderShare file.
* - Getting and setting the list of allowed extensions for a FolderShare file.
*
* @ingroup foldershare
*
* @see https://www.online-convert.com/file-type
*/
class ManageFilenameExtensions {
/*---------------------------------------------------------------------
*
* Topical filename extensions.
*
*---------------------------------------------------------------------*/
/**
 * Returns common file name extensions for archives.
 *
 * An archive is a multi-file container used to package related files
 * and folders together for easier management and distribution. The
 * most widely used archive format is "zip". Other formats exist, many
 * of which are particular to specific OSes for specific uses.
 *
 * The list returned by this method is not exhaustive. Only the most
 * common formats are included. Obscure, legacy, narrowly-used, or
 * vendor-specific extensions are not included.
 *
 * Each extension appears exactly once in the returned list, even when a
 * format (such as "rar") is common on more than one platform.
 *
 * @return string[]
 *   Returns a list of lower case extensions, without dots and without
 *   duplicates.
 */
public static function getArchiveExtensions() {
  return [
    // General.
    'zip',
    // OS-specific, Android.
    'apk',
    // OS-specific, Debian.
    'deb',
    // OS-specific, Linux, BSD, and macOS.
    'a', 'cpio', 'bcpio', 'rar', 'tar', 'gtar', 'tgz', 'tbz2',
    'shar', 'gz', 'gzip', 'z', 'bz', 'bz2',
    // OS-specific, RedHat Linux.
    'rpm',
    // OS-specific, macOS.
    'dmg', 'iso', 'mpkg', 'sit', 'sitx',
    // OS-specific, Windows. RAR is also common on Windows, but it is
    // already listed above and must not be repeated here.
    'cab', 'dll',
    // Content-specific, Java.
    'jar',
    // Content-specific, PHP.
    'phar',
  ];
}
/**
 * Returns common file name extensions for audio files.
 *
 * Audio files hold a recorded or synthesized sound waveform for playback.
 * The MPEG family (notably "mp3") dominates, with "flac", "ogg", and
 * "wav" also in wide use.
 *
 * The list is intentionally limited to common formats. Obscure, legacy,
 * narrowly-used, or vendor-specific extensions are left out.
 *
 * @return string[]
 *   Returns a list of extensions, without dots.
 *
 * @see ::getVideoExtensions()
 */
public static function getAudioExtensions() {
  // Formats in general, cross-platform use.
  $general = [
    'aif', 'aiff', 'flac',
    'mp3', 'm3u', 'm4a',
    'ogg', 'oga', 'mogg',
  ];

  // Formats tied to Windows.
  $windows = ['wma', 'wav'];

  // Formats tied to macOS and iOS.
  $apple = ['m4p', 'm4r'];

  // Older formats.
  $older = ['au'];

  return array_merge($general, $windows, $apple, $older);
}
/**
 * Returns common file name extensions for data files.
 *
 * Data files carry primarily numeric data in a task-specific layout.
 * "data" and "bin" are generic catch-alls, while "hdf" is a standard
 * self-describing format used by many science applications.
 *
 * The list is intentionally limited to common formats. Obscure, legacy,
 * narrowly-used, or vendor-specific extensions are left out.
 *
 * @return string[]
 *   Returns a list of extensions, without dots.
 */
public static function getDataExtensions() {
  // Each inner list is one topical group. Groups are concatenated in the
  // order written to form the returned list.
  $groups = [
    // General.
    ['asc', 'ascii', 'bin', 'dat', 'data', 'text', 'txt', 'csv', 'tsv'],
    // Science formats.
    ['hdf', 'hdf5', 'h5', 'nc', 'fits', 'daq', 'fig'],
    // Matlab.
    ['mat', 'mn'],
    // Web and Drupal et al.
    ['yaml', 'yml', 'twig', 'info'],
    // Web page styles.
    ['css', 'less', 'sass', 'scss', 'xsl', 'xsd'],
    // Web data.
    ['json', 'xml', 'rdf'],
    // Calendar.
    ['ics'],
    // Google.
    ['kml', 'kmz'],
  ];

  return array_merge(...$groups);
}
/**
 * Returns common file name extensions for document files.
 *
 * Document files combine text and numeric data to support presenting
 * and editing a formatted document. HTML and PDF are typical, as are
 * the Microsoft Word, Excel, and PowerPoint formats.
 *
 * The list is intentionally limited to common formats. Obscure, legacy,
 * narrowly-used, or vendor-specific extensions are left out.
 *
 * @return string[]
 *   Returns a list of extensions, without dots.
 *
 * @see ::getTextExtensions()
 */
public static function getDocumentExtensions() {
  $extensions = [];

  // Web.
  array_push($extensions, 'htm', 'html', 'xhtml', 'rss', 'dtd', 'xml');

  // Other.
  array_push(
    $extensions,
    'man', 'rtf', 'rtx', 'tex', 'ltx', 'latex', 'pdf', 'md');

  // Word.
  array_push($extensions, 'docx', 'docm', 'dotx', 'dotm', 'docb');

  // Excel.
  array_push($extensions, 'xlsx', 'xlsm', 'xltx', 'xltm');

  // Powerpoint.
  array_push(
    $extensions,
    'pptx', 'pptm', 'potx', 'potm', 'ppam', 'ppsx', 'ppsm',
    'sldx', 'sldm');

  // Access.
  array_push(
    $extensions,
    'adn', 'accdb', 'accdr', 'accdt', 'accda', 'mdw', 'accde',
    'mam', 'maq', 'mar', 'mat', 'maf', 'laccdb');

  // Legacy Word.
  array_push($extensions, 'doc', 'dot', 'wbk');

  // Legacy Excel.
  array_push($extensions, 'xls', 'xlt', 'xlm');

  // Legacy Powerpoint.
  array_push($extensions, 'ppt', 'pot', 'pps');

  // Legacy Access.
  array_push(
    $extensions,
    'ade', 'adp', 'mdb', 'cdb', 'mda', 'mdn', 'mdt', 'mdf',
    'mde', 'ldb');

  // Wordperfect.
  array_push($extensions, 'wpd');

  // KDE.
  array_push(
    $extensions,
    'karbon', 'chrt', 'kfo', 'flw', 'kon', 'kpr', 'ksp', 'kwd');

  // OpenDocument.
  array_push(
    $extensions,
    'odc', 'otc', 'odb', 'odf', 'odft', 'odg', 'otg', 'odi',
    'oti', 'odp', 'otp', 'ods', 'ots', 'odt', 'odm', 'ott');

  // Open office.
  array_push(
    $extensions,
    'sxc', 'stc', 'sxd', 'std', 'sxi', 'sti', 'sxm', 'sxw',
    'sxg', 'stw', 'oxt');

  // Star office.
  array_push($extensions, 'sdc', 'sda', 'sdd', 'smf', 'sdw', 'sgl');

  return $extensions;
}
/**
 * Returns common file name extensions for drawing files.
 *
 * Drawing files hold instructions and data for drawing 2D or 3D shapes.
 * Vendor-neutral examples are "ps" (PostScript) and "svg" (scalable
 * vector graphics). Vendor-specific examples are "blend" (Blender),
 * "3ds" (3D Studio Max), and "dxf" (Autocad).
 *
 * The list is intentionally limited to common formats. Obscure, legacy,
 * narrowly-used, or vendor-specific extensions are left out.
 *
 * @return string[]
 *   Returns a list of extensions, without dots.
 *
 * @see ::getImageExtensions()
 */
public static function getDrawingExtensions() {
  // General, web.
  $web = ['svg'];

  // General, printing.
  $printing = ['ps', 'eps', 'ppd'];

  // Vendor neutral.
  $vendorNeutral = ['dae', 'odg', 'stl'];

  // Vendor-specific.
  $vendorSpecific = ['ai', 'dwf', 'dxf', 'blend', '3ds'];

  return array_merge($web, $printing, $vendorNeutral, $vendorSpecific);
}
/**
 * Returns common file name extensions for binary executable files.
 *
 * Executable files hold a compiled program that the OS can run. The
 * Windows "exe" file is the best known. Linux, BSD, and macOS do not
 * use a specific extension for executable programs.
 *
 * The list is intentionally limited to common formats. Obscure, legacy,
 * narrowly-used, or vendor-specific extensions are left out.
 *
 * @return string[]
 *   Returns a list of extensions, without dots.
 *
 * @see ::getScriptExtensions()
 * @see ::getSoftwareExtensions()
 */
public static function getExecutableExtensions() {
  // OS-specific, macOS UI application (but not command-line programs).
  $macOs = ['app'];

  // OS-specific, Windows and DOS.
  $windows = ['exe'];

  return array_merge($macOs, $windows);
}
/**
 * Returns common file name extensions for image files.
 *
 * Image files represent a rectangular grid of raster pixels. On the
 * web, "png", "jpeg", and "gif" are typical.
 *
 * The list is intentionally limited to common formats. Obscure, legacy,
 * narrowly-used, or vendor-specific extensions are left out.
 *
 * @return string[]
 *   Returns a list of extensions, without dots.
 *
 * @see ::getDrawingExtensions()
 */
public static function getImageExtensions() {
  // Each inner list is one topical group. Groups are concatenated in the
  // order written to form the returned list.
  $groups = [
    // General.
    ['png', 'gif', 'ico', 'webp'],
    // JPEG and JPEG2000.
    ['jpg', 'jpeg', 'jp2', 'j2k', 'jpf', 'jpx', 'jpm'],
    // TIFF.
    ['tif', 'tiff'],
    // OS-specific, Linux, BSD, macOS (generally).
    ['ppm', 'pbm', 'pgm', 'pnm', 'xbm', 'xpm'],
    // OS-specific, Windows.
    ['bmp', 'pcx', 'tga'],
    // Other.
    ['fits', 'psd'],
  ];

  return array_merge(...$groups);
}
/**
 * Returns common file name extensions for command script files.
 *
 * Command script files list instructions for running commands on an OS.
 * Command, batch, and shell scripts are covered. Scripting programming
 * languages such as perl or PHP are not, because they need a separate
 * interpreter that is not typically part of a core OS distribution.
 *
 * The list is intentionally limited to common formats. Obscure, legacy,
 * narrowly-used, or vendor-specific extensions are left out.
 *
 * @return string[]
 *   Returns a list of extensions, without dots.
 *
 * @see ::getExecutableExtensions()
 * @see ::getSoftwareExtensions()
 */
public static function getScriptExtensions() {
  // OS-specific, Linux, BSD, and macOS.
  $unixLike = ['cgi', 'sh', 'bash', 'csh', 'shar'];

  // OS-specific, Windows and DOS.
  $windows = ['bat', 'cmd', 'com'];

  return array_merge($unixLike, $windows);
}
/**
 * Returns common file name extensions for software files.
 *
 * A software file contains the source code or intermediate code used during
 * software development. This can include assembly, object files, class files,
 * and libraries, along with source code for specific programming languages.
 *
 * The list returned by this method is not exhaustive. Only the most
 * common formats are included. Obscure, legacy, narrowly-used, or
 * vendor-specific extensions are not included. For instance, the various
 * legacy Flash and Shockwave formats are not included (e.g. "fla", "flv",
 * "f4v", "swf").
 *
 * Each extension appears exactly once in the returned list, even when it
 * fits more than one group (such as "cs" for C# on Windows).
 *
 * @return string[]
 *   Returns a list of lower case extensions, without dots and without
 *   duplicates.
 *
 * @see ::getExecutableExtensions()
 */
public static function getSoftwareExtensions() {
  return [
    // Assembly.
    'asm', 's',
    // Basic.
    'b',
    // C, C++, C#.
    'c', 'c++', 'cp', 'cpp', 'cxx', 'cs', 'csx',
    'h', 'hpp', 'inc', 'include',
    // Cobol.
    'cbl',
    // ECMAscript (a.k.a. Javascript or LiveScript).
    'js',
    // Fortran.
    'f',
    // Java.
    'class', 'java', 'jsp',
    // Objective-C.
    'm',
    // Perl.
    'pl', 'prl', 'perl',
    // PHP.
    'php', 'phar',
    // PHP, Drupal-specific.
    'module', 'install',
    // Python.
    'py', 'pyc', 'python',
    // R.
    'r',
    // Swift.
    'swift',
    // General macro processing.
    'm4',
    // OS-specific, Linux, BSD, and macOS.
    'awk', 'sed', 'o', 'make', 'mk', 'cmake', 'ini', 'config',
    // OS-specific, Windows. The C# "cs" extension is already listed in
    // the C family above and must not be repeated here.
    'cd', 'vbp', 'vbproj', 'vbx', 'vcxproj', 'asp',
  ];
}
/**
 * Returns common file name extensions for text files.
 *
 * Text files hold human-readable text with no formatting. Any text
 * editor can create them, and no document formatting application is
 * needed to view them. Document files, such as those for Microsoft
 * Office, are specifically excluded.
 *
 * The list is intentionally limited to common formats. Obscure, legacy,
 * narrowly-used, or vendor-specific extensions are left out.
 *
 * @return string[]
 *   Returns a list of extensions, without dots.
 *
 * @see ::getDocumentExtensions()
 */
public static function getTextExtensions() {
  // General.
  $general = ['asc', 'ascii', 'text', 'txt'];

  // OS-specific, Linux, BSD, and macOS (generally).
  $unixLike = ['readme', '1st'];

  return array_merge($general, $unixLike);
}
/**
 * Returns common file name extensions for video files.
 *
 * Video files hold a sequence of raster images that can be played back
 * on a display to recreate an animation or movie. The most common come
 * from MPEG ("mp4") and from Apple for QuickTime ("qt").
 *
 * The list is intentionally limited to common formats. Obscure, legacy,
 * narrowly-used, or vendor-specific extensions are left out.
 *
 * @return string[]
 *   Returns a list of extensions, without dots.
 *
 * @see ::getAudioExtensions()
 */
public static function getVideoExtensions() {
  $extensions = [];

  // MPEG.
  array_push($extensions, 'mp4', 'm4v', 'mpg', 'mpv', 'mpeg');

  // OS-specific, Windows.
  array_push($extensions, 'avi', 'wmv');

  // OS-specific, macOS.
  array_push($extensions, 'mov', 'qt');

  // Other.
  array_push($extensions, 'h264', 'mj2', 'mkv', 'ogv', 'webm');

  return $extensions;
}
/**
 * Returns common file name extensions for web files.
 *
 * The result mixes file formats commonly used on the web, such as
 * "html", "png", and "txt". Vendor- or OS-specific formats, like those
 * in Microsoft Office, are excluded.
 *
 * The list is intentionally limited to common formats. Obscure, legacy,
 * narrowly-used, or vendor-specific extensions are left out.
 *
 * @return string[]
 *   Returns a list of extensions, without dots.
 *
 * @see ::getArchiveExtensions()
 * @see ::getAudioExtensions()
 * @see ::getDataExtensions()
 * @see ::getDocumentExtensions()
 * @see ::getDrawingExtensions()
 * @see ::getImageExtensions()
 * @see ::getSoftwareExtensions()
 * @see ::getTextExtensions()
 * @see ::getVideoExtensions()
 */
public static function getWebExtensions() {
  // One short list per topical category, concatenated below in the
  // order the categories are declared.
  $archives = ['zip', 'tar', 'tgz'];
  $audio = ['mp3', 'flac', 'ogg', 'wav'];
  $data = ['css'];
  $documents = ['htm', 'html', 'xhtml', 'rss', 'dtd', 'rtf', 'pdf'];
  $drawings = ['svg', 'ps', 'eps'];
  $images = ['png', 'gif', 'ico', 'jpg', 'jpeg'];
  $software = ['js'];
  $text = ['text', 'txt'];
  $video = ['mp4'];

  return array_merge(
    $archives,
    $audio,
    $data,
    $documents,
    $drawings,
    $images,
    $software,
    $text,
    $video);
}
/*---------------------------------------------------------------------
*
* Methods.
*
*---------------------------------------------------------------------*/
/**
 * Returns an array of all file name extensions defined here.
 *
 * This method returns the combined list of file name extensions from
 * every topical group in this class: archives, audio, data, documents,
 * executables, drawings, images, scripts, software, text, and videos.
 *
 * @return string[]
 *   Returns a sequentially indexed array (keys 0..n-1) of filename
 *   extensions, without leading dots. Extensions are unique and sorted
 *   in natural order.
 *
 * @see ::getArchiveExtensions()
 * @see ::getAudioExtensions()
 * @see ::getDataExtensions()
 * @see ::getDocumentExtensions()
 * @see ::getExecutableExtensions()
 * @see ::getDrawingExtensions()
 * @see ::getImageExtensions()
 * @see ::getScriptExtensions()
 * @see ::getSoftwareExtensions()
 * @see ::getTextExtensions()
 * @see ::getVideoExtensions()
 */
public static function getAllExtensions() {
  $merged = array_unique(array_merge(
    self::getArchiveExtensions(),
    self::getAudioExtensions(),
    self::getDataExtensions(),
    self::getDocumentExtensions(),
    self::getExecutableExtensions(),
    self::getDrawingExtensions(),
    self::getImageExtensions(),
    self::getScriptExtensions(),
    self::getSoftwareExtensions(),
    self::getTextExtensions(),
    self::getVideoExtensions()));
  natsort($merged);

  // array_unique() and natsort() both keep the original keys, which
  // would leave gaps and put the keys out of step with the sorted
  // order. Reindex so the result is a plain list.
  return array_values($merged);
}
/**
 * Returns an array of all text-oriented file name extensions defined here.
 *
 * This method returns an abbreviated list of file name extensions for
 * files that are primarily text, though the specific format may have some
 * binary structure. This includes common file formats for documents,
 * scripts, software, and text, plus a few extensions picked from the
 * other topical lists.
 *
 * @return string[]
 *   Returns a sequentially indexed array (keys 0..n-1) of filename
 *   extensions, without leading dots. Extensions are unique and sorted
 *   in natural order.
 *
 * @see ::getDocumentExtensions()
 * @see ::getScriptExtensions()
 * @see ::getSoftwareExtensions()
 * @see ::getTextExtensions()
 */
public static function getAllTextExtensions() {
  $merged = array_unique(array_merge(
    self::getDocumentExtensions(),
    self::getScriptExtensions(),
    self::getSoftwareExtensions(),
    self::getTextExtensions(),
    [
      // Misc. formats extracted from the longer lists.
      'tar',
      'dat',
      'yaml', 'yml', 'twig', 'info',
      'css', 'less', 'sass', 'scss', 'xsl', 'xsd',
      'json',
    ]));
  natsort($merged);

  // array_unique() and natsort() both keep the original keys, which
  // would leave gaps and put the keys out of step with the sorted
  // order. Reindex so the result is a plain list.
  return array_values($merged);
}
/*---------------------------------------------------------------------
*
* Extensions for file and image fields.
*
*---------------------------------------------------------------------*/
/**
 * Returns file name extensions allowed for FolderShare files.
 *
 * The 'file_extensions' setting is read from the field definition of the
 * entity's 'file' field. The 'image' field is expected to always carry
 * the same setting, so only the 'file' field is consulted. The setting is
 * a single string holding a space-separated list of allowed file name
 * extensions, none of which include a leading "dot".
 *
 * File name extensions are always lower case. There are no redundant
 * extensions. Extensions in the list are not ordered.
 *
 * An empty list means any extension is allowed for uploaded and
 * renamed files.
 *
 * @return string
 *   Returns a string containing a space-separated list of file
 *   extensions (without the leading dot) supported for files.
 *
 * @see ::isNameExtensionAllowed()
 * @see ::setAllowedNameExtensions()
 * @see \Drupal\foldershare\Settings::getAllowedNameExtensionsDefault()
 * @see \Drupal\foldershare\Settings::getAllowedNameExtensions()
 */
public static function getAllowedNameExtensions() {
  // Look up the field definitions for the FolderShare entity type. The
  // entity type ID is used as the bundle name as well.
  $definitions = \Drupal::service('entity_field.manager')
    ->getFieldDefinitions(
      FolderShare::ENTITY_TYPE_ID,
      FolderShare::ENTITY_TYPE_ID);

  // The 'file' field's setting stands in for the 'image' field too.
  $fileField = $definitions['file'];
  return $fileField->getSetting('file_extensions');
}
/**
 * Sets the file name extensions allowed for FolderShare files.
 *
 * <B>This method is internal and strictly for use by the FolderShare
 * module itself.</B>
 *
 * The field definitions for the 'file' and 'image' fields are changed and
 * their current file name extensions settings updated. This setting is a
 * single string containing a space-separated list of allowed file name
 * extensions. Extensions do not include a leading "dot".
 *
 * The given list is normalized before it is saved:
 * - Extensions are folded to lower case.
 * - Any run of spaces, tabs, or line breaks separates extensions, so
 *   repeated, leading, or trailing white space never produces an empty
 *   extension.
 * - Leading dots are removed, and entries that are empty after that
 *   (such as a lone ".") are dropped.
 * - Redundant extensions are removed.
 *
 * If the list of extensions is empty, then any extension is allowed
 * for uploaded and renamed files.
 *
 * <B>Process locks</B>
 * This method does not lock access. The caller should lock around changes
 * to the field definition entity.
 *
 * @param string $extensions
 *   A string containing a space-separated list of file name extensions
 *   (with or without the leading dot) supported for folder files.
 *
 * @see ::getAllowedNameExtensions()
 * @see \Drupal\foldershare\Settings::getAllowedNameExtensionsDefault()
 * @see \Drupal\foldershare\Settings::setAllowedNameExtensions()
 */
public static function setAllowedNameExtensions(string $extensions) {
  // Fold the entire string to lower case, then split it on runs of
  // white space. PREG_SPLIT_NO_EMPTY discards the empty pieces that
  // leading, trailing, or repeated separators would otherwise create.
  // An explicit ASCII character class is used (rather than \s or the /u
  // modifier) so that multi-byte characters are never split and invalid
  // UTF-8 cannot make the split fail.
  $extList = preg_split(
    '/[ \t\r\n]+/',
    mb_strtolower($extensions),
    -1,
    PREG_SPLIT_NO_EMPTY);
  if ($extList === FALSE) {
    $extList = [];
  }

  // Remove leading dots and skip entries that have nothing left. An
  // empty entry can never match a real extension, and would corrupt the
  // space-separated list if saved.
  $cleaned = [];
  foreach ($extList as $value) {
    $value = ltrim($value, '.');
    if ($value !== '') {
      $cleaned[] = $value;
    }
  }

  // Remove redundant extensions and rebuild the list string. An empty
  // list yields an empty string, which means "allow everything".
  $uniqueExtensions = implode(' ', array_unique($cleaned));

  // Set the extensions string on the 'file' and 'image' fields.
  $fieldManager = \Drupal::service('entity_field.manager');
  $def = $fieldManager->getFieldDefinitions(
    FolderShare::ENTITY_TYPE_ID,
    FolderShare::ENTITY_TYPE_ID);

  $cfd = $def['file']->getConfig(FolderShare::ENTITY_TYPE_ID);
  $cfd->setSetting('file_extensions', $uniqueExtensions);
  $cfd->save();

  $cfd = $def['image']->getConfig(FolderShare::ENTITY_TYPE_ID);
  $cfd->setSetting('file_extensions', $uniqueExtensions);
  $cfd->save();
}
/*---------------------------------------------------------------------
*
* Extension parsing.
*
*---------------------------------------------------------------------*/
/**
 * Returns the lower case file name extension from a file name or path.
 *
 * The extension is whatever PHP's pathinfo() reports for the path, which
 * is the text after the last "." in the final path component. Extensions
 * that PHP considers "falsy", such as "0" in "backup.0", are returned
 * like any other extension.
 *
 * @param string $path
 *   The URI or local path to parse.
 *
 * @return string
 *   Returns the file name extension (the part after the last "."),
 *   converted to lower case. If there is no extension, an empty
 *   string is returned.
 */
public static function getExtensionFromPath(string $path) {
  $ext = (string) pathinfo($path, PATHINFO_EXTENSION);

  // Compare against the empty string explicitly. Using empty() here
  // would also treat the valid extension "0" as missing.
  if ($ext === '') {
    return '';
  }

  return mb_strtolower($ext);
}
/*---------------------------------------------------------------------
*
* Extension testing.
*
*---------------------------------------------------------------------*/
/**
 * Returns TRUE if the file name is using an allowed file extension.
 *
 * <B>This method is internal and strictly for use by the FolderShare
 * module itself.</B>
 *
 * The text following the last '.' in the given file name is extracted
 * as the name's extension and folded to lower case, then checked against
 * the given array of allowed extensions. If the extension is found, TRUE
 * is returned. The comparison is an exact string match, so numeric-looking
 * extensions such as "1e1" and "10" are never confused with each other.
 *
 * If the file name has no '.', it has no extension, and TRUE is
 * returned.
 *
 * If the extensions array is empty, all extensions are accepted and
 * TRUE is returned.
 *
 * @param string $path
 *   The local path to parse.
 * @param array $extensions
 *   (optional, default = NULL) An array of allowed file name extensions,
 *   in lower case and without leading dots. If the extensions array is
 *   empty, all extensions are allowed. If the extensions argument is
 *   NULL, the extensions array is retrieved from module settings.
 *
 * @return bool
 *   Returns TRUE if the name has no extension, the extensions array is
 *   empty, the module has no list of allowed extensions, or if it uses
 *   an allowed extension, and FALSE otherwise.
 *
 * @see ::getAllowedNameExtensions()
 */
public static function isNameExtensionAllowed(
  string $path,
  array $extensions = NULL) {

  $ext = self::getExtensionFromPath($path);
  if ($ext === '') {
    // No extension. Default to allowed. The empty string is tested
    // explicitly because empty() would also match the extension "0".
    return TRUE;
  }

  if ($extensions === NULL) {
    // Fall back to the module's configured list. The setting may be
    // missing entirely, so treat NULL like an empty string.
    $extensionsString = (string) self::getAllowedNameExtensions();

    // Split the space-separated list, skipping the blank entries that
    // repeated, leading, or trailing spaces produce.
    $extensions = [];
    foreach (mb_split(' ', $extensionsString) as $allowed) {
      if ($allowed !== '') {
        $extensions[] = $allowed;
      }
    }
  }

  if (count($extensions) === 0) {
    // There are no allowed extensions. All extensions are allowed.
    return TRUE;
  }

  // Look for the extension in the allowed extensions array. A strict
  // comparison is required: a loose comparison treats numeric strings
  // as numbers, so "1e1" would match an allowed "10".
  return in_array($ext, $extensions, TRUE);
}
/**
 * Returns TRUE if the ZIP file name extension is allowed.
 *
 * The module's space-separated list of allowed extensions is fetched
 * and searched for an entry that is exactly "zip". An empty list means
 * there are no restrictions, so ZIP is allowed.
 *
 * @return bool
 *   Returns TRUE if it is allowed, and FALSE otherwise.
 *
 * @see ::getAllowedNameExtensions()
 */
public static function isZipExtensionAllowed() {
  $allowedList = self::getAllowedNameExtensions();

  if (empty($allowedList) === FALSE) {
    // Restrictions are in force. Scan the list for an exact match.
    $zipFound = FALSE;
    foreach (mb_split(' ', $allowedList) as $candidate) {
      if ('zip' === $candidate) {
        $zipFound = TRUE;
        break;
      }
    }
    return $zipFound;
  }

  // No extension restrictions.
  return TRUE;
}
}
