feeds_ex-8.x-1.0-alpha4/src/Encoder/TextEncoder.php
src/Encoder/TextEncoder.php
<?php
namespace Drupal\feeds_ex\Encoder;
use Drupal\Component\Utility\Unicode;
use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\StringTranslation\StringTranslationTrait;
/**
* Generic text encoder.
*/
class TextEncoder implements EncoderInterface {

  use StringTranslationTrait;

  /**
   * Whether the current system handles mb_* functions.
   *
   * Every method that calls an mb_* function checks this flag first.
   *
   * @var bool
   */
  protected $isMultibyte = FALSE;

  /**
   * The set of encodings compatible with UTF-8.
   *
   * Names are lower-case; data detected as one of these is returned without
   * conversion.
   *
   * @var array
   */
  protected static $utf8Compatible = ['utf-8', 'utf8', 'us-ascii', 'ascii'];

  /**
   * The list of encodings to search for.
   *
   * @var array
   */
  protected $encodingList;

  /**
   * Constructs a TextEncoder object.
   *
   * @param array $encoding_list
   *   The encodings to try, in order, when detecting the source encoding.
   */
  public function __construct(array $encoding_list) {
    $this->encodingList = $encoding_list;
    $this->isMultibyte = Unicode::getStatus() == Unicode::STATUS_MULTIBYTE;
  }

  /**
   * {@inheritdoc}
   */
  public function convertEncoding($data) {
    $detected = $this->detectEncoding($data);

    // Without a detected encoding there is nothing to convert from, so the
    // data is handed back untouched.
    if (!$detected) {
      return $data;
    }

    return $this->doConvert($data, $detected);
  }

  /**
   * {@inheritdoc}
   */
  public function buildConfigurationForm(array $form, FormStateInterface $form_state) {
    // The encoding setting is only offered when mb_* functions are usable.
    if (!$this->isMultibyte) {
      return $form;
    }

    $args = ['%encodings' => implode(', ', mb_detect_order())];

    $form['source_encoding'] = [
      '#type' => 'textfield',
      '#title' => $this->t('Source encoding'),
      '#description' => $this->t('The possible encodings of the source files. auto: %encodings', $args),
      '#default_value' => implode(', ', $this->encodingList),
      '#autocomplete_route_name' => 'feeds_ex.encoding_autocomplete',
      '#maxlength' => 1024,
    ];

    return $form;
  }

  /**
   * {@inheritdoc}
   */
  public function validateConfigurationForm(array &$form, FormStateInterface $form_state) {
    if (!$this->isMultibyte) {
      return;
    }

    $values =& $form_state->getValues();

    // This handler writes an array back into 'source_encoding', so a repeated
    // validation pass sees an array rather than the submitted string. Accept
    // both shapes, and treat a missing value as an empty string.
    $submitted = isset($values['source_encoding']) ? $values['source_encoding'] : '';
    $requested = is_array($submitted)
      ? array_filter($submitted, 'is_string')
      : explode(',', (string) $submitted);
    $requested = array_map('mb_strtolower', array_map('trim', $requested));

    // Map each lower-cased encoding name to the exact spelling reported by
    // mb_list_encodings().
    $canonical = [];
    foreach (mb_list_encodings() as $encoding) {
      $canonical[mb_strtolower($encoding)] = $encoding;
    }

    // Normalize encodings. Make them exactly as they are defined in
    // mb_list_encodings(), but maintain user-defined order. Names that are
    // not in that list are dropped, and repeated names are kept only once.
    // Appending with [] keeps the keys sequential, which avoids config getting
    // saved with arbitrary numerical keys in the yml file.
    $normalized = [];
    foreach ($requested as $name) {
      if (isset($canonical[$name]) && !in_array($canonical[$name], $normalized, TRUE)) {
        $normalized[] = $canonical[$name];
      }
    }

    // Make sure there's some value set.
    $values['source_encoding'] = $normalized ?: ['auto'];
  }

  /**
   * Detects the encoding of a string.
   *
   * A strict detection pass is tried first; if it finds nothing, a non-strict
   * pass over the same candidate list is used instead.
   *
   * @param string $data
   *   The string to guess the encoding for.
   *
   * @return string|bool
   *   Returns the encoding, or false if one could not be detected.
   */
  protected function detectEncoding($data) {
    if (!$this->isMultibyte) {
      return FALSE;
    }

    // mb_detect_encoding() rejects an empty encoding list (a ValueError on
    // PHP 8, a warning on PHP 7), so fall back to the current detection order.
    $candidates = $this->encodingList ?: mb_detect_order();

    $strict_match = mb_detect_encoding($data, $candidates, TRUE);
    if ($strict_match) {
      return $strict_match;
    }

    return mb_detect_encoding($data, $candidates);
  }

  /**
   * Performs the actual encoding conversion.
   *
   * @param string $data
   *   The data to convert.
   * @param string $source_encoding
   *   The detected encoding.
   *
   * @return string
   *   The converted string, or the original data when the source encoding is
   *   listed as UTF-8 compatible or the conversion reports failure.
   */
  protected function doConvert($data, $source_encoding) {
    // Data in a UTF-8 compatible encoding needs no conversion.
    if (in_array(strtolower($source_encoding), self::$utf8Compatible, TRUE)) {
      return $data;
    }

    $converted = Unicode::convertToUtf8($data, $source_encoding);

    // A FALSE result is treated as a failed conversion: keep the original
    // data rather than returning a boolean to the caller.
    return $converted === FALSE ? $data : $converted;
  }

}
