crossword-8.x-1.x-dev/src/CrosswordDataService.php
src/CrosswordDataService.php
<?php
namespace Drupal\crossword;
use Drupal\Component\Utility\Xss;
use Drupal\Core\Cache\CacheBackendInterface;
use Drupal\Core\Extension\ModuleHandler;
use Drupal\Core\Logger\LoggerChannelFactoryInterface;
use Drupal\file\FileInterface;
use Masterminds\HTML5\Parser\UTF8Utils;
/**
* Service to get structured data from Crossword file.
*
* This is the service you should use if you need to get all or part of the
* crossword's structured data. While it's possible to get the data directly
* from a crossword_parser plugin, this service is much easier to use.
* Additionally, this service uses a caching layer and invokes an alter hook.
*/
class CrosswordDataService implements CrosswordDataServiceInterface {
/**
* Cache for the result of the parse function.
*
* @var \Drupal\Core\Cache\CacheBackendInterface
*/
protected $cache;
/**
* The crossword file parser plugin manager.
*
* @var \Drupal\crossword\CrosswordFileParserManager
*/
protected $parserManager;
/**
* The module handler.
*
* @var \Drupal\Core\Extension\ModuleHandler
*/
protected $moduleHandler;
/**
* The crossword logging channel.
*
* @var \Drupal\Core\Logger\LoggerChannelInterface
*/
protected $logger;
/**
 * Constructs the crossword data service.
 *
 * @param \Drupal\Core\Cache\CacheBackendInterface $cache
 *   Cache backend in which the processed crossword data is stored.
 * @param \Drupal\crossword\CrosswordFileParserManager $parser_manager
 *   The crossword file parser plugin manager.
 * @param \Drupal\Core\Extension\ModuleHandler $module_handler
 *   The module handler, used to invoke the crossword data alter hook.
 * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $channelFactory
 *   Factory from which the 'crossword' logger channel is obtained.
 */
public function __construct(CacheBackendInterface $cache, CrosswordFileParserManager $parser_manager, ModuleHandler $module_handler, LoggerChannelFactoryInterface $channelFactory) {
  // Only the resolved channel is kept on the service, not the factory.
  $this->logger = $channelFactory->get('crossword');
  $this->moduleHandler = $module_handler;
  $this->parserManager = $parser_manager;
  $this->cache = $cache;
}
/**
 * {@inheritdoc}
 */
public function getData(FileInterface $file, $redacted = FALSE) {
  $entry = $this->cache->get($file->id());
  if ($entry) {
    // Cache hit. A cached NULL records that this file could not be parsed.
    $crossword = $entry->data['data'] ?? NULL;
  }
  else {
    $crossword = NULL;
    $parser = $this->parserManager->loadCrosswordFileParserFromInput($file);
    if (!$parser) {
      // No parser was returned for this file; cache the negative result.
      $this->cache->set($file->id(), ['data' => NULL], CacheBackendInterface::CACHE_PERMANENT, $file->getCacheTags());
    }
    else {
      try {
        $crossword = $parser->parse();
      }
      catch (CrosswordException $exception) {
        $context = [
          '%parser' => $parser->getPluginId(),
          '%id' => $file->id(),
          '%message' => $exception->getMessage(),
        ];
        $this->logger->error('The %parser parser failed parsing file with id %id: %message', $context);
        // Cache the failure too, so the broken file is not re-parsed on
        // every request.
        $this->cache->set($file->id(), ['data' => NULL], CacheBackendInterface::CACHE_PERMANENT, $file->getCacheTags());
        return NULL;
      }
      // Post-processing that does not depend on the file type, which is why
      // it lives here rather than in the parser plugins.
      $crossword = $this->addReferences($crossword);
      $crossword = $this->addSquareMoves($crossword);
      $crossword = $this->convertCrosswordDataToUtf8($crossword);
      $crossword = $this->filterXss($crossword);
      // Let other modules adjust the data before it is cached.
      $this->moduleHandler->alter('crossword_data', $crossword, $file);
      $this->cache->set($file->id(), ['data' => $crossword], CacheBackendInterface::CACHE_PERMANENT, $file->getCacheTags());
    }
  }

  if (!isset($crossword)) {
    return NULL;
  }
  // Redaction is applied on the way out, so the cache always holds the
  // unredacted data.
  return $redacted ? $this->redact($crossword) : $crossword;
}
/**
 * {@inheritdoc}
 */
public function redact(array $data) {
  // Nothing to redact when the grid is missing or malformed. Checking first
  // also avoids the by-reference loop below creating an empty grid entry.
  if (empty($data['puzzle']['grid']) || !is_array($data['puzzle']['grid'])) {
    return $data;
  }
  foreach ($data['puzzle']['grid'] as &$row) {
    foreach ($row as &$square) {
      // Squares with a NULL (or absent) fill are left untouched, and squares
      // flagged with a 'hint' keep their solution visible.
      if (isset($square['fill']) && !isset($square['hint'])) {
        $square['fill'] = '';
        $square['rebus'] = FALSE;
      }
    }
    // Break the reference so later writes cannot hit the last square.
    unset($square);
  }
  unset($row);
  return $data;
}
/**
 * {@inheritdoc}
 */
public function getAuthor(FileInterface $file) {
  $data = $this->getData($file);
  // NULL when the file yielded no data or the data has no author.
  if (!$data || !isset($data['author'])) {
    return NULL;
  }
  return $data['author'];
}
/**
 * {@inheritdoc}
 */
public function getDimensionAcross(FileInterface $file) {
  $data = $this->getData($file);
  // The width is read off the first row of the grid.
  $first_row = $data ? ($data['puzzle']['grid'][0] ?? NULL) : NULL;
  return is_array($first_row) ? count($first_row) : NULL;
}
/**
 * {@inheritdoc}
 */
public function getDimensionDown(FileInterface $file) {
  $data = $this->getData($file);
  if (!$data) {
    return NULL;
  }
  // The height is the number of rows in the grid.
  $grid = $data['puzzle']['grid'] ?? NULL;
  return is_array($grid) ? count($grid) : NULL;
}
/**
 * {@inheritdoc}
 */
public function getDimensions(FileInterface $file, $delimiter = 'x') {
  $rows = $this->getDimensionDown($file);
  if (!$rows) {
    return NULL;
  }
  // The width is only looked up once a usable height is known.
  $columns = $this->getDimensionAcross($file);
  if (!$columns) {
    return NULL;
  }
  return $rows . $delimiter . $columns;
}
/**
 * {@inheritdoc}
 */
public function getTitle(FileInterface $file) {
  $data = $this->getData($file);
  // NULL when the file yielded no data or the data has no title.
  return ($data && isset($data['title'])) ? $data['title'] : NULL;
}
/**
 * {@inheritdoc}
 */
public function getSolution(FileInterface $file, $black = '') {
  $data = $this->getData($file);
  if (!$data) {
    return NULL;
  }
  $solution = [];
  // A missing grid yields an empty solution rather than a PHP warning.
  foreach (($data['puzzle']['grid'] ?? []) as $row) {
    // Build each row on its own and then append it, so the result is always
    // a clean list of rows whatever keys the grid rows carry.
    $line = [];
    foreach ($row as $square) {
      // A NULL (or absent) fill is reported using the caller's $black value.
      $line[] = $square['fill'] ?? $black;
    }
    $solution[] = $line;
  }
  return $solution;
}
/**
 * {@inheritdoc}
 */
public function isRebus(FileInterface $file) {
  $data = $this->getData($file);
  if (!$data || !isset($data['puzzle']['grid'])) {
    return FALSE;
  }
  // A single square flagged as rebus makes the whole puzzle a rebus puzzle.
  foreach ($data['puzzle']['grid'] as $squares) {
    foreach ($squares as $cell) {
      if ($cell['rebus']) {
        return TRUE;
      }
    }
  }
  return FALSE;
}
/**
 * For each clue, adds references in standard form.
 *
 * Every clue gets a 'references' element holding whatever
 * findReferences() detected in the clue text, with each reference extended
 * by the index of the clue it points to:
 *
 * Array(
 *   [
 *     'dir' => 'down' or 'across',
 *     'numeral' => number,
 *     'index' => number,
 *   ],
 * )
 *
 * @param array $data
 *   The parsed crossword data.
 *
 * @return array
 *   The updated data.
 */
protected function addReferences(array $data) {
  foreach (['down', 'across'] as $direction) {
    // Skip a direction without clues instead of iterating over NULL.
    if (empty($data['puzzle']['clues'][$direction]) || !is_array($data['puzzle']['clues'][$direction])) {
      continue;
    }
    foreach ($data['puzzle']['clues'][$direction] as &$clue) {
      $clue['references'] = $this->resolveReferences($this->findReferences($clue['text']), $data['puzzle']['clues']);
    }
    // Break the reference so it cannot leak into the next iteration.
    unset($clue);
  }
  return $data;
}

/**
 * Adds the target clue index to each reference found in a clue.
 *
 * @param array|null $references
 *   The references returned by findReferences() for one clue.
 * @param array $clues
 *   All clues of the puzzle, keyed by direction ('across' and 'down').
 *
 * @return array|null
 *   The references with an 'index' added wherever a clue with a matching
 *   numeral exists, or the list of starred clues when a 'starred' reference
 *   is present. An empty input is returned unchanged.
 */
private function resolveReferences($references, array $clues) {
  if (empty($references)) {
    return $references;
  }
  foreach ($references as $key => $reference) {
    if ($reference['dir'] === 'starred') {
      // A "starred clues" reference replaces every other reference.
      return $this->findStarredClues($clues);
    }
    foreach (($clues[$reference['dir']] ?? []) as $index => $candidate) {
      // Loose comparison on purpose: the reference numeral is a string
      // taken from the clue text.
      if ($candidate['numeral'] == $reference['numeral']) {
        $references[$key]['index'] = $index;
        break;
      }
    }
  }
  return $references;
}

/**
 * Lists every clue whose text contains an asterisk.
 *
 * @param array $clues
 *   All clues of the puzzle, keyed by direction ('across' and 'down').
 *
 * @return array
 *   One reference (dir, numeral, index) per starred clue, across clues first.
 */
private function findStarredClues(array $clues) {
  $starred = [];
  foreach (['across', 'down'] as $direction) {
    foreach (($clues[$direction] ?? []) as $index => $clue) {
      if (strpos($clue['text'], '*') !== FALSE) {
        $starred[] = [
          'dir' => $direction,
          'numeral' => $clue['numeral'],
          'index' => $index,
        ];
      }
    }
  }
  return $starred;
}
/**
 * Returns an array representing clues referenced in the input text.
 *
 * If the text of a clue is something like "Common feature of 12- and
 * 57-Across and 34-Down", the return value will be:
 *
 * Array(
 *   [
 *     'dir' => 'across',
 *     'numeral' => '12',
 *   ],
 *   [
 *     'dir' => 'across',
 *     'numeral' => '57',
 *   ],
 *   [
 *     'dir' => 'down',
 *     'numeral' => '34',
 *   ],
 * )
 *
 * A clue mentioning "starred clues" yields a reference whose 'dir' is
 * 'starred' and which has no numeral.
 *
 * @param string $text
 *   The text of the clue to be parsed for references.
 *
 * @return array|null
 *   An array representing any clues to which a reference was found in $text,
 *   without duplicates, or NULL if no reference was found. Numerals are the
 *   digit strings as they appear in the text.
 */
protected function findReferences($text) {
  $references = [];
  // Convert to uppercase to reduce possible variations.
  $text = strtoupper((string) $text);

  // Pattern NYT style: Common feature of 12- and 57-Across and 34-Down.
  // If Across or Down appears more than once, the pattern is considered too
  // complicated and is skipped.
  $across_count = substr_count($text, 'ACROSS');
  $down_count = substr_count($text, 'DOWN');
  $names_direction = ($across_count + $down_count) > 0;
  if (preg_match('/\d+\-/', $text) === 1 && $names_direction && $across_count <= 1 && $down_count <= 1) {
    $matches = [];
    preg_match_all('/(\d+\-)|(DOWN)|(ACROSS)/', $text, $matches);
    // $matches[0] is something like [13-, 23-, ACROSS, 45-, DOWN]. Numerals
    // are collected until a direction word shows up and are then assigned
    // to that direction. This works whichever direction comes first.
    // Numerals after the last direction word are ignored.
    $pending = [];
    foreach ($matches[0] as $token) {
      if ($token === 'ACROSS' || $token === 'DOWN') {
        foreach ($pending as $numeral) {
          $references[] = [
            'dir' => strtolower($token),
            'numeral' => $numeral,
          ];
        }
        $pending = [];
      }
      else {
        $pending[] = str_replace('-', '', $token);
      }
    }
  }

  // Pattern verbose: Common feature of 12-Across, 57-Across and 34-Down.
  // Pattern medium: Common feature of 12-A, 57-A and 34-D.
  // Pattern short: Common feature of 12A, 57A and 34D.
  // The hyphenated form is scanned first, then the short form.
  foreach (['/(\d+)\-([AD])/', '/(\d+)([AD])/'] as $pattern) {
    $matches = [];
    if (preg_match_all($pattern, $text, $matches, PREG_SET_ORDER)) {
      foreach ($matches as $match) {
        $references[] = [
          'dir' => $match[2] === 'A' ? 'across' : 'down',
          'numeral' => $match[1],
        ];
      }
    }
  }

  // Pattern starred: "Starred clues" refers to any clue with asterisk.
  // strpos() returns 0 when the clue starts with the phrase, so the result
  // must be compared against FALSE explicitly.
  if (strpos($text, 'STARRED CLUES') !== FALSE) {
    $references[] = ['dir' => 'starred'];
  }

  if (empty($references)) {
    return NULL;
  }
  // Some patterns will make duplicates, so remove those.
  return array_values(array_unique($references, SORT_REGULAR));
}
/**
 * Adds a 'moves' element to all the squares in the grid.
 *
 * This tells the arrow keys what to do when the puzzle is rendered.
 * This moves through black squares but stops at edges.
 *
 * For every direction (up, down, left, right) the move is either NULL or an
 * array with the 'row' and 'col' of the nearest square in that direction
 * that has a fill. Squares without a fill get NULL for all four moves.
 *
 * @param array $data
 *   The parsed crossword data.
 *
 * @return array
 *   The updated data. Data without a grid is returned unchanged.
 */
protected function addSquareMoves($data) {
  // An empty or missing grid has no squares to annotate. Returning early
  // avoids calling count() on a first row that does not exist.
  if (empty($data['puzzle']['grid']) || !is_array($data['puzzle']['grid'])) {
    return $data;
  }
  $grid = $data['puzzle']['grid'];
  $number_of_rows = count($grid);
  // The grid width is taken from its first row.
  $number_of_columns = (isset($grid[0]) && is_array($grid[0])) ? count($grid[0]) : 0;
  // Row and column offset of a single step in each direction.
  $steps = [
    'up' => [-1, 0],
    'down' => [1, 0],
    'left' => [0, -1],
    'right' => [0, 1],
  ];
  foreach ($grid as $row_index => $row) {
    foreach ($row as $col_index => $square) {
      $moves = array_fill_keys(array_keys($steps), NULL);
      if (isset($square['fill'])) {
        foreach ($steps as $direction => $step) {
          list($row_step, $col_step) = $step;
          $target_row = $row_index + $row_step;
          $target_col = $col_index + $col_step;
          // Walk until the edge of the grid, skipping squares without a
          // fill, and stop at the first square that has one.
          while ($target_row >= 0 && $target_row < $number_of_rows && $target_col >= 0 && $target_col < $number_of_columns) {
            if (isset($grid[$target_row][$target_col]['fill'])) {
              $moves[$direction] = [
                'row' => $target_row,
                'col' => $target_col,
              ];
              break;
            }
            $target_row += $row_step;
            $target_col += $col_step;
          }
        }
      }
      $data['puzzle']['grid'][$row_index][$col_index]['moves'] = $moves;
    }
  }
  return $data;
}
/**
 * Convert text encoding to UTF-8 so that data works with drupalSettings.
 *
 * If a string is not UTF-8 encoded, then some special characters can make
 * json_encode() return FALSE. That causes problems if data is being passed
 * to drupalSettings as happens in the CrosswordFormatter plugin.
 *
 * The title, author and notepad, every non-empty square fill and the text of
 * every clue are converted. A missing title, author, notepad or clue text
 * becomes an empty string.
 *
 * @param array $data
 *   Associative array that represents the crossword with unknown encoding.
 *
 * @return array
 *   Associative array that represents the crossword with text UTF-8 encoded.
 *
 * @see https://www.drupal.org/project/crossword/issues/3102647
 */
protected function convertCrosswordDataToUtf8(array $data) {
  foreach (['title', 'author', 'notepad'] as $key) {
    $data[$key] = $this->convertStringToUtf8($data[$key] ?? '');
  }
  foreach ($data['puzzle']['grid'] as $row_index => $row) {
    foreach ($row as $col_index => $square) {
      if (!empty($square['fill'])) {
        $data['puzzle']['grid'][$row_index][$col_index]['fill'] = $this->convertStringToUtf8($square['fill']);
      }
    }
  }
  foreach (['across', 'down'] as $direction) {
    foreach ($data['puzzle']['clues'][$direction] as &$clue) {
      $clue['text'] = $this->convertStringToUtf8($clue['text'] ?? '');
    }
    // Break the reference so it cannot leak into the next iteration.
    unset($clue);
  }
  return $data;
}

/**
 * Detects the encoding of a single string and converts it to UTF-8.
 *
 * @param string $text
 *   Text that is encoded as UTF-8, Windows-1252 or ISO-8859-1.
 *
 * @return string
 *   The result of UTF8Utils::convertToUTF8() for the detected encoding.
 */
private function convertStringToUtf8($text) {
  // Strict detection: in non-strict mode older PHP versions can report
  // 'UTF-8' for byte sequences that are not valid UTF-8, in which case the
  // text would be passed on unconverted.
  $encoding = mb_detect_encoding($text, 'UTF-8, Windows-1252, ISO-8859-1', TRUE);
  return UTF8Utils::convertToUTF8($text, $encoding);
}
/**
 * Filter data for Xss.
 *
 * This allows some unintrusive tags like a, em, and strong. If you really
 * need less restrictive filtering, you'll need to override the
 * crossword.data_service service.
 *
 * The title, author and notepad, every non-empty square fill and the text of
 * every clue are filtered. A missing title, author, notepad or clue text is
 * treated as an empty string.
 *
 * @param array $data
 *   Associative array that represents the crossword with possible xss intent.
 *
 * @return array
 *   Associative array that represents the crossword with XSS filtering.
 */
protected function filterXss(array $data) {
  foreach (['title', 'author', 'notepad'] as $key) {
    // Fall back to an empty string so a missing key raises no PHP notice,
    // matching what convertCrosswordDataToUtf8() does.
    $data[$key] = Xss::filter($data[$key] ?? '');
  }
  foreach ($data['puzzle']['grid'] as $row_index => $row) {
    foreach ($row as $col_index => $square) {
      if (!empty($square['fill'])) {
        $data['puzzle']['grid'][$row_index][$col_index]['fill'] = Xss::filter($square['fill']);
      }
    }
  }
  foreach (['across', 'down'] as $direction) {
    foreach ($data['puzzle']['clues'][$direction] as &$clue) {
      $clue['text'] = Xss::filter($clue['text'] ?? '');
    }
    // Break the reference so it cannot leak into the next iteration.
    unset($clue);
  }
  return $data;
}
}
