crossword-8.x-1.x-dev/src/CrosswordDataService.php
src/CrosswordDataService.php
<?php

namespace Drupal\crossword;

use Drupal\Component\Utility\Xss;
use Drupal\Core\Cache\CacheBackendInterface;
use Drupal\Core\Extension\ModuleHandler;
use Drupal\Core\Logger\LoggerChannelFactoryInterface;
use Drupal\file\FileInterface;
use Masterminds\HTML5\Parser\UTF8Utils;

/**
 * Service to get structured data from Crossword file.
 *
 * This is the service you should use if you need to get all or part of the
 * crossword's structured data. While it's possible to get the data directly
 * from a crossword_parser plugin, this service is much easier to use.
 * Additionally, this service uses a caching layer and invokes the
 * 'crossword_data' alter hook.
 */
class CrosswordDataService implements CrosswordDataServiceInterface {

  /**
   * Cache for the result of the parse function.
   *
   * @var \Drupal\Core\Cache\CacheBackendInterface
   */
  protected $cache;

  /**
   * The crossword file parser plugin manager.
   *
   * @var \Drupal\crossword\CrosswordFileParserManager
   */
  protected $parserManager;

  /**
   * The module handler.
   *
   * @var \Drupal\Core\Extension\ModuleHandler
   */
  protected $moduleHandler;

  /**
   * The crossword logging channel.
   *
   * @var \Drupal\Core\Logger\LoggerChannelInterface
   */
  protected $logger;

  /**
   * Construct the Crossword Data Service.
   *
   * @param \Drupal\Core\Cache\CacheBackendInterface $cache
   *   The cache.
   * @param \Drupal\crossword\CrosswordFileParserManager $parser_manager
   *   The crossword parser manager.
   * @param \Drupal\Core\Extension\ModuleHandler $module_handler
   *   The module handler.
   * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $channelFactory
   *   The LoggerChannelFactoryInterface object.
   */
  public function __construct(CacheBackendInterface $cache, CrosswordFileParserManager $parser_manager, ModuleHandler $module_handler, LoggerChannelFactoryInterface $channelFactory) {
    $this->cache = $cache;
    $this->parserManager = $parser_manager;
    $this->moduleHandler = $module_handler;
    $this->logger = $channelFactory->get('crossword');
  }

  /**
   * {@inheritdoc}
   */
  public function getData(FileInterface $file, $redacted = FALSE) {
    $cached = $this->cache->get($file->id());
    if ($cached) {
      $data = $cached->data['data'] ?? NULL;
    }
    else {
      // A NULL result (no parser, or a parse failure) is cached as well so
      // that a bad file is not re-parsed on every request.
      $data = $this->parseFile($file);
      $this->cache->set($file->id(), ['data' => $data], CacheBackendInterface::CACHE_PERMANENT, $file->getCacheTags());
    }

    if ($data === NULL) {
      return NULL;
    }
    return $redacted ? $this->redact($data) : $data;
  }

  /**
   * Parses a file and post-processes the result, bypassing the cache.
   *
   * @param \Drupal\file\FileInterface $file
   *   The crossword file.
   *
   * @return array|null
   *   The processed crossword data, or NULL if no parser could be loaded for
   *   the file or the parser threw a CrosswordException.
   */
  private function parseFile(FileInterface $file) {
    $parser = $this->parserManager->loadCrosswordFileParserFromInput($file);
    if (!$parser) {
      return NULL;
    }

    try {
      $data = $parser->parse();
    }
    catch (CrosswordException $e) {
      $this->logger->error('The %parser parser failed parsing file with id %id: %message', [
        '%parser' => $parser->getPluginId(),
        '%id' => $file->id(),
        '%message' => $e->getMessage(),
      ]);
      return NULL;
    }

    // The remaining steps are independent of file type so not in parser.
    $data = $this->addReferences($data);
    $data = $this->addSquareMoves($data);
    $data = $this->convertCrosswordDataToUtf8($data);
    $data = $this->filterXss($data);
    $this->moduleHandler->alter('crossword_data', $data, $file);

    return $data;
  }

  /**
   * {@inheritdoc}
   */
  public function redact(array $data) {
    foreach ($data['puzzle']['grid'] as &$row) {
      foreach ($row as &$square) {
        // Squares whose fill is NULL are left alone, as are squares that
        // carry a 'hint'.
        if ($square['fill'] !== NULL && !isset($square['hint'])) {
          $square['fill'] = '';
          $square['rebus'] = FALSE;
        }
      }
      // Break the references so later writes cannot touch the grid.
      unset($square);
    }
    unset($row);

    return $data;
  }

  /**
   * {@inheritdoc}
   */
  public function getAuthor(FileInterface $file) {
    $data = $this->getData($file);
    if ($data && isset($data['author'])) {
      return $data['author'];
    }
    return NULL;
  }

  /**
   * {@inheritdoc}
   */
  public function getDimensionAcross(FileInterface $file) {
    $data = $this->getData($file);
    if ($data && isset($data['puzzle']['grid'][0]) && is_array($data['puzzle']['grid'][0])) {
      // The width is taken from the first row.
      return count($data['puzzle']['grid'][0]);
    }
    return NULL;
  }

  /**
   * {@inheritdoc}
   */
  public function getDimensionDown(FileInterface $file) {
    $data = $this->getData($file);
    if ($data && isset($data['puzzle']['grid']) && is_array($data['puzzle']['grid'])) {
      return count($data['puzzle']['grid']);
    }
    return NULL;
  }

  /**
   * {@inheritdoc}
   */
  public function getDimensions(FileInterface $file, $delimiter = 'x') {
    $down = $this->getDimensionDown($file);
    if (!$down) {
      return NULL;
    }
    $across = $this->getDimensionAcross($file);
    if (!$across) {
      return NULL;
    }
    return $down . $delimiter . $across;
  }

  /**
   * {@inheritdoc}
   */
  public function getTitle(FileInterface $file) {
    $data = $this->getData($file);
    if ($data && isset($data['title'])) {
      return $data['title'];
    }
    return NULL;
  }

  /**
   * {@inheritdoc}
   */
  public function getSolution(FileInterface $file, $black = '') {
    $data = $this->getData($file);
    if (!$data) {
      return NULL;
    }

    $solution = [];
    foreach ($data['puzzle']['grid'] ?? [] as $row_index => $row) {
      $solution[$row_index] = [];
      foreach ($row as $square) {
        // A NULL fill is rendered using the caller-supplied $black value.
        $solution[$row_index][] = ($square['fill'] !== NULL) ? $square['fill'] : $black;
      }
    }
    return $solution;
  }

  /**
   * {@inheritdoc}
   */
  public function isRebus(FileInterface $file) {
    $data = $this->getData($file);
    if ($data && isset($data['puzzle']['grid'])) {
      foreach ($data['puzzle']['grid'] as $row) {
        foreach ($row as $square) {
          if (!empty($square['rebus'])) {
            return TRUE;
          }
        }
      }
    }
    return FALSE;
  }

  /**
   * For each clue, adds references in standard form.
   *
   * Array(
   *   [
   *     'dir' => 'down' or 'across',
   *     'numeral' => number,
   *     'index' => number,
   *   ],
   * )
   *
   * Every clue receives a 'references' element; it is NULL when the clue text
   * contains no recognised reference.
   *
   * @param array $data
   *   The parsed crossword data.
   *
   * @return array
   *   The updated data.
   */
  protected function addReferences(array $data) {
    $clues = $data['puzzle']['clues'] ?? [];
    // The list of starred clues only depends on clue text, so it is computed
    // at most once and shared by every clue that mentions "starred clues".
    $starred = NULL;

    foreach (['down', 'across'] as $dir) {
      foreach ($clues[$dir] ?? [] as $index => $clue) {
        $references = $this->findReferences($clue['text'] ?? '');
        if (!empty($references)) {
          if ($this->hasStarredReference($references)) {
            if ($starred === NULL) {
              $starred = $this->findStarredClues($clues);
            }
            // A starred reference replaces all other references of the clue.
            $references = $starred;
          }
          else {
            $references = $this->addReferenceIndexes($references, $clues);
          }
        }
        $data['puzzle']['clues'][$dir][$index]['references'] = $references;
      }
    }

    return $data;
  }

  /**
   * Tells whether a reference list contains the 'starred' pseudo-reference.
   *
   * @param array $references
   *   References as returned by findReferences().
   *
   * @return bool
   *   TRUE if any reference has 'dir' equal to 'starred'.
   */
  private function hasStarredReference(array $references) {
    foreach ($references as $reference) {
      if ($reference['dir'] === 'starred') {
        return TRUE;
      }
    }
    return FALSE;
  }

  /**
   * Builds references to every clue whose text contains an asterisk.
   *
   * @param array $clues
   *   The clues, keyed by direction ('across', 'down').
   *
   * @return array
   *   References (dir, numeral, index); across clues first, then down clues.
   */
  private function findStarredClues(array $clues) {
    $starred = [];
    foreach (['across', 'down'] as $dir) {
      foreach ($clues[$dir] ?? [] as $index => $clue) {
        if (strpos($clue['text'] ?? '', '*') !== FALSE) {
          $starred[] = [
            'dir' => $dir,
            'numeral' => $clue['numeral'],
            'index' => $index,
          ];
        }
      }
    }
    return $starred;
  }

  /**
   * Adds the 'index' of the referenced clue to each reference.
   *
   * A reference whose numeral matches no clue in its direction is returned
   * without an 'index' element.
   *
   * @param array $references
   *   References (dir, numeral) as returned by findReferences().
   * @param array $clues
   *   The clues, keyed by direction ('across', 'down').
   *
   * @return array
   *   The references, with 'index' added where a matching clue exists.
   */
  private function addReferenceIndexes(array $references, array $clues) {
    foreach ($references as $key => $reference) {
      foreach ($clues[$reference['dir']] ?? [] as $index => $clue) {
        // Loose comparison on purpose: reference numerals are strings taken
        // from the clue text while clue numerals may be integers.
        if ($clue['numeral'] == $reference['numeral']) {
          $references[$key]['index'] = $index;
          break;
        }
      }
    }
    return $references;
  }

  /**
   * Returns an array representing clues referenced in the input text.
   *
   * If the text of a clue is something like "Common feature of 12- and
   * 57-Across and 34-Down", the return value will be:
   *
   * Array(
   *   [
   *     'dir' => 'across',
   *     'numeral' => 12,
   *   ],
   *   [
   *     'dir' => 'across',
   *     'numeral' => 57,
   *   ],
   *   [
   *     'dir' => 'down',
   *     'numeral' => 34,
   *   ],
   * )
   *
   * A clue containing the phrase "starred clues" additionally yields the
   * pseudo-reference ['dir' => 'starred'].
   *
   * @param string $text
   *   The text of the clue to be parsed for references.
   *
   * @return array|null
   *   An array representing any clues to which a reference was found in
   *   $text, without duplicates, or NULL if no reference was found.
   */
  protected function findReferences($text) {
    $references = [];
    // Convert to uppercase to reduce possible variations.
    $text = strtoupper((string) $text);

    // Pattern NYT Style: Common feature of 12- and 57-Across and 34-Down.
    // If Across or Down appears more than once, this pattern is too
    // complicated and is left to the patterns below.
    if (preg_match('/\d+\-/', $text) === 1
      && substr_count($text, 'ACROSS') <= 1
      && substr_count($text, 'DOWN') <= 1
    ) {
      $matches = [];
      preg_match_all('/(\d+\-)|(DOWN)|(ACROSS)/', $text, $matches);
      // $matches[0] is something like [13-, 23-, ACROSS, 45-, DOWN]. Each
      // direction word applies to the numerals collected since the previous
      // direction word, so the order of Across and Down does not matter.
      // Numerals that are not followed by a direction word are ignored.
      $pending = [];
      foreach ($matches[0] as $match) {
        if ($match === 'ACROSS' || $match === 'DOWN') {
          $dir = strtolower($match);
          foreach ($pending as $numeral) {
            $references[] = [
              'dir' => $dir,
              'numeral' => $numeral,
            ];
          }
          $pending = [];
        }
        else {
          $pending[] = rtrim($match, '-');
        }
      }
    }

    // Pattern verbose: Common feature of 12-Across, 57-Across and 34-Down.
    // Pattern medium: Common feature of 12-A, 57-A and 34-D.
    // Pattern short: Common feature of 12A, 57A and 34D.
    foreach (['/(\d+)\-([AD])/', '/(\d+)([AD])/'] as $pattern) {
      $matches = [];
      preg_match_all($pattern, $text, $matches, PREG_SET_ORDER);
      foreach ($matches as $match) {
        $references[] = [
          'dir' => $match[2] === 'A' ? 'across' : 'down',
          'numeral' => $match[1],
        ];
      }
    }

    // Pattern starred: "Starred clues" refers to any clue with asterisk.
    // Compare against FALSE explicitly: strpos() returns 0 when the clue
    // starts with the phrase.
    if (strpos($text, 'STARRED CLUES') !== FALSE) {
      $references[] = ['dir' => 'starred'];
    }

    if (empty($references)) {
      return NULL;
    }
    // Some patterns will make duplicates, so remove those.
    return array_values(array_unique($references, SORT_REGULAR));
  }

  /**
   * Adds a 'moves' element to all the squares in the grid.
   *
   * This tells the arrow keys what to do when the puzzle is rendered.
   * This moves through black squares but stops at edges.
   *
   * @param array $data
   *   The parsed crossword data.
   *
   * @return array
   *   The updated data.
   */
  protected function addSquareMoves($data) {
    $grid = $data['puzzle']['grid'] ?? [];
    $number_of_rows = count($grid);
    // The width is taken from the first row; an empty grid has no columns.
    $number_of_columns = (isset($grid[0]) && is_array($grid[0])) ? count($grid[0]) : 0;

    // Row and column step for each arrow key.
    $directions = [
      'up' => [-1, 0],
      'down' => [1, 0],
      'left' => [0, -1],
      'right' => [0, 1],
    ];

    foreach ($grid as $row_index => $row) {
      foreach ($row as $col_index => $square) {
        $moves = [
          'up' => NULL,
          'down' => NULL,
          'left' => NULL,
          'right' => NULL,
        ];
        // Only squares with a fill can be moved from.
        if (isset($square['fill'])) {
          foreach ($directions as $name => $step) {
            $moves[$name] = $this->findNextSquareWithFill($grid, $row_index, $col_index, $step[0], $step[1], $number_of_rows, $number_of_columns);
          }
        }
        $data['puzzle']['grid'][$row_index][$col_index]['moves'] = $moves;
      }
    }

    return $data;
  }

  /**
   * Finds the nearest square with a fill in a given direction.
   *
   * @param array $grid
   *   The grid, as a list of rows of squares.
   * @param int $row
   *   Row index of the starting square.
   * @param int $col
   *   Column index of the starting square.
   * @param int $row_step
   *   Row increment per step (-1, 0 or 1).
   * @param int $col_step
   *   Column increment per step (-1, 0 or 1).
   * @param int $number_of_rows
   *   Number of rows in the grid.
   * @param int $number_of_columns
   *   Number of columns in the grid.
   *
   * @return array|null
   *   An array with keys 'row' and 'col', or NULL if the edge of the grid is
   *   reached before a square with a fill is found.
   */
  private function findNextSquareWithFill(array $grid, $row, $col, $row_step, $col_step, $number_of_rows, $number_of_columns) {
    $row += $row_step;
    $col += $col_step;
    while ($row >= 0 && $row < $number_of_rows && $col >= 0 && $col < $number_of_columns) {
      if (isset($grid[$row][$col]['fill'])) {
        return [
          'row' => $row,
          'col' => $col,
        ];
      }
      $row += $row_step;
      $col += $col_step;
    }
    return NULL;
  }

  /**
   * Convert text encoding to UTF-8 so that data works with drupalSettings.
   *
   * If a string is not utf-8 encoded, then some special characters can make
   * json_encode() return FALSE. That causes problems if data is being passed
   * to drupalSettings as happens in the CrosswordFormatter plugin.
   *
   * @param array $data
   *   Associative array that represents the crossword with unknown encoding.
   *
   * @return array
   *   Associative array that represents the crossword with text UTF-8 encoded.
   *
   * @see https://www.drupal.org/project/crossword/issues/3102647
   */
  protected function convertCrosswordDataToUtf8(array $data) {
    foreach (['title', 'author', 'notepad'] as $key) {
      $data[$key] = $this->toUtf8($data[$key] ?? '');
    }

    foreach ($data['puzzle']['grid'] as $row_index => $row) {
      foreach ($row as $col_index => $square) {
        if (!empty($square['fill'])) {
          $data['puzzle']['grid'][$row_index][$col_index]['fill'] = $this->toUtf8($square['fill']);
        }
      }
    }

    foreach (['across', 'down'] as $dir) {
      foreach ($data['puzzle']['clues'][$dir] as $index => $clue) {
        $data['puzzle']['clues'][$dir][$index]['text'] = $this->toUtf8($clue['text'] ?? '');
      }
    }

    return $data;
  }

  /**
   * Converts a single string to UTF-8.
   *
   * The source encoding is guessed from UTF-8, Windows-1252 and ISO-8859-1.
   *
   * @param string $text
   *   Text with unknown encoding.
   *
   * @return string
   *   The text as returned by UTF8Utils::convertToUTF8().
   */
  private function toUtf8($text) {
    $encoding = mb_detect_encoding($text, 'UTF-8, Windows-1252, ISO-8859-1');
    // mb_detect_encoding() returns FALSE when it cannot decide; never hand
    // that to the converter as an encoding name.
    return UTF8Utils::convertToUTF8($text, $encoding ?: 'UTF-8');
  }

  /**
   * Filter data for Xss.
   *
   * This allows some unintrusive tags like a, em, and strong. If you really
   * need less restrictive filtering, you'll need to override the
   * crossword.data_service service.
   *
   * @param array $data
   *   Associative array that represents the crossword with possible xss intent.
   *
   * @return array
   *   Associative array that represents the crossword with XSS filtering.
   */
  protected function filterXss(array $data) {
    foreach (['title', 'author', 'notepad'] as $key) {
      $data[$key] = Xss::filter($data[$key] ?? '');
    }

    foreach ($data['puzzle']['grid'] as $row_index => $row) {
      foreach ($row as $col_index => $square) {
        if (!empty($square['fill'])) {
          $data['puzzle']['grid'][$row_index][$col_index]['fill'] = Xss::filter($square['fill']);
        }
      }
    }

    foreach (['across', 'down'] as $dir) {
      foreach ($data['puzzle']['clues'][$dir] as $index => $clue) {
        $data['puzzle']['clues'][$dir][$index]['text'] = Xss::filter($clue['text'] ?? '');
      }
    }

    return $data;
  }

}