migrate_plus-8.x-5.x-dev/src/Plugin/migrate_plus/data_parser/Json.php
src/Plugin/migrate_plus/data_parser/Json.php
<?php
declare(strict_types=1);
namespace Drupal\migrate_plus\Plugin\migrate_plus\data_parser;
use Drupal\Component\Utility\UrlHelper;
use Drupal\Core\Url;
use Drupal\Core\Plugin\ContainerFactoryPluginInterface;
use Drupal\migrate\MigrateException;
use Drupal\migrate_plus\DataParserPluginBase;
/**
* Obtain JSON data for migration.
*
* @DataParser(
* id = "json",
* title = @Translation("JSON")
* )
*/
class Json extends DataParserPluginBase implements ContainerFactoryPluginInterface {
/**
* Iterator over the JSON data.
*/
protected ?\ArrayIterator $iterator = NULL;
/**
* The currently saved source url (as a string).
*
* @var string
*/
protected $currentUrl;
/**
* The active url's source data.
*
* @var array
*/
protected $sourceData;
/**
* Retrieves the JSON data and returns it as an array.
*
* @param string $url
* URL of a JSON feed.
* @param string|int $item_selector
* Selector within the data content at which useful data is found.
*
* @throws \GuzzleHttp\Exception\RequestException
*/
protected function getSourceData(string $url, string|int $item_selector = '') {
// Use cached source data if this is the first request or URL is same as the
// last time we made the request.
if ($this->currentUrl != $url || !$this->sourceData) {
$response = $this->getDataFetcherPlugin()->getResponseContent($url);
// Convert objects to associative arrays.
$this->sourceData = json_decode($response, TRUE);
// If json_decode() has returned NULL, it might be that the data isn't
// valid utf8 - see:
// http://php.net/manual/en/function.json-decode.php#86997.
if (!$this->sourceData) {
$utf8response = mb_convert_encoding($response, 'UTF-8');
$this->sourceData = json_decode($utf8response, TRUE);
}
$this->currentUrl = $url;
}
// Backwards-compatibility for depth selection.
if (is_numeric($this->itemSelector)) {
return $this->selectByDepth($this->sourceData, (int) $item_selector);
}
// If the item_selector is an empty string, return all.
if ($item_selector === '') {
return $this->sourceData;
}
// Otherwise, we're using xpath-like selectors.
$selectors = explode('/', trim($item_selector, '/'));
$return = $this->sourceData;
foreach ($selectors as $selector) {
// If the item_selector is missing, return an empty array.
if (!isset($return[$selector])) {
return [];
}
$return = $return[$selector];
}
return $return;
}
/**
* Get the source data for reading.
*
* @param array $raw_data
* Raw data from the JSON feed.
* @param int $item_selector
* Depth within the data content at which useful data is found.
*
* Selected items at the requested depth of the JSON feed.
*/
protected function selectByDepth(array $raw_data, int $item_selector = 0): array {
// Return the results in a recursive iterator that can traverse
// multidimensional arrays.
$iterator = new \RecursiveIteratorIterator(
new \RecursiveArrayIterator($raw_data),
\RecursiveIteratorIterator::SELF_FIRST);
$items = [];
// Backwards-compatibility - an integer item_selector is interpreted as a
// depth. When there is an array of items at the expected depth, pull that
// array out as a distinct item.
$identifierDepth = $item_selector;
$iterator->rewind();
while ($iterator->valid()) {
$item = $iterator->current();
if (is_array($item) && $iterator->getDepth() === $identifierDepth) {
$items[] = $item;
}
$iterator->next();
}
return $items;
}
/**
* {@inheritdoc}
*/
protected function openSourceUrl(string $url): bool {
// (Re)open the provided URL.
$source_data = $this->getSourceData($url, $this->itemSelector);
// Ensure there is source data at the current url.
if (is_null($source_data)) {
return FALSE;
}
$this->iterator = new \ArrayIterator($source_data);
return TRUE;
}
/**
* {@inheritdoc}
*/
protected function fetchNextRow(): void {
$current = $this->iterator->current();
if (is_array($current)) {
foreach ($this->fieldSelectors() as $field_name => $selector) {
$field_data = $current;
$field_selectors = explode('/', trim((string) $selector, '/'));
foreach ($field_selectors as $field_selector) {
if (is_array($field_data) && array_key_exists($field_selector, $field_data)) {
$field_data = $field_data[$field_selector];
}
else {
$field_data = '';
}
}
$this->currentItem[$field_name] = $field_data;
}
if (!empty($this->configuration['include_raw_data'])) {
$this->currentItem['raw'] = $current;
}
$this->iterator->next();
}
}
/**
* {@inheritdoc}
*/
protected function getNextUrls(string $url): array {
$next_urls = [];
// If a pager selector is provided, get the data from the source.
$selector_data = NULL;
if (!empty($this->configuration['pager']['selector'])) {
$selector_data = $this->getSourceData($url, $this->configuration['pager']['selector']);
}
// Logic for each type of pager.
switch ($this->configuration['pager']['type']) {
case 'urls':
if (NULL !== $selector_data) {
if (is_array($selector_data)) {
$next_urls = $selector_data;
}
elseif (filter_var($selector_data, FILTER_VALIDATE_URL)) {
$next_urls[] = $selector_data;
}
}
return $next_urls;
case 'cursor':
if (NULL !== $selector_data && is_scalar($selector_data)) {
// Just use 'cursor' as a default parameter key if not provided.
$key = !empty($this->configuration['pager']['key']) ? $this->configuration['pager']['key'] : 'cursor';
// Parse the url, replace the cursor param value, and rebuild the url.
$path = UrlHelper::parse($url);
$path['query'][$key] = $selector_data;
$next_urls[] = Url::fromUri($path['path'], [
'query' => $path['query'],
'fragment' => $path['fragment'],
])->toString();
}
return $next_urls;
case 'page':
if (NULL !== $selector_data && is_scalar($selector_data)) {
// Just use 'page' as a default parameter key if not provided.
$key = !empty($this->configuration['pager']['key']) ? $this->configuration['pager']['key'] : 'page';
// Define the max page to generate.
$max = $selector_data + 1;
if (!empty($this->configuration['pager']['selector_max'])) {
$max = $this->getSourceData($url, $this->configuration['pager']['selector_max']);
}
// Parse the url and replace the page param value and rebuild the url.
$path = UrlHelper::parse($url);
for ($page = $selector_data + 1; $page < $max; ++$page) {
$path['query'][$key] = $page;
$next_urls[] = Url::fromUri($path['path'], [
'query' => $path['query'],
'fragment' => $path['fragment'],
])->toString();
}
}
return $next_urls;
case 'paginator':
// The first pass uses the endpoint's default size.
// @todo Handle first URL set page size on first pass.
if (!isset($this->configuration['pager']['default_num_items'])) {
throw new MigrateException('Pager "default_num_items" must be configured.');
}
$num_items = $this->configuration['pager']['default_num_items'];
// Use 'page' as a default page parameter key if not provided.
$page_key = !empty($this->configuration['pager']['page_key']) ? $this->configuration['pager']['page_key'] : 'page';
// Set default paginator type.
$paginator_type_options = ['page_number', 'starting_item'];
$paginator_type = $paginator_type_options[0];
// Check configured paginator type.
if (!empty($this->configuration['pager']['paginator_type'])) {
if (!in_array($this->configuration['pager']['paginator_type'], $paginator_type_options)) {
// Not set to one of the two available options.
throw new MigrateException(
'Pager "paginator_type" must be configured as either "page_number" or "starting_item" ("page_number" is default).'
);
}
$paginator_type = $this->configuration['pager']['paginator_type'];
}
// Use 'pagesize' as a default page parameter key if not provided.
$size_key = !empty($this->configuration['pager']['size_key']) ? $this->configuration['pager']['size_key'] : 'pagesize';
// Parse the url.
$path = UrlHelper::parse($url);
$curr_page = !empty($path['query'][$page_key]) ? $path['query'][$page_key] : 0;
// @todo Use core's QueryBase and pager.
// @see contrib module external_entities \Entity\Query\External\Query.php for example.
$next_start = $curr_page + $num_items;
$next_end = $num_items;
// Use "page_number" when the pager uses page numbers to determine
// the item to start at, use "starting_item" when the pager uses the
// item number to start at.
if ($paginator_type === 'page_number') {
$next_start = $curr_page + 1;
}
// Replace the paginator param value.
$path['query'][$page_key] = $next_start;
// Replace the size param value.
$path['query'][$size_key] = $next_end;
// If we have a selector that tells us the number of rows returned in
// the current request, use that to decide if we should add the next
// url to the array.
if (NULL !== $selector_data) {
if (is_scalar($selector_data)) {
// If we have a numeric number of rows and the current page is still
// a full page (i.e. the number of items, $selector_data, in this
// page equals the number of items configured, $num_items), advance
// to the next page.
if ($selector_data == $num_items) {
$next_urls[] = Url::fromUri($path['path'], [
'query' => $path['query'],
'fragment' => $path['fragment'],
])->toString();
}
}
else {
// If we have an array of rows.
if (count($selector_data) > 0) {
$next_urls[] = Url::fromUri($path['path'], [
'query' => $path['query'],
'fragment' => $path['fragment'],
])->toString();
}
}
}
else {
// Rebuild the url.
$next_urls[] = Url::fromUri($path['path'], [
'query' => $path['query'],
'fragment' => $path['fragment'],
])->toString();
// Service may return 404 for last page, ensure next_urls are valid.
foreach ($next_urls as $key => $next_url) {
try {
$response = $this->getDataFetcherPlugin()->getResponse($next_url);
if ($response->getStatusCode() !== 200) {
unset($next_urls[$key]);
}
}
catch (\Exception $e) {
unset($next_urls[$key]);
}
}
}
return $next_urls;
}
return array_merge(parent::getNextUrls($url), $next_urls);
}
}
