dcat-8.x-1.x-dev/dcat_import/src/Plugin/migrate/source/DcatFeedSource.php
dcat_import/src/Plugin/migrate/source/DcatFeedSource.php
<?php
namespace Drupal\dcat_import\Plugin\migrate\source;
use Drupal\Core\Datetime\DrupalDateTime;
use Drupal\migrate\Plugin\migrate\id_map\Sql;
use Drupal\migrate\Plugin\migrate\source\SourcePluginBase;
use Drupal\dcat_import\Plugin\DcatGraph;
use EasyRdf_Graph;
use EasyRdf_Resource;
/**
* DCAT feed source.
*/
abstract class DcatFeedSource extends SourcePluginBase {
/**
* Array of source data.
*
* @var array.
*/
private $sourceData;
/**
* Bool to indicate if the extractor has already ran.
*
* @var bool
*/
private $extractionDone = FALSE;
/**
* {@inheritdoc}
*/
protected $cacheCounts = TRUE;
/**
* {@inheritdoc}
*/
protected $trackChanges = TRUE;
/**
* Returns the DCAT type to extract from the feed.
*
* E.g. dcat:Dataset, dcat:Distribution ...
*
* @return string
* The DCAT type to extract.
*/
public abstract function getDcatType();
/**
* Extract data from the given EasyRdf Graph.
*
* @param EasyRdf_Graph $graph
* The EasyRdf Graph to extract the data from.
*
* @return array
* The extracted data.
*/
public function getDcatData(EasyRdf_Graph $graph) {
$data = $graph->allOfType($this->getDcatType());
return $graph->getNoneBlankResources($data);
}
/**
* Data getter.
*
* @return array
* Array of source data.
*/
public function getSourceData() {
if (!$this->extractionDone) {
$format = isset($this->configuration['format']) ? $this->configuration['format'] : 'turtle';
$pager_argument = isset($this->configuration['pager_argument']) ? $this->configuration['pager_argument'] : NULL;
$graph = DcatGraph::newAndLoad($this->configuration['uri'], $format, $pager_argument);
$data = $this->getDcatData($graph);
$deleted = $this->deletedResources($data, $graph);
$this->sourceData = array_merge($data, $deleted);
$this->extractionDone = TRUE;
}
return $this->sourceData;
}
/**
* Returns the deleted resources so they can be unpublished.
*
* @param array $data
* The current array of data resources.
* @param DcatGraph $graph
* The graph used to create the deleted resources.
*
* @return array
* An array of EasyRdf resources that are deleted in the current graph.
*/
private function deletedResources(array $data, DcatGraph $graph) {
$deleted = [];
/** @var Sql $map */
$map = $this->migration->getIdMap();
$imported = $map->getDatabase()->select($map->mapTableName(), 'map')
->fields('map', ['sourceid1'])
->execute()
->fetchAllKeyed(0, 0);
foreach ($data as $resource) {
if (!is_object($resource)) {
return [];
}
/** @var EasyRdf_Resource $uri */
$uri = $resource->getUri();
unset($imported[(string) $uri]);
}
foreach ($imported as $uri => $uuid) {
$resource = $graph->resource($uri);
$resource->add('deleted', 1);
$deleted[] = $resource;
}
return $deleted;
}
/**
* {@inheritdoc}
*/
public function initializeIterator() {
$data = [];
foreach ($this->getSourceData() as $resource) {
$data[] = $this->convertResource($resource);
}
return new \ArrayIterator($data);
}
/**
* Convert an EasyRdf resource to an array.
*
* @param \EasyRdf_Resource $resource
* The resource to covert.
*
* @return array
* Array of values to import.
*/
public function convertResource(EasyRdf_Resource $resource) {
return [
'uri' => $resource->getUri(),
'status' => !$this->getValue($resource, 'deleted'),
];
}
/**
* Allows class to decide how it will react when it is treated like a string.
*/
public function __toString() {
return '';
}
/**
* {@inheritdoc}
*/
public function count($refresh = FALSE) {
return count($this->getSourceData());
}
/**
* Unify the return values.
*
* @param mixed $value
* The value to unify.
*
* @return null|string|array
* Null if empty, string if single value, array if multi value.
*/
public function unifyReturnValue($value) {
if (empty($value)) {
return NULL;
}
if (!is_array($value)) {
return $value;
}
if (count($value) == 1) {
return array_shift($value);
}
return $value;
}
/**
* Return all values for a property from an EasyRdf resource.
*
* @param \EasyRdf_Resource $resource
* The EasyRdf resource to get the property from.
* @param string $property
* The name of the property to get.
*
* @return array
* The values as an array of strings.
*/
public function getValueArray(EasyRdf_Resource $resource, $property) {
$values = array();
foreach ($resource->all($property) as $value) {
if (!empty($value)) {
$values[] = $this->getSingleValue($value);
}
}
return $values;
}
/**
* Convert an EasyRdf Resource or Literal to a single value.
*
* @param mixed $value
* EasyRdf_Resource or EasyRdf_Literal object.
*
* @return string|null
* A single value representing the object or Null if it is a blank resource.
*/
public function getSingleValue($value) {
$class = get_class($value);
switch ($class) {
case 'EasyRdf_Resource':
if ($value->isBNode()) {
return NULL;
}
return $value->getUri();
case 'EasyRdf_Literal_DateTime':
return $value->getValue()->format('c');
default:
return $value->getValue();
}
}
/**
* Get the value for a property from an EasyRdf resource.
*
* @param \EasyRdf_Resource $resource
* The EasyRdf resource to get the property from.
* @param string $property
* The name of the property to get.
*
* @return null|string|array
* Null if empty, string if single value, array if multi value.
*/
public function getValue(EasyRdf_Resource $resource, $property) {
$values = $this->getValueArray($resource, $property);
return $this->unifyReturnValue($values);
}
/**
* Get a certain property from an EasyRdf resource as datetime storage string.
*
* @param \EasyRdf_Resource $resource
* The EasyRdf resource to get the property from.
* @param string $property
* The name of the property to get.
*
* @return null|string|array
* Null if empty, string if single value, array if multi value.
*/
public function getDateValue(EasyRdf_Resource $resource, $property) {
$values = $this->getValueArray($resource, $property);
$dates = array();
foreach ($values as $value) {
$date = $value instanceof \DateTime ? DrupalDateTime::createFromDateTime($value) : new DrupalDateTime($value);
$dates[] = $date->format(DATETIME_DATETIME_STORAGE_FORMAT);
}
return $this->unifyReturnValue($dates);
}
/**
* Get a certain property from an EasyRdf resource as an email string.
*
* Basically removes mailto: part.
*
* @param \EasyRdf_Resource $resource
* The EasyRdf resource to get the property from.
* @param string $property
* The name of the property to get.
*
* @return null|string|array
* Null if empty, string if single value, array if multi value.
*/
public function getEmailValue(EasyRdf_Resource $resource, $property) {
$values = $this->getValueArray($resource, $property);
$emails = [];
foreach ($values as $value) {
$emails[] = $this->stripMailto($value);
}
return $this->unifyReturnValue($emails);
}
/**
* Strip mailto: at the start of the given value.
*
* @param string $value
* The value to strip mailto: from e.g. mailto:me@example.com.
*
* @return string
* The value without the mailto: part e.g. me@example.com.
*/
public function stripMailto($value) {
return preg_replace("/^mailto:/", '', $value);
}
}
