dcat-8.x-1.x-dev/dcat_import/src/Plugin/DcatGraph.php
dcat_import/src/Plugin/DcatGraph.php
<?php
namespace Drupal\dcat_import\Plugin;
use EasyRdf_Graph;
use EasyRdf_Resource;
use EasyRdf_Http_Exception;
/**
* EasyRdf graph that can load DCAT feeds spread over several pages.
*
* @package Drupal\dcat_import\Plugin
*/
class DcatGraph extends EasyRdf_Graph {

  /**
   * Name of the query argument used to request a specific page.
   *
   * @var string
   */
  public $pagerArgument = 'page';

  /**
   * {@inheritdoc}
   */
  public function __construct($uri = NULL, $data = NULL, $format = NULL) {
    parent::__construct($uri, $data, $format);
    // Register the 'adms' prefix so ADMS terms can be referenced by their
    // short name.
    \EasyRdf_Namespace::set('adms', 'http://www.w3.org/ns/adms#');
  }

  /**
   * Creates a new graph and loads RDF data from a URI into it.
   *
   * @param string $uri
   *   The URI of the data to load.
   * @param string $format
   *   Optional format of the data (eg. rdfxml).
   * @param string $pager_argument
   *   Optional name of the pager argument. When empty, the default value of
   *   $pagerArgument is kept.
   *
   * @return static
   *   The graph, with the data loaded into it.
   */
  public static function newAndLoad($uri, $format = NULL, $pager_argument = NULL) {
    // Late static binding, so a subclass gets an instance of itself.
    $graph = new static($uri);
    if (!empty($pager_argument)) {
      $graph->pagerArgument = $pager_argument;
    }
    $graph->load($uri, $format);
    return $graph;
  }

  /**
   * Returns the pager argument to use in the uri.
   *
   * @return string
   *   The pager argument.
   */
  public function getPagerArgument() {
    return $this->pagerArgument;
  }

  /**
   * Compare two result sets.
   *
   * To limit complexity and execution time, only the last resource of each
   * set is compared.
   *
   * @param array $previous
   *   An array of EasyRdf_Resource objects.
   * @param array $current
   *   An array of EasyRdf_Resource objects to compare to.
   *
   * @return bool
   *   TRUE if the results are considered the same. Two empty sets are the
   *   same; an empty and a non-empty set are not.
   */
  public function compareResults(array $previous, array $current) {
    // end() returns FALSE on an empty array; calling getUri() on that would be
    // a fatal error, so settle the empty cases up front.
    if (empty($previous) || empty($current)) {
      return empty($previous) && empty($current);
    }

    /** @var EasyRdf_Resource $previous_last */
    $previous_last = end($previous);
    /** @var EasyRdf_Resource $current_last */
    $current_last = end($current);

    return $previous_last->getUri() === $current_last->getUri();
  }

  /**
   * Returns all non-blank resources that have a type.
   *
   * @param array $resources
   *   List of resources to look into, defaults to $this->resources().
   *
   * @return array
   *   The given resources, with their original keys, minus the blank nodes and
   *   the resources without a type.
   */
  public function getNoneBlankResources($resources = array()) {
    $resources = empty($resources) ? $this->resources() : $resources;

    /** @var EasyRdf_Resource $resource */
    foreach ($resources as $key => $resource) {
      if ($resource->isBNode() || empty($resource->type())) {
        unset($resources[$key]);
      }
    }

    return $resources;
  }

  /**
   * Returns paged url.
   *
   * @param string $base
   *   The base url.
   * @param string $argument
   *   The pager argument.
   * @param int $count
   *   The page number.
   *
   * @return string|null
   *   The paged url, the unchanged base url for page 1 (and lower), or NULL
   *   when no base url is given.
   */
  public function pagedUrlBuilder($base, $argument, $count) {
    // If no $base is given, return NULL.
    if (empty($base)) {
      return NULL;
    }

    // Some servers return a 404 if we try to use arguments, so ignoring this
    // for page 1.
    if ($count <= 1) {
      return $base;
    }

    // strpos() returns 0 for a match at the very start, which is falsy; an
    // explicit comparison with FALSE is needed to detect "not found".
    $separator = strpos($base, '?') !== FALSE ? '&' : '?';
    return $base . $separator . $argument . '=' . $count;
  }

  /**
   * Load a single RDF page into the graph from a URI.
   *
   * Calls the parent load() directly, bypassing the paging logic of load().
   * If no URI is given, then the URI of the graph will be used.
   *
   * The document type is optional but should be specified if it
   * can't be guessed or got from the HTTP headers.
   *
   * @param string $uri
   *   The URI of the data to load.
   * @param string $format
   *   Optional format of the data (eg. rdfxml).
   *
   * @throws \Exception
   *   When there is no data.
   *
   * @return int
   *   The number of triples added to the graph.
   */
  public function loadSingle($uri = NULL, $format = NULL) {
    return parent::load($uri, $format);
  }

  /**
   * Load RDF data into the graph from a URI.
   *
   * Overridden to support 'Data Catalog Interoperability Protocol', as this
   * describes that the data can be spun out over different pages with a pager
   * argument. Pages are requested one after another until a page yields no
   * resources, yields nothing new, or results in a 404 after earlier pages
   * were loaded successfully.
   *
   * @param string $uri
   *   The URI of the data to load. If no URI is given, then the URI of the
   *   graph will be used.
   * @param string $format
   *   Optional format of the data (eg. rdfxml).
   *
   * @throws \Exception
   *   When there is no data.
   *
   * @return int
   *   The number of non-blank, typed resources in the graph after loading.
   *
   * @see http://spec.dataportals.org/#extra-parameters
   */
  public function load($uri = NULL, $format = NULL) {
    // Resolve the fallback here; otherwise pagedUrlBuilder() would return NULL
    // for every page and the pager argument would never be sent.
    if ($uri === NULL) {
      $uri = $this->getUri();
    }

    // Non-blank resources found after the previously loaded page. Stays empty
    // until the first page has been processed successfully.
    $previous_data = array();
    $page = 1;

    while (TRUE) {
      $current_uri = $this->pagedUrlBuilder($uri, $this->getPagerArgument(), $page);

      try {
        $this->loadSingle($current_uri, $format);
      }
      catch (EasyRdf_Http_Exception $e) {
        if ((int) $e->getCode() === 404 && !empty($previous_data)) {
          // When we receive a 404 after we have already received data, it is
          // most likely that we just encountered the end of the DCAT feed.
          break;
        }
        throw $e;
      }

      // The graph accumulates data over the pages, so this is everything
      // loaded so far, not only the resources of the current page.
      $current_data = $this->getNoneBlankResources();
      if (empty($current_data)) {
        // No data (left).
        break;
      }
      if (!empty($previous_data) && $this->compareResults($previous_data, $current_data)) {
        // Result is the same as the previous. This happens if source does not
        // support the extra parameters.
        break;
      }

      $previous_data = $current_data;
      $page++;
    }

    return count($this->getNoneBlankResources());
  }

}
