podcast_publisher-1.0.0-alpha3/modules/podcast_publisher_analytics/src/Plugin/QueueWorker/DownloadIntentTracker.php
modules/podcast_publisher_analytics/src/Plugin/QueueWorker/DownloadIntentTracker.php
<?php
namespace Drupal\podcast_publisher_analytics\Plugin\QueueWorker;
use Drupal\Core\Plugin\ContainerFactoryPluginInterface;
use Drupal\Core\Queue\QueueWorkerBase;
use GeoIp2\Database\Reader;
use GeoIp2\Model\AbstractModel;
use Symfony\Component\DependencyInjection\ContainerInterface;
/**
 * Queue worker that records podcast download intents.
 *
 * Every queue item describes a single request for an episode's media file.
 * The worker links repeated requests of the same requester to the original
 * intent, resolves the user agent and the geographic area of the request and
 * writes one row into the 'podcast_download_intent' table.
 *
 * @QueueWorker(
 *   id = "podcast_download_intent_tracker",
 *   title = @Translation("Custom Queue"),
 *   cron = {"time" = 60}
 * )
 */
class DownloadIntentTracker extends QueueWorkerBase implements ContainerFactoryPluginInterface {

  /**
   * Path of the GeoLite2 city database, relative to the Drupal root.
   */
  const GEOLITE_DB_PATH_TO_FILE = 'libraries/geolite2/GeoLite2-City.mmdb';

  /**
   * Podcast Episode storage.
   *
   * @var \Drupal\Core\Entity\EntityStorageInterface
   */
  protected $episodeStorage;

  /**
   * File storage.
   *
   * @var \Drupal\Core\Entity\EntityStorageInterface
   */
  protected $fileStorage;

  /**
   * The module handler.
   *
   * @var \Drupal\Core\Extension\ModuleHandlerInterface
   */
  protected $moduleHandler;

  /**
   * Parsed user-agents.json file, or NULL while it has not been loaded yet.
   *
   * @var mixed[]|object|null
   */
  protected $userAgents;

  /**
   * The Geo Reader.
   *
   * @var \GeoIp2\Database\Reader
   */
  protected $geoReader;

  /**
   * The current active database's master connection.
   *
   * @var \Drupal\Core\Database\Connection
   */
  protected $database;

  /**
   * The logger.
   *
   * @var \Psr\Log\LoggerInterface
   */
  protected $logger;

  /**
   * The cache tag invalidator.
   *
   * @var \Drupal\Core\Cache\CacheTagsInvalidatorInterface
   */
  protected $cacheTagInvalidator;

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    $instance = new static(
      $configuration,
      $plugin_id,
      $plugin_definition,
    );
    $entity_type_manager = $container->get('entity_type.manager');
    $instance->moduleHandler = $container->get('module_handler');
    $instance->logger = $container->get('logger.channel.podcast_publisher_analytics');
    $instance->episodeStorage = $entity_type_manager->getStorage('podcast_episode');
    $instance->fileStorage = $entity_type_manager->getStorage('file');
    $instance->database = $container->get('database');
    $instance->cacheTagInvalidator = $container->get('cache_tags.invalidator');
    return $instance;
  }

  /**
   * {@inheritdoc}
   *
   * Reads the keys 'ip', 'user_agent', 'file_id', 'episode_id', 'timestamp',
   * 'source' and, optionally, 'httprange' from $data.
   */
  public function processItem($data) {
    $requester_id = sha1($data['ip'] . $data['user_agent']);

    // Look for the earliest original intent (one that is not itself marked as
    // a repeat) of the same requester for the same file and episode within
    // the preceding 24 hours. According to IAB guidelines an IP/useragent
    // combo should only be counted once within 24h.
    $original_intent = $this->database->select('podcast_download_intent')
      ->fields(NULL, ['id'])
      ->condition('requester_id', $requester_id)
      ->condition('file', $data['file_id'])
      ->condition('episode', $data['episode_id'])
      ->condition('timestamp', [$data['timestamp'] - 86400, $data['timestamp']], 'BETWEEN')
      ->condition('original_intent', NULL, 'IS NULL')
      ->orderBy('timestamp')
      ->range(0, 1)
      ->execute()
      ->fetch();

    $new_download_intent = [
      'user_agent_string' => $data['user_agent'],
      'source' => $data['source'],
      'requester_id' => $requester_id,
      'timestamp' => $data['timestamp'],
      'processed' => 0,
      'original_intent' => $original_intent ? $original_intent->id : NULL,
    ];

    // Only reference the episode and the file if they can still be loaded.
    if ($episode = $this->episodeStorage->load($data['episode_id'])) {
      $new_download_intent['episode'] = $episode->id();
    }
    if ($file = $this->fileStorage->load($data['file_id'])) {
      $new_download_intent['file'] = $file->id();
    }
    if (isset($data['httprange'])) {
      $new_download_intent['httprange'] = $data['httprange'];
    }
    if ($user_agent_id = $this->findOrCreateUserAgentFromUaString($data['user_agent'])) {
      $new_download_intent['user_agent'] = $user_agent_id;
    }

    // Geo data is best effort: a failed lookup is logged and the intent is
    // stored without (or with partial) location information.
    try {
      $this->processGeoData($new_download_intent, $data['ip']);
    }
    catch (\Exception $e) {
      $this->logger->error($e->getMessage());
    }

    $this->database->insert('podcast_download_intent')
      ->fields($new_download_intent)
      ->execute();
    $this->cacheTagInvalidator->invalidateTags(['podcast_download_intent']);
  }

  /**
   * Returns the parsed user agent data.
   *
   * The data file is read and decoded at most once per worker instance. If
   * the file is missing, unreadable or does not decode to an array or object,
   * an empty array is returned so that callers can always iterate the result.
   *
   * @return mixed[]|object
   *   The parsed user agents data.
   */
  protected function getUserAgents() {
    if ($this->userAgents === NULL) {
      $parsed = NULL;
      $user_agents_file_path = $this->moduleHandler
        ->getModule('podcast_publisher_analytics')
        ->getPath() . '/data/user-agents.json';
      if (is_file($user_agents_file_path)) {
        $contents = file_get_contents($user_agents_file_path);
        if ($contents !== FALSE) {
          $parsed = json_decode($contents);
        }
      }
      // Cache an empty list on failure; NULL would break foreach in callers.
      $this->userAgents = (is_array($parsed) || is_object($parsed)) ? $parsed : [];
    }
    return $this->userAgents;
  }

  /**
   * Returns the geolite2 db reader.
   *
   * The reader is created on first use and reused afterwards.
   *
   * @return \GeoIp2\Database\Reader
   *   The reader.
   */
  protected function getGeoReader() {
    if (!$this->geoReader) {
      $this->geoReader = new Reader(\Drupal::root() . '/' . static::GEOLITE_DB_PATH_TO_FILE);
    }
    return $this->geoReader;
  }

  /**
   * Gets or creates the user agent row that matches the user agent string.
   *
   * An existing 'podcast_user_agent' row with the same (trimmed) string is
   * reused. Otherwise the string is tested against the patterns of the parsed
   * user agents data and, on the first match, a new row is inserted.
   *
   * @param string $ua_string
   *   The user agent string.
   *
   * @return int|null
   *   The user agent's id or NULL if there is no match.
   */
  protected function findOrCreateUserAgentFromUaString($ua_string): ?int {
    $ua_string = trim((string) $ua_string);

    // Check if a user agent row is already there.
    $existing = $this->database->select('podcast_user_agent')
      ->fields(NULL, ['id'])
      ->condition('user_agent', $ua_string)
      ->execute()
      ->fetch();
    if ($existing) {
      return (int) $existing->id;
    }

    // Find the first entry whose patterns match the request's ua string.
    $user_agent_match = NULL;
    foreach ($this->getUserAgents() as $user_agent_data) {
      foreach ($user_agent_data->user_agents ?? [] as $regex) {
        // The patterns are stored without delimiters, so '/' is escaped before
        // wrapping them.
        // NOTE(review): a pattern that already contains an escaped '\/' would
        // be double-escaped here - confirm against the data file.
        $compiled_regex = str_replace('/', '\/', $regex);
        if (preg_match("/{$compiled_regex}/", $ua_string) === 1) {
          $user_agent_match = $user_agent_data;
          break 2;
        }
      }
    }
    if (!$user_agent_match) {
      return NULL;
    }

    $new_user_agent = [
      'user_agent' => $ua_string,
    ];
    if (isset($user_agent_match->app)) {
      $new_user_agent['client_name'] = $user_agent_match->app;
    }
    // Only flag as bot when the value is truthy, not merely present.
    if (!empty($user_agent_match->bot)) {
      $new_user_agent['bot'] = 1;
    }
    if (isset($user_agent_match->os)) {
      $new_user_agent['os_name'] = $this->normalizeOs($user_agent_match->os);
    }

    $new_id = $this->database->insert('podcast_user_agent')
      ->fields($new_user_agent)
      ->execute();
    return $new_id === NULL ? NULL : (int) $new_id;
  }

  /**
   * Adds geo data for the given IP address to the download intent.
   *
   * Sets the 'lat' and 'lng' keys and, if an area could be resolved, the
   * 'geo_area' key. Exceptions raised by the reader are not caught here.
   *
   * @param mixed[] $download_intent_array
   *   Associative array with column names as keys.
   * @param string $ip_address
   *   The ip address.
   */
  protected function processGeoData(array &$download_intent_array, string $ip_address): void {
    $geo_data = $this->getGeoReader()->city($ip_address);
    $download_intent_array['lat'] = $geo_data->location->latitude;
    $download_intent_array['lng'] = $geo_data->location->longitude;
    if ($geo_area_id = $this->getArea($geo_data, 'city')) {
      $download_intent_array['geo_area'] = $geo_area_id;
    }
  }

  /**
   * Gets the parent area.
   *
   * The hierarchy is city > subdivision > country > continent; a continent
   * (or an unknown type) has no parent.
   *
   * @param \GeoIp2\Model\AbstractModel $record
   *   The record.
   * @param string $type
   *   The type of the area whose parent is requested.
   *
   * @return int|null
   *   The id of the parent geo area or NULL if there is none.
   */
  protected function getParentArea(AbstractModel $record, $type) {
    $parent_types = [
      'city' => 'subdivision',
      'subdivision' => 'country',
      'country' => 'continent',
    ];
    if (!isset($parent_types[$type])) {
      return NULL;
    }
    return $this->getArea($record, $parent_types[$type]);
  }

  /**
   * Returns the id of the geo area of the given type, creating it if needed.
   *
   * If the record carries no usable data for the requested type (no geoname
   * id or no English name), the next area up the hierarchy is used instead.
   *
   * @param \GeoIp2\Model\AbstractModel $record
   *   The record.
   * @param string $type
   *   The type: 'city', 'subdivision', 'country' or 'continent'.
   *
   * @return int|null
   *   The GeoArea id.
   */
  protected function getArea(AbstractModel $record, $type): ?int {
    $sub_record = $record->{$type == 'subdivision' ? 'mostSpecificSubdivision' : $type};
    if (!$sub_record->geonameId) {
      return $this->getParentArea($record, $type);
    }
    // Areas are stored under their English name; without one there is nothing
    // meaningful to look up or insert.
    $name = $sub_record->names['en'] ?? NULL;
    if ($name === NULL) {
      return $this->getParentArea($record, $type);
    }

    // Check if area already exists. The type is part of the lookup because
    // areas of different types can share a name (e.g. a city and its state).
    $area_row = $this->database->select('podcast_geo_area')
      ->fields(NULL, ['id'])
      ->condition('name', $name)
      ->condition('area_type', $type)
      ->execute()
      ->fetch();
    if ($area_row) {
      return (int) $area_row->id;
    }

    $new_area = [
      'name' => $name,
      'area_type' => $type,
    ];
    // Use whichever of the two code properties the record provides.
    if (isset($sub_record->code)) {
      $new_area['code'] = $sub_record->code;
    }
    elseif (isset($sub_record->isoCode)) {
      $new_area['code'] = $sub_record->isoCode;
    }
    if ($type != 'continent') {
      $new_area['parent'] = $this->getParentArea($record, $type);
    }

    $new_id = $this->database->insert('podcast_geo_area')
      ->fields($new_area)
      ->execute();
    return $new_id === NULL ? NULL : (int) $new_id;
  }

  /**
   * Normalize OS names.
   *
   * The lookup is case-insensitive and ignores surrounding whitespace. Names
   * without a mapping are returned unchanged.
   *
   * @param string $os_name
   *   The OS's name.
   *
   * @return string
   *   The normalized OS name.
   */
  protected function normalizeOs($os_name) {
    $map = [
      'alexa' => 'Alexa',
      'ios' => 'iOS',
      'android' => 'Android',
      'mac' => 'macOS',
      'macos' => 'macOS',
      'watchos' => 'watchOS',
      'windows' => 'Windows',
      'linux' => 'Linux',
      'sonos' => 'Sonos',
      'homepod_os' => 'HomepodOS',
      'tvos' => 'tvOS',
    ];
    return $map[trim(strtolower($os_name))] ?? $os_name;
  }

}
