tmgmt_smartling-8.x-4.11/src/Context/HtmlAssetInliner.php
src/Context/HtmlAssetInliner.php
<?php
namespace Drupal\tmgmt_smartling\Context;
//set_time_limit(300);
use Drupal;
use GuzzleHttp\ClientInterface;
use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Cookie\FileCookieJar;
use GuzzleHttp\Exception\RequestException;
use GuzzleHttp\Exception\TransferException;
use GuzzleHttp\Pool;
use GuzzleHttp\Psr7\Request;
use GuzzleHttp\Psr7\Response;
/**
 * Downloads pages (and batches of resources) through the injected HTTP client.
 *
 * All requests sent by one instance share a single cookie jar, and honour the
 * optional basic-auth / skip-host-verification project settings.
 */
class HtmlAssetInliner {

  /**
   * User agent sent when the caller does not supply one.
   */
  private const DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.215 Safari/534.10';

  /**
   * Cookie jar shared by every request this instance sends.
   */
  private CookieJar $cookieJar;

  /**
   * Error payload describing a missing file URI.
   *
   * Not referenced by any method of this class; retained because it is
   * protected and may be read by subclasses.
   */
  protected static $uriMissingError = [
    "response" => [
      "code" => "VALIDATION_ERROR",
      "data" => ["baseUrl" => NULL, "body" => NULL, "headers" => NULL],
      "messages" => ["fileUri parameter is missing."],
    ],
  ];

  /**
   * @param \GuzzleHttp\ClientInterface $client
   *   HTTP client used for every request.
   */
  public function __construct(private ClientInterface $client) {
    // The jar only has to live as long as this object. A file-backed jar on a
    // fresh tempnam() path was never read back and left one orphaned temp file
    // per instance, so an in-memory jar is used instead.
    $this->cookieJar = new CookieJar();
  }

  /**
   * Gets complete page data and returns generated string
   *
   * @param string $url - url to retrieve
   * @param bool $compress - whether to remove extra whitespaces
   * @param array $settings - project settings (basic auth, host verification)
   * @param bool $debug - whether to write debug messages to the logger
   *
   * @return string - page markup, or '' when the download failed or the
   *   response is 300 bytes or shorter
   * @throws \Exception - throws an exception if provided url isn't in proper
   *   format (this includes an empty url)
   */
  public function getCompletePage($url, $compress = FALSE, array $settings = [], $debug = FALSE) {
    // FILTER_VALIDATE_URL rejects every empty value as well, so no separate
    // "url is missing" check is needed after this guard.
    if (!filter_var($url, FILTER_VALIDATE_URL)) {
      throw new \Exception('Invalid URL. Make sure to specify http(s) part.');
    }

    $html = $this->getUrlContents($url, 0, self::DEFAULT_USER_AGENT, $settings, $debug);

    // getUrlContents() yields int -1 on failure; treat that the same way as a
    // suspiciously short body.
    if (!is_string($html) || strlen($html) <= 300) {
      if ($debug) {
        Drupal::logger('tmgmt_smartling_context_debug')->info('Response is too small.');
      }

      return '';
    }

    return $compress ? $this->compress($html) : $html;
  }

  /**
   * Checks whether or not remote file exists
   *
   * Sends a HEAD request and reports success for any status below 400.
   *
   * @param string $url
   * @param array $proj_settings - project settings (basic auth, host
   *   verification)
   * @param float $connection_timeout - seconds to wait for the connection
   * @param float $timeout - seconds to wait for the whole request
   *
   * @return bool - FALSE on an error status or any transfer failure
   */
  public function remoteFileExists($url, $proj_settings, $connection_timeout = 0.5, $timeout = 5) {
    $options = [
      'timeout' => $timeout,
      'connect_timeout' => $connection_timeout,
      'cookies' => $this->cookieJar,
    ];
    // The parameter is untyped; cast so a NULL does not raise a TypeError.
    $this->applySettingsToOptions((array) $proj_settings, $options);

    try {
      return $this->client->head($url, $options)->getStatusCode() < 400;
    }
    catch (TransferException $e) {
      // TransferException is the common parent of request and connection
      // errors, so unreachable hosts are reported as "does not exist" too.
      return FALSE;
    }
  }

  /**
   * Compresses generated page by removing extra whitespace
   *
   * Line breaks and tabs become single spaces, then every run of spaces is
   * collapsed to one space.
   *
   * @param string $string
   *
   * @return string
   */
  private function compress($string) {
    $flattened = str_replace(["\r\n", "\r", "\n", "\t"], ' ', $string);

    // preg_replace() returns NULL on a PCRE error; fall back to the
    // un-collapsed text rather than losing the page.
    return preg_replace('/ {2,}/', ' ', $flattened) ?? $flattened;
  }

  /**
   * Gets content for given url using Guzzle and optionally using user agent
   *
   * @param string $url
   * @param float $timeout - total request timeout in seconds
   * @param string $user_agent
   * @param array $settings - project settings (basic auth, host verification)
   * @param bool $debug - whether to log request options and the response
   *
   * @return int|string - response body, or -1 when the transfer failed
   */
  public function getUrlContents(
    $url,
    $timeout = 0,
    $user_agent = self::DEFAULT_USER_AGENT,
    array $settings = [],
    $debug = FALSE
  ) {
    $options = $this->buildRequestOptions($timeout, $user_agent, $settings);

    try {
      $response = $this->client->get($url, $options);
      $body = (string) $response->getBody();
    }
    catch (TransferException $e) {
      if ($debug) {
        Drupal::logger('tmgmt_smartling_context_debug')->error('Guzzle error: @message', [
          '@message' => $e->getMessage(),
        ]);
      }

      return -1;
    }

    if ($debug) {
      $logger = Drupal::logger('tmgmt_smartling_context_debug');
      $logger->info('Guzzle request options: @options', [
        '@options' => $this->optionsForLog($options),
      ]);
      $logger->info('Curl response headers: @response_headers', [
        '@response_headers' => print_r($response->getHeaders(), TRUE),
      ]);
      $logger->info('Curl response body: @response_body', [
        '@response_body' => substr($body, 0, 500) . '*****',
      ]);
    }

    return $body;
  }

  /**
   * Gets content for several urls concurrently (5 requests at a time)
   *
   * @param iterable $urls - urls keyed by a caller-defined resource id
   * @param float $timeout - total per-request timeout in seconds
   * @param string $user_agent
   * @param array $settings - project settings (basic auth, host verification)
   * @param bool $debug - whether to log failed requests
   *
   * @return array - keyed by resource id; each value is the response body or
   *   -1 when that request failed
   */
  public function getUrlContentsPooled(
    $urls,
    $timeout = 0,
    $user_agent = self::DEFAULT_USER_AGENT,
    array $settings = [],
    $debug = FALSE
  ) {
    $options = $this->buildRequestOptions($timeout, $user_agent, $settings);
    $headers = $options['headers'] ?? [];

    $requests = static function ($urls_to_fetch) use ($headers) {
      foreach ($urls_to_fetch as $resourceId => $url) {
        yield $resourceId => new Request('GET', $url, $headers);
      }
    };

    $results = [];
    $pool = new Pool($this->client, $requests($urls), [
      'concurrency' => 5,
      // Headers already travel on each Request; everything else (timeouts,
      // cookies, redirects, auth, verify) must be handed to the pool or it is
      // not applied to pooled requests at all.
      'options' => array_diff_key($options, ['headers' => TRUE]),
      'fulfilled' => function (Response $response, $resourceId) use (&$results) {
        $results[$resourceId] = (string) $response->getBody();
      },
      // The rejection reason is left untyped: connection failures are not
      // RequestException instances in every Guzzle version, and a mismatching
      // type hint here would raise a TypeError inside the pool.
      'rejected' => function ($reason, $resourceId) use (&$results, $debug) {
        if ($debug) {
          Drupal::logger('tmgmt_smartling_context_debug')->error('Guzzle error: @message', [
            '@message' => $reason instanceof \Throwable ? $reason->getMessage() : print_r($reason, TRUE),
          ]);
        }
        $results[$resourceId] = -1;
      },
    ]);

    $pool->promise()->wait();

    return $results;
  }

  /**
   * Builds the Guzzle request options shared by single and pooled GETs.
   *
   * @param float $timeout
   * @param string $user_agent
   * @param array $settings
   *
   * @return array
   */
  private function buildRequestOptions($timeout, $user_agent, array $settings): array {
    $options = [
      'headers' => ['User-Agent' => $user_agent],
      'timeout' => $timeout,
      'connect_timeout' => 5,
      'cookies' => $this->cookieJar,
      'allow_redirects' => TRUE,
    ];
    $this->applySettingsToOptions($settings, $options);

    return $options;
  }

  /**
   * Renders request options for the debug log with the password masked.
   *
   * @param array $options
   *
   * @return string
   */
  private function optionsForLog(array $options): string {
    if (isset($options['auth'][1])) {
      $options['auth'][1] = '***';
    }

    return print_r($options, TRUE);
  }

  /**
   * Copies basic-auth and host-verification settings into request options.
   *
   * @param array $proj_settings - reads 'enable_basic_auth', 'basic_auth'
   *   ('login' / 'password') and 'context_skip_host_verifying'
   * @param array $options - request options, modified in place
   */
  private function applySettingsToOptions(array $proj_settings, array &$options): void {
    if (!empty($proj_settings['enable_basic_auth'])) {
      $options['auth'] = [
        $proj_settings['basic_auth']['login'] ?? '',
        $proj_settings['basic_auth']['password'] ?? '',
      ];
    }

    if (!empty($proj_settings['context_skip_host_verifying'])) {
      $options['verify'] = FALSE;
    }
  }

}
