ai_upgrade_assistant-0.2.0-alpha2/src/Service/DataAnonymizer.php
src/Service/DataAnonymizer.php
<?php
namespace Drupal\ai_upgrade_assistant\Service;
use Drupal\Core\Config\ConfigFactoryInterface;
use Drupal\Core\Logger\LoggerChannelFactoryInterface;
use Drupal\Core\State\StateInterface;
/**
 * Service for anonymizing upgrade patterns and sensitive data.
 *
 * Anonymization reduces file paths to base names and strips comments, string
 * literals and path-like tokens from recorded code changes. Validation scans a
 * pattern for anything that still looks like a path, e-mail address,
 * credential or URL.
 */
class DataAnonymizer {

  /**
   * The config factory.
   *
   * @var \Drupal\Core\Config\ConfigFactoryInterface
   */
  protected $configFactory;

  /**
   * The logger channel for this module.
   *
   * Despite the property name, this holds the 'ai_upgrade_assistant' channel
   * obtained from the logger factory in the constructor, not the factory.
   *
   * @var \Drupal\Core\Logger\LoggerChannelInterface
   */
  protected $loggerFactory;

  /**
   * The state service.
   *
   * @var \Drupal\Core\State\StateInterface
   */
  protected $state;

  /**
   * Constructs a new DataAnonymizer.
   *
   * @param \Drupal\Core\Config\ConfigFactoryInterface $config_factory
   *   The config factory.
   * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $logger_factory
   *   The logger factory; only its 'ai_upgrade_assistant' channel is kept.
   * @param \Drupal\Core\State\StateInterface $state
   *   The state service.
   */
  public function __construct(
    ConfigFactoryInterface $config_factory,
    LoggerChannelFactoryInterface $logger_factory,
    StateInterface $state
  ) {
    $this->configFactory = $config_factory;
    $this->loggerFactory = $logger_factory->get('ai_upgrade_assistant');
    $this->state = $state;
  }

  /**
   * Anonymizes an upgrade pattern.
   *
   * Only 'module_name', 'from_version', 'to_version', 'code_changes' and
   * 'success' are carried over; every other key of the input is dropped.
   *
   * @param array $pattern
   *   The upgrade pattern to anonymize.
   *
   * @return array
   *   The anonymized pattern. Identification keys that are missing from the
   *   input are present with a NULL value; 'success' defaults to FALSE.
   */
  public function anonymizePattern(array $pattern): array {
    $anonymized = [];

    // Keep only essential version information. Missing keys become NULL
    // instead of raising "undefined index" warnings.
    $anonymized['module_name'] = $pattern['module_name'] ?? NULL;
    $anonymized['from_version'] = $pattern['from_version'] ?? NULL;
    $anonymized['to_version'] = $pattern['to_version'] ?? NULL;

    // Anonymize code patterns.
    if (!empty($pattern['code_changes']) && is_array($pattern['code_changes'])) {
      $anonymized['code_changes'] = $this->anonymizeCodeChanges($pattern['code_changes']);
    }

    // Keep success/failure status.
    $anonymized['success'] = $pattern['success'] ?? FALSE;

    // Add metadata about anonymization.
    $anonymized['anonymized_at'] = time();
    $anonymized['anonymization_version'] = '1.0';

    return $anonymized;
  }

  /**
   * Anonymizes code changes by removing sensitive information.
   *
   * Keys other than 'file', 'variables' and 'code' are passed through
   * unchanged.
   *
   * @param array $changes
   *   The code changes to anonymize; each entry is expected to be an array.
   *
   * @return array
   *   The anonymized code changes, re-indexed from zero. Entries that are not
   *   arrays are dropped because they cannot be sanitized field by field.
   */
  protected function anonymizeCodeChanges(array $changes): array {
    $anonymized = [];

    foreach ($changes as $change) {
      if (!is_array($change)) {
        continue;
      }

      // Reduce the file path to its base name. Backslashes are normalized
      // first because basename() does not split on them on POSIX hosts, which
      // would leave a full Windows path in place.
      if (isset($change['file']) && is_scalar($change['file'])) {
        $change['file'] = basename(str_replace('\\', '/', (string) $change['file']));
      }
      else {
        unset($change['file']);
      }

      // Remove custom variable names.
      if (!empty($change['variables']) && is_array($change['variables'])) {
        $change['variables'] = $this->anonymizeVariables($change['variables']);
      }

      // Remove sensitive patterns. Code that is not a scalar cannot be
      // sanitized, so it is dropped rather than passed through.
      if (isset($change['code']) && is_scalar($change['code'])) {
        $change['code'] = $this->removeSensitivePatterns((string) $change['code']);
      }
      else {
        unset($change['code']);
      }

      $anonymized[] = $change;
    }

    return $anonymized;
  }

  /**
   * Builds a map from variable names to anonymized placeholder names.
   *
   * Names starting with 'drupal_' map to themselves; every other name maps to
   * 'var_N', numbered in input order starting at 1. The values of the input
   * array are not used.
   *
   * NOTE(review): the returned map is keyed by the ORIGINAL names, so those
   * names are still present in the anonymized output. Confirm whether
   * consumers need the mapping before this data leaves the site.
   *
   * @param array $variables
   *   The variables to anonymize, keyed by variable name.
   *
   * @return array
   *   Map of original name => anonymized name.
   */
  protected function anonymizeVariables(array $variables): array {
    $mapping = [];
    $counter = 1;

    foreach (array_keys($variables) as $name) {
      // Keep Drupal-specific prefixes for context.
      if (strpos((string) $name, 'drupal_') === 0) {
        $mapping[$name] = $name;
      }
      else {
        $mapping[$name] = "var_{$counter}";
        $counter++;
      }
    }

    return $mapping;
  }

  /**
   * Removes sensitive patterns from code while preserving upgrade context.
   *
   * Block comments and '//' line comments are deleted, quoted string literals
   * are replaced by '***' / "***", and remaining path-like tokens with a file
   * extension are replaced by FILE_PATH.
   *
   * @param string $code
   *   The code to process.
   *
   * @return string
   *   The processed code, or an empty string when PCRE fails so that
   *   unsanitized code is never returned.
   */
  protected function removeSensitivePatterns(string $code): string {
    // Comments and strings are tokenized in one left-to-right pass. Running
    // them as separate passes mis-parses a '//' inside a string (e.g. a URL)
    // as a comment, and an apostrophe inside a comment as a string start.
    // Possessive quantifiers keep long literals from exhausting the
    // backtracking limit.
    $tokens = '~/\*.*?\*/'
      . '|//[^\n]*'
      . '|\'(?:[^\'\\\\]++|\\\\.)*+\''
      . '|"(?:[^"\\\\]++|\\\\.)*+"~s';

    $masked = preg_replace_callback($tokens, static function (array $match): string {
      switch ($match[0][0]) {
        case '/':
          // Block or line comment: drop it entirely.
          return '';

        case "'":
          return "'***'";

        default:
          return '"***"';
      }
    }, $code);

    if ($masked === NULL) {
      return '';
    }

    // Remove file paths that appear outside of string literals.
    $masked = preg_replace('/(?:\/[a-zA-Z0-9_-]+)+\/[a-zA-Z0-9_-]+\.[a-zA-Z0-9]+/', 'FILE_PATH', $masked);

    return $masked ?? '';
  }

  /**
   * Validates that a pattern has been properly anonymized.
   *
   * Every key and value of the pattern is scanned individually. Scanning the
   * json_encode() output instead would hide matches, because JSON escapes
   * '/' as '\/' and '"' as '\"'.
   *
   * @param array $pattern
   *   The pattern to validate.
   *
   * @return bool
   *   TRUE if the pattern is properly anonymized, FALSE otherwise. A warning
   *   is logged when FALSE is returned.
   */
  public function validateAnonymization(array $pattern): bool {
    // Check for common sensitive patterns.
    $sensitivePatterns = [
      // File paths.
      '/(?:\/[a-zA-Z0-9_-]+){2,}/',
      // Email addresses.
      '/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/',
      // Credentials. The lookahead skips the '***' mask that
      // removeSensitivePatterns() writes, so sanitized output is not flagged.
      '/(?:password|secret|key|token)\s*[=:]\s*[\'"](?!\*{3}[\'"])[^\'"]+[\'"]/',
      // URLs.
      '/(?:https?:\/\/[^\s<>"]+|www\.[^\s<>"]+)/',
    ];

    foreach ($this->collectStrings($pattern) as $candidate) {
      foreach ($sensitivePatterns as $regex) {
        if (preg_match($regex, $candidate)) {
          $this->loggerFactory->warning('Sensitive pattern found in anonymized data');
          return FALSE;
        }
      }
    }

    return TRUE;
  }

  /**
   * Flattens nested data into a list of strings for scanning.
   *
   * @param array $data
   *   The data to flatten.
   *
   * @return string[]
   *   Every key and every scalar value, cast to string, in traversal order.
   *   Values that are neither arrays, scalars nor NULL are JSON-encoded with
   *   unescaped slashes; values that cannot be encoded are skipped.
   */
  protected function collectStrings(array $data): array {
    $strings = [];

    foreach ($data as $key => $value) {
      $strings[] = (string) $key;

      if (is_array($value)) {
        foreach ($this->collectStrings($value) as $nested) {
          $strings[] = $nested;
        }
      }
      elseif (is_scalar($value)) {
        $strings[] = (string) $value;
      }
      elseif ($value !== NULL) {
        $encoded = json_encode($value, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
        if (is_string($encoded)) {
          $strings[] = $encoded;
        }
      }
    }

    return $strings;
  }

  /**
   * Previews what data would be shared from a pattern.
   *
   * @param array $pattern
   *   The original pattern.
   *
   * @return array
   *   An array with the keys 'original', 'anonymized' and 'differences' for
   *   side-by-side comparison.
   */
  public function previewAnonymization(array $pattern): array {
    $anonymized = $this->anonymizePattern($pattern);

    return [
      'original' => $pattern,
      'anonymized' => $anonymized,
      'differences' => $this->highlightDifferences($pattern, $anonymized),
    ];
  }

  /**
   * Highlights differences between original and anonymized data.
   *
   * Only keys of the original are inspected; keys that exist solely in the
   * anonymized data are not reported.
   *
   * @param array $original
   *   The original data.
   * @param array $anonymized
   *   The anonymized data.
   *
   * @return array
   *   Differences keyed like the original. Leaves are arrays with 'type'
   *   ('removed' or 'modified'), 'original' and, for modifications,
   *   'anonymized'. Nested arrays produce nested difference arrays.
   */
  protected function highlightDifferences(array $original, array $anonymized): array {
    $differences = [];

    foreach ($original as $key => $value) {
      // array_key_exists() rather than isset(): a key that is present with a
      // NULL value has not been removed.
      if (!array_key_exists($key, $anonymized)) {
        $differences[$key] = [
          'type' => 'removed',
          'original' => $value,
        ];
      }
      elseif (is_array($value) && is_array($anonymized[$key])) {
        $subdiffs = $this->highlightDifferences($value, $anonymized[$key]);
        if (!empty($subdiffs)) {
          $differences[$key] = $subdiffs;
        }
      }
      elseif ($value !== $anonymized[$key]) {
        $differences[$key] = [
          'type' => 'modified',
          'original' => $value,
          'anonymized' => $anonymized[$key],
        ];
      }
    }

    return $differences;
  }

}
