Files
rspade_system/app/RSpade/CodeQuality/Support/FileSanitizer.php
root f6fac6c4bc Fix bin/publish: copy docs.dist from project root
Fix bin/publish: use correct .env path for rspade_system
Fix bin/publish script: prevent grep exit code 1 from terminating script

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-21 02:08:33 +00:00

160 lines
5.8 KiB
PHP
Executable File

<?php
namespace App\RSpade\CodeQuality\Support;
class FileSanitizer
{
    /**
     * Strip comments from PHP source while keeping every remaining token on
     * its original line, so line numbers in the result match the input.
     *
     * Comments are removed with the PHP tokenizer rather than regexes, so
     * comment-like text inside strings is left untouched.
     *
     * @param string $content Raw PHP source code.
     *
     * @return array{content: string, lines: array<int, string>, original_lines: array<int, string>}
     *         'content' is the comment-free source, 'lines' is that source split
     *         per line, 'original_lines' is the untouched input split on "\n".
     */
    public static function sanitize_php(string $content): array
    {
        $lines = [];
        $current_line = '';

        foreach (token_get_all($content) as $token) {
            if (!is_array($token)) {
                // Single-character tokens (";", "{", "(", ...) are plain strings
                $current_line .= $token;
                continue;
            }

            $is_comment = $token[0] === T_COMMENT || $token[0] === T_DOC_COMMENT;

            // Every "\n" inside a token ends the current output line. For a
            // comment only those line breaks are kept and the text is dropped;
            // this keeps code that follows a comment on its original line
            // number whether or not the tokenizer attached the trailing
            // newline to the comment token (PHP 7 does, PHP 8 does not).
            foreach (explode("\n", $token[1]) as $idx => $segment) {
                if ($idx > 0) {
                    $lines[] = $current_line;
                    $current_line = '';
                }
                if (!$is_comment) {
                    $current_line .= $segment;
                }
            }
        }

        // Flush the final line; an empty input still yields a single empty line
        if ($current_line !== '' || count($lines) === 0) {
            $lines[] = $current_line;
        }

        return [
            'content' => implode("\n", $lines),
            'lines' => $lines,
            'original_lines' => explode("\n", $content),
        ];
    }

    /**
     * Get sanitized JavaScript content for checking.
     *
     * Runs the external Node.js script bin/js-sanitizer.js on the file and
     * caches its output on disk. The cache entry is reused as long as it is
     * at least as new as the source file.
     *
     * If the sanitizer produces no output, the original file content is
     * returned instead. That fallback is NOT written to the cache, so a
     * transient failure (e.g. node unavailable) cannot leave unsanitized
     * content behind as if it were a valid cache entry.
     *
     * @param string $file_path Path of the JavaScript/TypeScript file.
     *
     * @return array{content: string, lines: array<int, string>, original_lines: array<int, string>}
     */
    public static function sanitize_javascript(string $file_path): array
    {
        // Create cache directory if it doesn't exist
        $cache_dir = function_exists('storage_path')
            ? storage_path('rsx-tmp/cache/js-sanitized')
            : '/var/www/html/storage/rsx-tmp/cache/js-sanitized';
        if (!is_dir($cache_dir)) {
            mkdir($cache_dir, 0755, true);
        }

        // Generate cache path based on relative file path
        $base_path = function_exists('base_path') ? base_path() : '/var/www/html';
        $relative_path = str_replace($base_path . '/', '', $file_path);
        $cache_path = $cache_dir . '/' . str_replace('/', '_', $relative_path) . '.sanitized';

        // Read the source once; every return path needs it for 'original_lines'
        $original = (string) file_get_contents($file_path);

        // Serve from cache when the cache entry is not older than the source
        if (file_exists($cache_path) && filemtime($cache_path) >= filemtime($file_path)) {
            $cached = file_get_contents($cache_path);
            if ($cached !== false) {
                return self::build_result($cached, $original);
            }
            // Unreadable cache entry: fall through and regenerate it
        }

        // Run sanitizer to generate fresh cache
        $command = sprintf(
            'node %s %s 2>/dev/null',
            escapeshellarg($base_path . '/bin/js-sanitizer.js'),
            escapeshellarg($file_path)
        );
        $sanitized = shell_exec($command);

        // shell_exec() yields null on error/no output and false when the pipe
        // cannot be opened; treat all of those (and empty output) as failure.
        if (!is_string($sanitized) || $sanitized === '') {
            return self::build_result($original, $original);
        }

        // Only successful sanitizer output is cached
        file_put_contents($cache_path, $sanitized);

        return self::build_result($sanitized, $original);
    }

    /**
     * Sanitize a file based on its extension.
     *
     * PHP files are sanitized from $content (read from disk when null).
     * js/jsx/ts/tsx files are always sanitized from the file on disk, because
     * the external sanitizer needs a path; $content is ignored for them.
     * Any other file type is returned unchanged.
     *
     * @param string      $file_path Path used to pick the sanitizer (and to read the file when needed).
     * @param string|null $content   Optional already-loaded file content.
     *
     * @return array{content: string, lines: array<int, string>, original_lines: array<int, string>}
     */
    public static function sanitize(string $file_path, ?string $content = null): array
    {
        $extension = pathinfo($file_path, PATHINFO_EXTENSION);

        if (in_array($extension, ['js', 'jsx', 'ts', 'tsx'], true)) {
            // JavaScript sanitization needs file path, not content
            return self::sanitize_javascript($file_path);
        }

        if ($content === null) {
            $content = (string) file_get_contents($file_path);
        }

        if ($extension === 'php') {
            return self::sanitize_php($content);
        }

        // For other files, return as-is
        return self::build_result($content, $content);
    }

    /**
     * Assemble the result structure shared by all sanitizers.
     *
     * @param string $sanitized Sanitized content.
     * @param string $original  Original, unmodified content.
     *
     * @return array{content: string, lines: array<int, string>, original_lines: array<int, string>}
     */
    private static function build_result(string $sanitized, string $original): array
    {
        return [
            'content' => $sanitized,
            'lines' => explode("\n", $sanitized),
            'original_lines' => explode("\n", $original),
        ];
    }
}