<?php
// scraper.php
// Whitelist of category names the scraper will accept from the request.
$allowedCategories = [
    'images',
    'pdfs',
    'archives',
    'videos',
    'projects',
    'misc',
];

// The script's only output is a JSON document, so announce that up front.
header('Content-Type: application/json');

/**
 * Load a JSON document from disk as an associative array.
 *
 * Falls back to an empty category skeleton when the file is missing,
 * unreadable, not valid JSON, or decodes to anything other than a
 * non-empty array.
 *
 * @param string $file Path of the JSON file to read.
 * @return array Decoded data, or ['category' => '', 'totalItems' => 0, 'items' => []].
 */
function loadJson($file) {
    $skeleton = ['category' => '', 'totalItems' => 0, 'items' => []];

    if (!is_file($file)) {
        return $skeleton;
    }

    $raw = file_get_contents($file);
    if ($raw === false) {
        return $skeleton;
    }

    $decoded = json_decode($raw, true);

    // Scalars such as 42 or "text" are valid JSON, but callers index the
    // result as an array, so anything that is not a non-empty array is
    // treated the same as a missing file.
    if (!is_array($decoded) || $decoded === []) {
        return $skeleton;
    }

    return $decoded;
}

/**
 * Write $data to $file as pretty-printed JSON with unescaped slashes.
 *
 * If the data cannot be encoded (for example because a string contains
 * invalid UTF-8) the existing file is left untouched and the failure is
 * logged; writing the failed result would otherwise truncate the file.
 *
 * @param string $file Destination path.
 * @param mixed  $data Value to serialise.
 * @return void
 */
function saveJson($file, $data) {
    $json = json_encode($data, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);

    if ($json === false) {
        // json_encode() returns false on failure; file_put_contents() would
        // write that as an empty string and wipe the stored data.
        error_log('saveJson: cannot encode data for ' . $file . ': ' . json_last_error_msg());
        return;
    }

    file_put_contents($file, $json);
}

/**
 * Register a category in data/db.json if it is not listed there yet.
 *
 * The index file is only rewritten when a new entry is actually added.
 *
 * @param string $category Category key, also used for the file name and type.
 * @return void
 */
function updateDbJson($category) {
    $indexPath = 'data/db.json';
    $index = loadJson($indexPath);

    // Nothing to do when the category is already registered.
    if (isset($index['categories'][$category])) {
        return;
    }

    $entry = [];
    $entry['name'] = ucfirst($category);
    $entry['file'] = $category . '.json';
    $entry['type'] = $category;

    $index['categories'][$category] = $entry;
    saveJson($indexPath, $index);
}

/**
 * Build the descriptive metadata stored for a scanned file.
 *
 * @param string $filepath Path of the file to describe.
 * @return array Keys: 'size' (megabytes, one decimal, " MB" suffix),
 *               'format' (upper-cased extension), 'dimensions'
 *               ("WIDTHxHEIGHT" for readable images, otherwise ''),
 *               'description' ("Scanned file: <basename>").
 */
function getFileMetadata($filepath) {
    $extension = pathinfo($filepath, PATHINFO_EXTENSION);

    $bytes = filesize($filepath);
    if ($bytes === false) {
        $bytes = 0;
    }

    $info = [
        'size' => round($bytes / (1024 * 1024), 1) . ' MB',
        'format' => strtoupper($extension),
        'dimensions' => '',
        'description' => 'Scanned file: ' . basename($filepath)
    ];

    if (in_array(strtolower($extension), ['jpg', 'jpeg', 'png', 'gif', 'webp'], true)) {
        // getimagesize() reads the header of every format listed above,
        // whereas exif_read_data() only understands JPEG/TIFF and needs the
        // optional exif extension. Errors are suppressed on purpose: a
        // corrupt image simply yields no dimensions.
        $size = @getimagesize($filepath);
        if (is_array($size) && isset($size[0], $size[1])) {
            $info['dimensions'] = $size[0] . 'x' . $size[1];
        }
    }

    return $info;
}

/**
 * Extract the <title> text and the meta description from an HTML/PHP file.
 *
 * The file is read as plain text (PHP sources are not executed) and matched
 * with regular expressions, so this is a best-effort scan rather than a
 * real HTML parse.
 *
 * @param string $filepath File to inspect.
 * @return array ['title' => string, 'description' => string]; a value is ''
 *               when it is not found or the file cannot be read.
 */
function extractHtmlMetadata($filepath) {
    $meta = ['title' => '', 'description' => ''];

    if (!is_file($filepath) || !is_readable($filepath)) {
        return $meta;
    }

    $content = file_get_contents($filepath);
    if ($content === false) {
        return $meta;
    }

    // Extract <title>, tolerating attributes on the opening tag.
    if (preg_match('/<title\b[^>]*>(.*?)<\/title>/is', $content, $matches)) {
        $meta['title'] = trim($matches[1]);
    }

    // Extract meta description. The value must be closed by the same quote
    // character that opened it, so an apostrophe inside a double-quoted
    // value (content="It's ...") no longer cuts the text short. Both
    // attribute orders (name first or content first) are accepted.
    $quotedValue = '(?|"([^"]*)"|\'([^\']*)\')';
    $descriptionPatterns = [
        '/<meta\b[^>]*?\sname\s*=\s*["\']description["\'][^>]*?\scontent\s*=\s*' . $quotedValue . '/i',
        '/<meta\b[^>]*?\scontent\s*=\s*' . $quotedValue . '[^>]*?\sname\s*=\s*["\']description["\']/i',
    ];
    foreach ($descriptionPatterns as $pattern) {
        if (preg_match($pattern, $content, $matches)) {
            $meta['description'] = trim($matches[1]);
            break;
        }
    }

    return $meta;
}

/**
 * List the files inside a directory, optionally descending into subfolders.
 *
 * Paths are built as "$dir/<entry>" and returned in scandir() order, with
 * the files of a subdirectory inserted at that subdirectory's position.
 * A directory that does not exist or cannot be read yields an empty list.
 *
 * NOTE(review): is_dir() follows symbolic links, so a link cycle inside the
 * tree would make the recursive mode recurse without bound.
 *
 * @param string $dir       Directory to scan.
 * @param bool   $recursive Whether to include files of nested directories.
 * @return array List of file paths.
 */
function getAllFiles($dir, $recursive = false) {
    $files = [];

    if (!is_dir($dir) || !is_readable($dir)) {
        return $files;
    }

    $items = scandir($dir);
    if ($items === false) {
        // scandir() signals failure with false, which must not be iterated.
        return $files;
    }

    foreach ($items as $item) {
        if ($item === '.' || $item === '..') continue;
        $path = $dir . '/' . $item;
        if (is_file($path)) {
            $files[] = $path;
        } elseif ($recursive && is_dir($path)) {
            // Append in place instead of rebuilding the list with
            // array_merge() for every subdirectory.
            foreach (getAllFiles($path, true) as $nested) {
                $files[] = $nested;
            }
        }
    }

    return $files;
}

// Response envelope. 'messages' collects HTML-formatted status lines on
// success; validation failures set a single 'message' string instead.
$response = ['success' => false, 'messages' => []];

if (($_SERVER['REQUEST_METHOD'] ?? '') === 'POST') {
    // Accept plain strings only: an array-valued field (e.g. folder[]=x)
    // would otherwise reach is_dir() and raise a TypeError on PHP 8.
    $category   = is_string($_POST['category'] ?? null) ? $_POST['category'] : '';
    $folder     = is_string($_POST['folder'] ?? null) ? $_POST['folder'] : '';
    $recursive  = ($_POST['recursive'] ?? null) === 'on';
    $dryRun     = ($_POST['dryrun'] ?? null) === 'on';

    // The messages below contain HTML markup, so the user-supplied folder is
    // escaped once here and only the escaped form is ever echoed back.
    $safeFolder = htmlspecialchars($folder);

    // NOTE(review): $folder comes straight from the request and is scanned
    // as given; consider confining it to a known base directory.
    if (!in_array($category, $allowedCategories, true)) {
        $response['message'] = "Invalid category.";
    } elseif ($folder === '' || !is_dir($folder)) {
        $response['message'] = "Folder does not exist: " . $safeFolder;
    } else {
        $jsonFile = "data/{$category}.json";
        $data = loadJson($jsonFile);
        $data['category'] = $category;

        // A hand-edited or damaged data file may lack a usable item list.
        if (!isset($data['items']) || !is_array($data['items'])) {
            $data['items'] = [];
        }

        $added = 0;
        $updated = 0;

        if ($dryRun) $response['messages'][] = "🔍 DRY RUN MODE - No changes will be saved";

        if ($category === 'projects') {
            // Special handling for projects: scan subfolders only
            $subfolders = scandir($folder);
            if ($subfolders === false) {
                $subfolders = [];
            }

            // Continue numbering after the highest existing "proj-NNN" id so
            // that ids stay unique even when earlier entries were removed.
            // Without gaps this matches the former count-based numbering.
            $lastId = count($data['items']);
            foreach ($data['items'] as $existing) {
                $existingId = is_array($existing) ? ($existing['id'] ?? '') : '';
                if (is_string($existingId) && preg_match('/^proj-(\d+)$/', $existingId, $idMatch)) {
                    $lastId = max($lastId, (int) $idMatch[1]);
                }
            }

            foreach ($subfolders as $sub) {
                if ($sub === '.' || $sub === '..') continue;
                $subPath = $folder . '/' . $sub;
                if (!is_dir($subPath)) continue;

                // The first index file found supplies title and description.
                $indexFile = null;
                foreach (['index.html', 'index.htm', 'index.php'] as $idx) {
                    if (file_exists($subPath . '/' . $idx)) {
                        $indexFile = $subPath . '/' . $idx;
                        break;
                    }
                }

                $htmlMeta = $indexFile ? extractHtmlMetadata($indexFile) : ['title' => '', 'description' => ''];
                $title = !empty($htmlMeta['title']) ? $htmlMeta['title'] : ucwords(str_replace(['-', '_'], ' ', $sub));
                $description = !empty($htmlMeta['description']) ? $htmlMeta['description'] : "Project: " . $title;

                // Check if project already exists (matched by folder name);
                // an existing entry only gets a missing description filled in.
                $exists = false;
                foreach ($data['items'] as &$item) {
                    if (basename(rtrim($item['folder'] ?? '', '/')) === $sub) {
                        $exists = true;
                        if (empty($item['description']) && !empty($description)) {
                            $item['description'] = $description;
                            $updated++;
                        }
                        break;
                    }
                }
                // Drop the reference so later writes to $item cannot
                // silently modify an element of $data['items'].
                unset($item);

                if (!$exists) {
                    $lastId++;
                    $newItem = [
                        "id" => "proj-" . str_pad((string) $lastId, 3, '0', STR_PAD_LEFT),
                        "title" => $title,
                        "slug" => strtolower(str_replace([' ', '_'], '-', $sub)),
                        "folder" => $subPath . "/",
                        "mainImage" => $subPath . "/screenshot.jpg",
                        "thumb" => $subPath . "/thumb.jpg",
                        "description" => $description,
                        "homepage" => "",
                        "technologies" => [],
                        "status" => "Completed",
                        "dateAdded" => date('Y-m-d'),
                        "tags" => []
                    ];

                    if (!$dryRun) {
                        $data['items'][] = $newItem;
                    }
                    $added++;
                }
            }
        } else {
            // Normal file scraping for other categories
            $files = getAllFiles($folder, $recursive);
            foreach ($files as $filepath) {
                // ... (existing normal logic remains unchanged)
                // [Same code as previous version for non-projects]
            }
        }

        // Persist only on a real run; a dry run just reports the counters.
        if (!$dryRun) {
            $data['totalItems'] = count($data['items']);
            saveJson($jsonFile, $data);
            updateDbJson($category);
        }

        $response['success'] = true;
        $response['messages'][] = "Scanned folder: <strong>$safeFolder</strong>" . ($recursive ? " (recursive)" : "");
        if ($category === 'projects') {
            $response['messages'][] = "Processed project folders only (index.html detection)";
        }
        $response['messages'][] = "Added: <strong>$added</strong> new entries";
        $response['messages'][] = "Updated: <strong>$updated</strong> existing entries";
    }
}

echo json_encode($response);
?>