#!/bin/bash
#
# Build complete mapping table for all mislabeled documents
#
set -e

# Base URL of the API that is queried below; override through the environment.
API_BASE="${API_BASE:-http://localhost:8001}"

echo "=========================================="
echo "Building Complete File Mapping Table"
echo "=========================================="
echo ""

# Restart server to get new debug endpoint
echo "1️⃣ Restarting server with debug endpoint..."
cd /home/plagis/workspace/plagis_aumentum

# Stop any running instance. SIGTERM goes first so the process can shut down
# cleanly; SIGKILL is only sent to whatever is still alive afterwards.
# `|| true`: pkill exits non-zero when nothing matched, which must not trip
# `set -e`.
server_pattern="python.*aumentum_api"
pkill -f "$server_pattern" 2>/dev/null || true
sleep 2
if pgrep -f "$server_pattern" > /dev/null 2>&1; then
    pkill -9 -f "$server_pattern" 2>/dev/null || true
    sleep 1
fi

# Start in background (the virtualenv is optional, hence the `|| true`)
# shellcheck disable=SC1091
source venv/bin/activate 2>/dev/null || true
python3 aumentum_api.py > /tmp/api_mapping.log 2>&1 &

# Check if server started: poll the health endpoint of the same API_BASE the
# rest of the script talks to, once per second, instead of sleeping a fixed
# time and probing a hard-coded address a single time.
server_up=false
for _ in {1..15}; do
    if curl -s "$API_BASE/health" > /dev/null 2>&1; then
        server_up=true
        break
    fi
    sleep 1
done

if [[ "$server_up" == true ]]; then
    echo "✅ Server started successfully"
else
    echo "❌ Server failed to start"
    echo "Check logs: tail /tmp/api_mapping.log"
    exit 1
fi

echo ""
echo "2️⃣ Querying database for store URLs..."
echo ""

# Documents in the chain
docs=("PL11089" "PL689" "BP102" "PL6204" "PL12321")

# Raw endpoint response per document, stored at the same index as in docs, so
# the table section below reuses it instead of querying the API a second time.
responses=()

# fetch_store_urls DOCUMENT_NUMBER
# Prints the debug endpoint's response body for one document on stdout.
# Always returns 0: a failed request simply yields empty output. Without the
# `|| true`, a non-zero curl exit inside `response=$(...)` would make `set -e`
# abort the whole script before the error branch below could report it.
fetch_store_urls() {
    curl -s "$API_BASE/debug/show-store-urls?document_number=$1" 2>/dev/null || true
}

# Create mapping file (truncated on every run, then appended to per document)
mapping_file="/tmp/document_url_mapping.txt"
{
    echo "DOCUMENT URL MAPPING"
    echo "Generated: $(date)"
    echo "="
    echo ""
} > "$mapping_file"

for i in "${!docs[@]}"; do
    doc="${docs[$i]}"

    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    echo "Document: $doc"
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

    # Get URLs from new debug endpoint
    response=$(fetch_store_urls "$doc")
    responses[$i]="$response"

    # `jq -e` fails on invalid/empty JSON and on a missing or null `.urls`,
    # so this single check covers both transport and API errors.
    if jq -e '.urls' <<<"$response" > /dev/null 2>&1; then
        total=$(jq -r '.total_urls' <<<"$response")
        echo "  Found $total URL(s) in database"

        {
            echo ""
            echo "$doc:"
        } >> "$mapping_file"
        # Show every URL on the terminal and record it in the mapping file.
        jq -r '.urls[] | "  Node: \(.node_id), URL: \(.content_url)"' <<<"$response" \
            | tee -a "$mapping_file"

        # Get the first URL (`// empty` turns a missing value into no output)
        first_url=$(jq -r '.urls[0].content_url // empty' <<<"$response")

        if [[ -n "$first_url" ]]; then
            echo "  Primary URL: $first_url"
        else
            echo "  ⚠️  No URLs found (0 images)"
        fi
    else
        echo "  ❌ API error or not found"
        echo "$doc: ERROR" >> "$mapping_file"
    fi

    echo ""
done

echo "=========================================="
echo "Building Mapping Table"
echo "=========================================="
echo ""

table_file="/tmp/mapping_table.md"

cat > "$table_file" << 'EOF'
# Document to File URL Mapping

## Current Database Associations

Based on debug endpoint query:

| Document | Node ID | Store URL | Size |
|----------|---------|-----------|------|
EOF

# Add each document's info, one table row per document, from the cached
# responses gathered above.
for i in "${!docs[@]}"; do
    doc="${docs[$i]}"
    response="${responses[$i]}"

    if jq -e '.urls[0]' <<<"$response" > /dev/null 2>&1; then
        # Build the whole row in one jq call from the first URL entry.
        jq -r --arg doc "$doc" \
            '.urls[0] | "| \($doc) | \(.node_id) | `\(.content_url)` | \(.content_size) |"' \
            <<<"$response" >> "$table_file"
    elif jq -e '.urls' <<<"$response" > /dev/null 2>&1; then
        # Valid answer, but the document has no URL attached.
        echo "| $doc | N/A | NO URL (0 images) | 0 |" >> "$table_file"
    else
        # The request failed; do not claim the document has zero images.
        echo "| $doc | N/A | API ERROR | N/A |" >> "$table_file"
    fi
done

# Append the hand-maintained sections (manually verified content and the
# still-incomplete "correct" mapping) to the generated table, then print the
# finished file. The quoted 'EOF' delimiter keeps the backticks and `???`
# placeholders literal.
table_path="/tmp/mapping_table.md"

cat << 'EOF' >> "$table_path"

## Verified Content (from manual PDF checks)

| Document Queried | File Used | Actually Shows |
|-----------------|-----------|----------------|
| PL11089 | `store://2015/3/26/.../3eee6f3f...fed.bin` | PL689 |
| PL689 | `store://2015/3/17/.../879dcd53...275.bin` | BP102 |
| BP102 | ??? | PL6204 |
| PL6204 | ??? | PL12321 |
| PL12321 | NO FILE | N/A |

## Correct Mapping (to be filled in)

Based on the pattern, the correct mapping should be:

| Document | Should Use This URL |
|----------|-------------------|
| PL11089 | ??? (file not found yet) |
| PL689 | `store://2015/3/26/.../3eee6f3f...fed.bin` (currently labeled PL11089) |
| BP102 | `store://2015/3/17/.../879dcd53...275.bin` (currently labeled PL689) |
| PL6204 | ??? (currently labeled BP102) - GET THIS |
| PL12321 | ??? (currently labeled PL6204) - GET THIS |

EOF

# Confirmation line, blank line, the table itself, trailing blank line.
printf '%s\n' "✅ Mapping table created: /tmp/mapping_table.md" ""
cat "$table_path"
printf '\n'
echo ""

# Closing banner: tell the operator what to do with the generated table.
# The quoted delimiter keeps the text literal; the blank line before EOF
# reproduces the trailing empty line.
cat << 'EOF'
==========================================
Next Steps
==========================================

1. Review /tmp/mapping_table.md
2. Fill in the missing URLs for BP102 and PL6204
3. Update CORRECT_FILE_MAPPING in aumentum_browser_service.py

EOF

