#!/bin/bash
#
# Comprehensive audit of all PL and BP documents from 2015
# to identify the scope of wrong associations
#
set -e

# Base URL of the document API; override with the API_BASE environment variable.
API_BASE="${API_BASE:-http://localhost:8001}"
# Directory that receives the PDFs, the CSV and the report; override with the
# OUTPUT_DIR environment variable (defaults to the historical fixed location).
OUTPUT_DIR="${OUTPUT_DIR:-/tmp/aumentum_audit_2015}"
REPORT_FILE="$OUTPUT_DIR/audit_report.txt"

mkdir -p "$OUTPUT_DIR"

echo "=========================================="
echo "Comprehensive Audit: 2015 PL/BP Documents"
echo "=========================================="
echo ""
echo "Output directory: $OUTPUT_DIR"
echo "Report file: $REPORT_FILE"
echo ""

# Initialize report (truncates any report left over from a previous run).
# The heredoc delimiter is deliberately unquoted so that $(date) is expanded;
# with a quoted delimiter the report would contain the literal text "$(date)".
cat > "$REPORT_FILE" << EOF
================================================================================
COMPREHENSIVE AUDIT: 2015 PL/BP DOCUMENT ASSOCIATIONS
================================================================================
Date: $(date)
Purpose: Identify scope of wrong file associations from 2015 scanning
================================================================================

EOF

# Print $1 as a single CSV field. The value is emitted unchanged unless it
# contains a comma, a double quote or a newline, in which case it is wrapped in
# double quotes with embedded quotes doubled.
_csv_field() {
    local value="$1"
    local dq='"'
    case "$value" in
        *,*|*'"'*|*$'\n'*)
            printf '"%s"' "${value//$dq/$dq$dq}"
            ;;
        *)
            printf '%s' "$value"
            ;;
    esac
}

#######################################
# Download the PDF for one document and log the result.
# Globals:
#   API_BASE   (read) base URL of the document API
#   OUTPUT_DIR (read) directory for the PDF and for audit_data.csv
# Arguments:
#   $1 - document number that is queried (e.g. PL11089)
#   $2 - document id
#   $3 - document type label (only printed and recorded)
# Outputs:
#   Progress lines on stdout; appends one row to $OUTPUT_DIR/audit_data.csv.
# Returns:
#   0 if the API answered HTTP 200 and the saved file is larger than 1000
#   bytes, 1 otherwise. Callers running under `set -e` must guard the call.
#######################################
test_document() {
    local doc_num="$1"
    local doc_id="$2"
    local doc_type="$3"
    local csv_file="$OUTPUT_DIR/audit_data.csv"
    local pdf_file="$OUTPUT_DIR/${doc_num}_doc${doc_id}.pdf"
    local http_code file_size

    echo "Testing: $doc_num (ID: $doc_id, Type: $doc_type)"

    # Generate PDF. -G with --data-urlencode builds the query string with
    # proper escaping. Declaration and assignment are split, and the curl exit
    # status is ignored on purpose: on transport errors curl still prints
    # "000" for %{http_code}, which falls through to the failure path below.
    http_code=$(curl -s -G -w "%{http_code}" -o "$pdf_file" \
        --data-urlencode "document_number=$doc_num" \
        --data-urlencode "document_id=$doc_id" \
        "$API_BASE/documents/pdf-by-document-number" 2>/dev/null) || true

    if [ "$http_code" = "200" ] && [ -f "$pdf_file" ]; then
        # wc -c is portable across GNU and BSD; strip the padding some
        # implementations add around the number.
        file_size=$(wc -c < "$pdf_file" 2>/dev/null) || file_size=0
        file_size="${file_size//[!0-9]/}"
        file_size="${file_size:-0}"
        if [ "$file_size" -gt 1000 ]; then
            echo "  ✅ Generated: $pdf_file ($file_size bytes)"
            printf '%s,%s,%s,%s,%s,GENERATED\n' \
                "$(_csv_field "$doc_num")" "$(_csv_field "$doc_id")" \
                "$(_csv_field "$doc_type")" "$(_csv_field "$pdf_file")" \
                "$file_size" >> "$csv_file"
            return 0
        fi
    fi

    # Do not leave an error body or truncated download behind under a .pdf
    # name; anything matching *.pdf in OUTPUT_DIR is treated as a generated PDF.
    rm -f -- "$pdf_file"

    echo "  ❌ Failed: $doc_num (ID: $doc_id)"
    printf '%s,%s,%s,FAILED,0,FAILED\n' \
        "$(_csv_field "$doc_num")" "$(_csv_field "$doc_id")" \
        "$(_csv_field "$doc_type")" >> "$csv_file"
    return 1
}

# Phase 1: test the document numbers already known to be affected.
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Phase 1: Identifying PL Documents"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""

# Query for PL documents from database
echo "Querying database for PL documents..."

# Create CSV header (truncates data from any previous run)
echo "document_number,document_id,document_type,pdf_file,file_size,status" > "$OUTPUT_DIR/audit_data.csv"

# Look up every document registered under document number $1 and run
# test_document on each of them.
test_all_matches() {
    local doc_num="$1"
    local doc_id doc_type

    curl -s "$API_BASE/documents/by-document-number?document_number=$doc_num" | \
        jq -r '.items[] | "\(.id)|\(.document_type_label)"' | \
    while IFS='|' read -r doc_id doc_type; do
        # test_document returns 1 for a failed download. That outcome is
        # already recorded in the CSV and must not abort the audit under
        # `set -e`, so the status is deliberately discarded here.
        test_document "$doc_num" "$doc_id" "$doc_type" || true
    done
}

# Get PL11089 (first known case)
echo ""
echo "1️⃣ Testing PL11089..."
test_all_matches "PL11089"

# Get PL689 (second known case)
echo ""
echo "2️⃣ Testing PL689..."
test_all_matches "PL689"

# Get BP102 (third known case)
echo ""
echo "3️⃣ Testing BP102..."
test_all_matches "BP102"

# Look up document number $1 and, if the API reports at least one match, run
# test_document on the first item only. Lookup problems are reported and
# skipped so that a single bad response cannot abort the audit under `set -e`.
test_first_match() {
    local doc_num="$1"
    local response count doc_id doc_type

    echo ""
    echo "   Testing $doc_num..."

    # Check if document exists
    if ! response=$(curl -s "$API_BASE/documents/by-document-number?document_number=$doc_num"); then
        echo "   ⚠️  $doc_num lookup failed (could not reach $API_BASE)"
        return 0
    fi
    if ! count=$(printf '%s\n' "$response" | jq -r '.count // 0'); then
        echo "   ⚠️  $doc_num lookup failed (unexpected API response)"
        return 0
    fi
    # An empty body or a non-numeric count is treated as "no match" instead of
    # being fed to the integer comparison below.
    case "$count" in
        ''|*[!0-9]*) count=0 ;;
    esac

    if [ "$count" -gt 0 ]; then
        printf '%s\n' "$response" | jq -r '.items[0] | "\(.id)|\(.document_type_label)"' | \
        while IFS='|' read -r doc_id doc_type; do
            # A failed download is already recorded in the CSV; keep going.
            test_document "$doc_num" "$doc_id" "$doc_type" || true
        done
    else
        echo "   ⚠️  $doc_num not found in database"
    fi
}

# Get some other PL documents for comparison
echo ""
echo "4️⃣ Testing sample of other PL documents..."

for doc_num in PL100 PL200 PL300 PL500 PL1000 PL2000; do
    test_first_match "$doc_num"
done

# Get some BP documents for comparison
echo ""
echo "5️⃣ Testing sample of other BP documents..."

for doc_num in BP100 BP101 BP103 BP200 BP300; do
    test_first_match "$doc_num"
done

echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Phase 2: Generating Summary Report"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""

# Print how many data rows of CSV file $2 have status $1 in their last column.
# Always prints exactly one integer. The previous `grep -c ... || echo "0"`
# printed "0" twice when nothing matched (grep -c prints 0 AND exits 1), which
# broke the arithmetic in the report below whenever a count was zero.
count_csv_status() {
    local status="$1"
    local csv_file="$2"
    local n

    n=$(awk -F, -v s="$status" 'NR > 1 && $NF == s { n++ } END { print n + 0 }' \
        "$csv_file" 2>/dev/null) || n=0
    printf '%s\n' "${n:-0}"
}

# Count results
total_generated=$(count_csv_status "GENERATED" "$OUTPUT_DIR/audit_data.csv")
total_failed=$(count_csv_status "FAILED" "$OUTPUT_DIR/audit_data.csv")

cat >> "$REPORT_FILE" << EOF

AUDIT RESULTS
================================================================================

Total Documents Tested: $((total_generated + total_failed))
Successfully Generated PDFs: $total_generated
Failed Generations: $total_failed

PDF FILES LOCATION
================================================================================
Directory: $OUTPUT_DIR/
CSV Data: $OUTPUT_DIR/audit_data.csv

GENERATED PDF FILES
================================================================================
EOF

# List all generated PDFs as "<path> - <human-readable size>".
# The glob is iterated directly instead of parsing a multi-file `ls` listing,
# so paths containing spaces stay intact. The fallback line is written
# explicitly: with `ls | awk ... || echo`, the pipeline status was awk's, so
# "No PDFs generated" was never written.
pdf_count=0
for pdf in "$OUTPUT_DIR"/*.pdf; do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "$pdf" ] || continue
    # Field 5 of `ls -l` is the size and precedes the file name, so it is not
    # affected by whitespace in the path.
    pdf_size=$(ls -lh -- "$pdf" 2>/dev/null | awk '{print $5}')
    echo "$pdf - $pdf_size" >> "$REPORT_FILE"
    pdf_count=$((pdf_count + 1))
done
if [ "$pdf_count" -eq 0 ]; then
    echo "No PDFs generated" >> "$REPORT_FILE"
fi

# Append the manual verification instructions. The heredoc delimiters in this
# section are quoted because the text is static and must not be expanded.
cat >> "$REPORT_FILE" << 'EOF'

MANUAL VERIFICATION REQUIRED
================================================================================

For EACH PDF file above, please:
1. Open the PDF
2. Find the "R OF O NO" or document number field
3. Note what document number is ACTUALLY shown in the PDF
4. Compare with the filename (what was queried)

VERIFICATION TEMPLATE
================================================================================

EOF

# Emit one template entry per successfully generated PDF instead of a fixed
# list of example IDs and placeholders. Only the first two CSV columns
# (document number, document id) and the last one (status) are read, so commas
# inside the document type label cannot shift the fields that matter. The file
# name is rebuilt with the same "<number>_doc<id>.pdf" scheme that
# test_document uses when saving the PDF.
template_entries=0
if [ -f "$OUTPUT_DIR/audit_data.csv" ]; then
    while IFS='|' read -r queried_num queried_id; do
        [ -n "$queried_num" ] || continue
        printf 'File: %s\n  Queried: %s\n  Shows:   _____________ (fill in what you see)\n  Status:  ✅ Correct / ❌ Wrong\n\n' \
            "${queried_num}_doc${queried_id}.pdf" "$queried_num" >> "$REPORT_FILE"
        template_entries=$((template_entries + 1))
    done < <(awk -F, 'NR > 1 && $NF == "GENERATED" { print $1 "|" $2 }' "$OUTPUT_DIR/audit_data.csv")
fi
if [ "$template_entries" -eq 0 ]; then
    echo "(No PDFs were generated, so there is nothing to verify.)" >> "$REPORT_FILE"
    echo "" >> "$REPORT_FILE"
fi

cat >> "$REPORT_FILE" << 'EOF'
ANALYSIS GUIDELINES
================================================================================

After verification, count:
1. How many documents show CORRECT content? → These are OK
2. How many documents show WRONG content? → These need fixing
3. Are the errors random or systematic?
4. Is there a pattern (e.g., all from same scanning date)?

NEXT STEPS
================================================================================

Based on verification results:
- If FEW errors (< 10): Create specific workarounds
- If MANY errors (> 10): Database correction needed
- If SYSTEMATIC pattern: Investigate 2015 scanning process

EOF

# Print the closing summary and the follow-up instructions to stdout.
# The heredoc delimiter is unquoted so the totals and paths are expanded;
# "\$f" is escaped so that the suggested loop is printed literally.
# The "open all PDFs" hint is a loop because xdg-open takes a single file,
# and the suggested commands quote the paths so they survive spaces.
cat << EOF

==========================================
Audit Complete!
==========================================

📊 Summary:
   Generated PDFs: $total_generated
   Failed: $total_failed

📁 Output Location:
   $OUTPUT_DIR/

📄 Report:
   $REPORT_FILE

🔍 CSV Data:
   $OUTPUT_DIR/audit_data.csv

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
NEXT STEPS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

1. Read the report:
   cat "$REPORT_FILE"

2. Open all PDFs and verify content:
   for f in "$OUTPUT_DIR"/*.pdf; do xdg-open "\$f"; done

3. For each PDF, note what document number is ACTUALLY shown

4. Fill in the verification template in the report

5. Share results so we can determine scope and fix strategy

EOF

