#!/bin/bash
#
# Find nodes that might contain the missing PL11089 content
# and identify nodes that don't have string_value set
#
# Stop at the first command that exits with a non-zero status.
set -o errexit

# Base URL used for every API request below. A non-empty API_BASE already
# present in the environment is kept; otherwise the local default is assigned.
: "${API_BASE:=http://localhost:8001}"

# Static preamble: title banner, what has been observed so far, and the
# working hypothesis. Quoted heredoc delimiter => text is emitted literally.
cat <<'EOF'
==========================================
Finding Missing Content Nodes
==========================================

DISCOVERY:
  PL12321 has NO images in Alfresco (all show 'Images: 0')
  This means PL12321 documents exist but have no file associations

HYPOTHESIS:
  - The real PL11089 file might be in an unlabeled node
  - Or it's labeled with a completely different document number
  - We need to find nodes created around the same time

EOF

echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "1️⃣ Checking documents created in March 2015"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""

# Request up to 200 entries from the /lrs/content/recent endpoint.
# NOTE(review): the request carries no date filter, so whether the result
# actually covers March 2015 depends on what the API considers "recent" --
# confirm against the API before relying on the heading above.
echo "Fetching Alfresco nodes from early 2015..."
recent_url="$API_BASE/lrs/content/recent?limit=200"

# -s hides the progress meter, -S still reports transport errors. Without -S
# a failed request made the script die under `set -e` with no message at all.
# The exit status stays curl's own, so callers see the same code as before.
response=$(curl -sS "$recent_url") || {
  rc=$?
  echo "ERROR: request to $recent_url failed (curl exit $rc)" >&2
  exit "$rc"
}

echo "Recent content nodes (showing sample):"
# Print one summary line for each of the first 20 items.
node_filter='.items[:20] | .[] | "  Node: \(.id), UUID: \(.uuid), URL: \(.content_url), Size: \(.content_size)"'

# jq's stderr is discarded on purpose: a malformed or unexpected response is
# reported through the "No data" line below instead of a raw jq error.
jq_rc=0
sample=$(jq -r "$node_filter" <<<"$response" 2>/dev/null) || jq_rc=$?

# Show whatever jq managed to produce, then flag the case where it failed
# or produced nothing (an empty result used to print no hint at all).
if [[ -n "$sample" ]]; then
  printf '%s\n' "$sample"
fi
if (( jq_rc != 0 )) || [[ -z "$sample" ]]; then
  echo "  No data"
fi

echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "2️⃣ Finding documents created 2015-03-09"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""

# Request up to 200 source documents and keep those whose create_date
# starts with 2015-03-09.
echo "Documents from 2015-03-09 (when PL11089 was created):"
docs_url="$API_BASE/lrs/source-documents?limit=200"

# -S keeps transport errors visible; without it a failed request killed the
# script under `set -e` without any message. Exit status remains curl's own.
response=$(curl -sS "$docs_url") || {
  rc=$?
  echo "ERROR: request to $docs_url failed (curl exit $rc)" >&2
  exit "$rc"
}

# startswith() raises on non-string input, and `?` after .create_date only
# guards the field access. A single item with a null/missing create_date
# therefore used to abort jq (silently, since stderr is discarded) and drop
# every later item. Coerce the date to a string first so such items are
# simply skipped by select().
date_filter='.items[]? | select((.create_date? // "") | tostring | startswith("2015-03-09")) | "  \(.document_number): ID=\(.id), Type=\(.document_type), Pages=\(.page_count)"'

# stderr is discarded on purpose (unexpected response shape => no rows);
# at most 20 matches are shown.
jq -r "$date_filter" <<<"$response" 2>/dev/null | head -20

# Section 3: recap of the store URLs recorded by earlier diagnostics and the
# open question they leave. Quoted heredoc delimiter => emitted literally.
cat <<'EOF'

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
3️⃣ Looking for orphaned or mislabeled nodes
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

From earlier diagnostics:

  PL11089 uses: store://2015/3/26/15/8/3eee6f3f-0b98-41b9-a6cb-2c4488152fed.bin
  └─ But contains PL689 content

  PL689 uses:   store://2015/3/17/10/10/879dcd53-f552-4e82-858f-7e868e60a275.bin
  └─ But contains BP102 content

QUESTION: Where is the file that contains REAL PL11089 content?

EOF

# Analysis header: the label -> actual-content chain as currently understood,
# followed by the candidate explanations for the missing PL11089 content.
cat <<'EOF'
==========================================
ANALYSIS & NEXT STEPS
==========================================

📊 Current Understanding:

  Each document's node is mislabeled in sequence:
  - Node labeled 'PL11089' contains 'PL689' content
  - Node labeled 'PL689' contains 'BP102' content
  - Node labeled 'BP102' contains 'PL6204' content
  - Node labeled 'PL6204' contains 'PL12321' content
  - Real PL11089 content is in... ???

💡 Possibilities:

  A) There's a 6th document whose node contains PL11089 content
  B) PL12321 node exists but is labeled as something else entirely
  C) PL11089 content was never scanned/uploaded

EOF

# Proposed partial fix and the outcome it would give per document.
echo "🔧 Immediate Fix Strategy:"
echo ""
echo "Since we can't complete a perfect circle, we can:"
echo ""
echo "1. Create partial redirect mapping for the 4 documents we know"
echo "2. Leave PL11089 pointing to 'PL689' (best available option)"
echo "3. All others get correct content"
echo ""
# Redirect targets follow the label -> content chain printed under
# "Current Understanding": a document must be redirected to the node that
# currently HOLDS its content, i.e.
#   PL689 content   lives in the node labeled PL11089
#   BP102 content   lives in the node labeled PL689
#   PL6204 content  lives in the node labeled BP102
#   PL12321 content lives in the node labeled PL6204
# The previous text pointed PL689/BP102/PL6204 at nodes that, per that same
# chain, hold different content.
echo "Result after fix:"
echo "  PL11089  → shows PL689   ❌ (can't fix without finding real file)"
echo "  PL689    → shows PL689   ✅ (redirects to current PL11089 node which has PL689)"
echo "  BP102    → shows BP102   ✅ (redirects to current PL689 node which has BP102)"
echo "  PL6204   → shows PL6204  ✅ (redirects to current BP102 node which has PL6204)"
echo "  PL12321  → shows PL12321 ✅ (redirects to current PL6204 node which has PL12321)"
echo ""

# Pause until the operator confirms.
echo "Press ENTER to continue..."
# -r: take the line literally (no backslash processing); the input itself is
# discarded. `|| true`: read returns non-zero on EOF (stdin closed or
# redirected from /dev/null), which would otherwise trip errexit and end the
# script before the message below is printed.
read -r _ || true

echo "Creating fix with what we know..."

