-- ================================================================================
-- DATABASE INVESTIGATION: Document Association Problem
-- ================================================================================
-- Purpose: Find the root cause of why PL11089 shows PL689, PL689 shows BP102, etc.
-- ================================================================================

-- STEP 1: Check the basic associations for our problem documents
-- ================================================================================
PRINT '================================================================================';
PRINT 'STEP 1: Basic Document Associations';
PRINT '================================================================================';
PRINT '';

SELECT 
    sd.id AS doc_id,
    sd.document_number,
    sd.document_type,
    dt.label AS document_type_label,
    sd.page_count,
    sd.create_date,
    sd.submission,
    sd.issued_by
FROM LRSAdmin.lr_source_document sd
LEFT JOIN LRSAdmin.lr_dictionary dt ON dt.Id = sd.document_type
WHERE sd.document_number IN ('PL11089', 'PL689', 'BP102', 'PL6204', 'PL12321')
ORDER BY sd.document_number, sd.id;

PRINT '';
PRINT '================================================================================';
PRINT 'STEP 2: Content URL Associations from alf_node_properties';
PRINT '================================================================================';
PRINT '';

-- This shows what files are associated with each document number in Alfresco
SELECT 
    np.string_value AS document_number,
    q.local_name AS property_type,
    n.id AS node_id,
    n.uuid,
    n.audit_created AS node_created_date,
    cu.content_url,
    cu.content_size,
    cd.id AS content_data_id,
    cd.content_url_id
FROM LRSAdmin.alf_node_properties np
JOIN LRSAdmin.alf_qname q ON q.id = np.qname_id
JOIN LRSAdmin.alf_node n ON n.id = np.node_id AND n.node_deleted = 0
LEFT JOIN LRSAdmin.alf_content_data cd ON cd.id = n.id
LEFT JOIN LRSAdmin.alf_content_url cu ON cu.id = cd.content_url_id
WHERE np.string_value IN ('PL11089', 'PL689', 'BP102', 'PL6204', 'PL12321')
AND q.local_name IN ('targetRids','sourceRids')
ORDER BY np.string_value, cu.content_url;

PRINT '';
PRINT '================================================================================';
PRINT 'STEP 3: Analyze Node Creation Order vs Document Number';
PRINT '================================================================================';
PRINT '';

-- Check if nodes were created in order and labels got rotated
SELECT 
    n.id AS node_id,
    n.audit_created AS node_created,
    np.string_value AS tagged_as_document,
    cu.content_url,
    cu.content_size,
    -- Extract date from URL for comparison
    SUBSTRING(cu.content_url, 9, 10) AS url_date  -- store://YYYY/M/D
FROM LRSAdmin.alf_node_properties np
JOIN LRSAdmin.alf_qname q ON q.id = np.qname_id
JOIN LRSAdmin.alf_node n ON n.id = np.node_id AND n.node_deleted = 0
LEFT JOIN LRSAdmin.alf_content_data cd ON cd.id = n.id
LEFT JOIN LRSAdmin.alf_content_url cu ON cu.id = cd.content_url_id
WHERE np.string_value IN ('PL11089', 'PL689', 'BP102', 'PL6204', 'PL12321')
AND q.local_name IN ('targetRids','sourceRids')
ORDER BY n.id, cu.content_url;

PRINT '';
PRINT '================================================================================';
PRINT 'STEP 4: Check for Pattern - Are Node IDs Sequential?';
PRINT '================================================================================';
PRINT '';

-- If node IDs are sequential but labels rotated, this confirms systematic error
WITH NodeData AS (
    SELECT 
        n.id AS node_id,
        np.string_value AS document_number,
        cu.content_url,
        ROW_NUMBER() OVER (ORDER BY n.id) AS node_order,
        ROW_NUMBER() OVER (ORDER BY np.string_value) AS doc_name_order
    FROM LRSAdmin.alf_node_properties np
    JOIN LRSAdmin.alf_qname q ON q.id = np.qname_id
    JOIN LRSAdmin.alf_node n ON n.id = np.node_id AND n.node_deleted = 0
    LEFT JOIN LRSAdmin.alf_content_data cd ON cd.id = n.id
    LEFT JOIN LRSAdmin.alf_content_url cu ON cu.id = cd.content_url_id
    WHERE np.string_value IN ('PL11089', 'PL689', 'BP102', 'PL6204', 'PL12321')
    AND q.local_name = 'sourceRids'  -- Use just one property to avoid duplicates
)
SELECT 
    node_id,
    document_number,
    content_url,
    node_order,
    doc_name_order,
    CASE 
        WHEN node_order = doc_name_order THEN 'ALIGNED'
        WHEN node_order = doc_name_order + 1 THEN 'SHIFTED +1'
        WHEN node_order = doc_name_order - 1 THEN 'SHIFTED -1'
        ELSE 'MISMATCH'
    END AS alignment_status
FROM NodeData
ORDER BY node_order;

PRINT '';
PRINT '================================================================================';
PRINT 'STEP 5: Compare File Dates with Document Submission Dates';
PRINT '================================================================================';
PRINT '';

-- Check if file dates match submission dates (should be similar)
SELECT 
    sd.document_number,
    sd.submission AS doc_submission_date,
    sd.create_date AS db_entry_date,
    cu.content_url,
    -- Extract date from URL
    SUBSTRING(cu.content_url, 9, 10) AS file_url_date,
    CASE 
        WHEN YEAR(sd.submission) = CAST(SUBSTRING(cu.content_url, 9, 4) AS INT) THEN 'YEAR_MATCH'
        ELSE 'YEAR_MISMATCH'
    END AS date_match_status
FROM LRSAdmin.lr_source_document sd
LEFT JOIN LRSAdmin.alf_node_properties np ON np.string_value = sd.document_number
LEFT JOIN LRSAdmin.alf_qname q ON q.id = np.qname_id AND q.local_name = 'sourceRids'
LEFT JOIN LRSAdmin.alf_node n ON n.id = np.node_id AND n.node_deleted = 0
LEFT JOIN LRSAdmin.alf_content_data cd ON cd.id = n.id
LEFT JOIN LRSAdmin.alf_content_url cu ON cu.id = cd.content_url_id
WHERE sd.document_number IN ('PL11089', 'PL689', 'BP102', 'PL6204', 'PL12321')
ORDER BY sd.document_number;

PRINT '';
PRINT '================================================================================';
PRINT 'STEP 6: Find All Nodes with Content for These Documents';
PRINT '================================================================================';
PRINT '';

-- Get complete picture of all nodes
SELECT 
    np.string_value AS document_number,
    n.id AS node_id,
    n.uuid,
    n.audit_created,
    cu.content_url,
    cu.content_size,
    mt.mimetype_str,
    q.local_name AS property_name
FROM LRSAdmin.alf_node_properties np
JOIN LRSAdmin.alf_qname q ON q.id = np.qname_id
JOIN LRSAdmin.alf_node n ON n.id = np.node_id
LEFT JOIN LRSAdmin.alf_content_data cd ON cd.id = n.id
LEFT JOIN LRSAdmin.alf_content_url cu ON cu.id = cd.content_url_id
LEFT JOIN LRSAdmin.alf_mimetype mt ON mt.id = cd.content_mimetype_id
WHERE np.string_value IN ('PL11089', 'PL689', 'BP102', 'PL6204', 'PL12321')
AND q.local_name IN ('targetRids','sourceRids')
AND n.node_deleted = 0
ORDER BY 
    CASE np.string_value
        WHEN 'PL11089' THEN 1
        WHEN 'PL689' THEN 2
        WHEN 'BP102' THEN 3
        WHEN 'PL6204' THEN 4
        WHEN 'PL12321' THEN 5
    END,
    n.id;

PRINT '';
PRINT '================================================================================';
PRINT 'STEP 7: Check if There Are Duplicate Associations';
PRINT '================================================================================';
PRINT '';

-- Check if any document has multiple different content_urls (which would be odd)
SELECT 
    np.string_value AS document_number,
    COUNT(DISTINCT cu.content_url) AS unique_urls,
    COUNT(DISTINCT n.id) AS unique_nodes,
    STRING_AGG(cu.content_url, '; ') AS all_urls
FROM LRSAdmin.alf_node_properties np
JOIN LRSAdmin.alf_qname q ON q.id = np.qname_id
JOIN LRSAdmin.alf_node n ON n.id = np.node_id AND n.node_deleted = 0
LEFT JOIN LRSAdmin.alf_content_data cd ON cd.id = n.id
LEFT JOIN LRSAdmin.alf_content_url cu ON cu.id = cd.content_url_id
WHERE np.string_value IN ('PL11089', 'PL689', 'BP102', 'PL6204', 'PL12321')
AND q.local_name IN ('targetRids','sourceRids')
GROUP BY np.string_value
ORDER BY np.string_value;

PRINT '';
PRINT '================================================================================';
PRINT 'INVESTIGATION COMPLETE';
PRINT '================================================================================';
PRINT '';
PRINT 'Analysis Guidelines:';
PRINT '- Check if node_ids are sequential';
PRINT '- Check if document numbers are alphabetical or shifted';
PRINT '- Check if file dates match submission dates';
PRINT '- Look for patterns in the alignment_status column';
PRINT '';

