#!/usr/bin/env python3
"""
Convert MSSQL Database Dump to MySQL Format
Converts schema and data files from MSSQL syntax to MySQL-compatible syntax
"""

import os
import re
import sys
from pathlib import Path

def convert_schema_mssql_to_mysql(input_file, output_file):
    """Convert MSSQL schema to MySQL format"""
    print(f"Converting schema: {input_file} → {output_file}")
    
    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()
    
    # 1. Remove square brackets from identifiers
    content = re.sub(r'\[([^\]]+)\]', r'`\1`', content)
    
    # 2. Convert IDENTITY columns - MySQL AUTO_INCREMENT only works with integer types
    # First, find DECIMAL/NUMERIC with IDENTITY and convert to BIGINT
    # Pattern: DECIMAL(19,0) IDENTITY(1,1) or NUMERIC(19,0) IDENTITY(1,1)
    content = re.sub(
        r'(DECIMAL|NUMERIC)\((\d+),\s*0\)\s+IDENTITY\([^)]+\)',
        r'BIGINT AUTO_INCREMENT',
        content,
        flags=re.IGNORECASE
    )
    
    # Then convert remaining IDENTITY to AUTO_INCREMENT (for integer types)
    content = re.sub(r'IDENTITY\((\d+),\s*(\d+)\)', r'AUTO_INCREMENT', content)
    
    # 3. Convert data types
    type_conversions = {
        r'\bnvarchar\((\d+)\)': r'VARCHAR(\1)',
        r'\bnvarchar\(MAX\)': r'TEXT',
        r'\bvarchar\(MAX\)': r'TEXT',
        r'\bnchar\((\d+)\)': r'CHAR(\1)',
        r'\bnumeric\((\d+),\s*(\d+)\)': r'DECIMAL(\1,\2)',
        r'\btinyint': r'TINYINT',
        r'\bint\b': r'INT',
        r'\bbigint': r'BIGINT',
        r'\bfloat': r'DOUBLE',
        r'\breal': r'FLOAT',
        r'\bdatetime': r'DATETIME',
        r'\bdatetime2': r'DATETIME',
        r'\bdate': r'DATE',
        r'\btime': r'TIME',
        r'\bbit': r'BOOLEAN',
        r'\bimage': r'LONGBLOB',
        r'\btext': r'TEXT',
        r'\bntext': r'TEXT',
        r'\buniqueidentifier': r'CHAR(36)',  # UUID as CHAR(36)
    }
    
    for pattern, replacement in type_conversions.items():
        content = re.sub(pattern, replacement, content, flags=re.IGNORECASE)
    
    # 3.5. Fix any remaining DECIMAL AUTO_INCREMENT (shouldn't happen, but just in case)
    content = re.sub(
        r'DECIMAL\((\d+),\s*0\)\s+AUTO_INCREMENT',
        r'BIGINT AUTO_INCREMENT',
        content,
        flags=re.IGNORECASE
    )
    
    # 4. Remove GO statements
    content = re.sub(r'^\s*GO\s*$', '', content, flags=re.MULTILINE)
    
    # 5. Convert schema name (LRSAdmin. to database prefix or remove)
    # MySQL uses database.table format, but we'll use just table name
    content = re.sub(r'LRSAdmin\.', '', content)
    
    # 6. Convert CREATE TABLE syntax
    # MySQL doesn't need schema prefix in CREATE TABLE
    content = re.sub(r'CREATE TABLE LRSAdmin\.', 'CREATE TABLE ', content, flags=re.IGNORECASE)
    content = re.sub(r'CREATE TABLE `LRSAdmin`\.', 'CREATE TABLE ', content, flags=re.IGNORECASE)
    
    # 7. Convert ALTER TABLE syntax
    content = re.sub(r'ALTER TABLE LRSAdmin\.', 'ALTER TABLE ', content, flags=re.IGNORECASE)
    content = re.sub(r'ALTER TABLE `LRSAdmin`\.', 'ALTER TABLE ', content, flags=re.IGNORECASE)
    
    # 8. Convert CREATE INDEX syntax
    content = re.sub(r'CREATE (UNIQUE )?INDEX \[([^\]]+)\] ON LRSAdmin\.', r'CREATE \1INDEX `\2` ON ', content, flags=re.IGNORECASE)
    content = re.sub(r'CREATE (UNIQUE )?INDEX `([^`]+)` ON `LRSAdmin`\.', r'CREATE \1INDEX `\2` ON ', content, flags=re.IGNORECASE)
    
    # 9. Move PRIMARY KEY into CREATE TABLE for AUTO_INCREMENT columns
    # First, collect all PRIMARY KEY definitions from ALTER TABLE statements
    pk_pattern = r'ALTER TABLE `?([^\s`]+)`?\s+ADD CONSTRAINT `?([^\s`]+)`?\s+PRIMARY KEY\s*\(([^)]+)\);'
    pk_matches = list(re.finditer(pk_pattern, content, re.IGNORECASE | re.MULTILINE))
    
    # Build a map of table -> primary key columns
    table_pks = {}
    for match in pk_matches:
        table_name = match.group(1).strip('`').strip()
        pk_cols = [col.strip('`').strip() for col in match.group(3).split(',')]
        table_pks[table_name.lower()] = pk_cols
    
    # Now process each CREATE TABLE statement
    def process_create_table(match):
        full_match = match.group(0)
        table_match = re.search(r'CREATE TABLE `?([^\s`]+)`?', full_match, re.IGNORECASE)
        if not table_match:
            return full_match
        
        table_name = table_match.group(1).strip('`').strip()
        table_key = table_name.lower()
        
        # Check if this table has AUTO_INCREMENT columns
        auto_inc_pattern = r'`([^`]+)`\s+[^,\n)]+AUTO_INCREMENT'
        auto_inc_match = re.search(auto_inc_pattern, full_match, re.IGNORECASE)
        
        if auto_inc_match and table_key in table_pks:
            auto_inc_col = auto_inc_match.group(1).strip()
            pk_cols = table_pks[table_key]
            
            # If the AUTO_INCREMENT column is the primary key
            if auto_inc_col in pk_cols:
                # Check if PRIMARY KEY is already in CREATE TABLE
                if 'PRIMARY KEY' not in full_match.upper():
                    # Add PRIMARY KEY before the closing parenthesis and ENGINE
                    pk_cols_str = ', '.join([f'`{col}`' for col in pk_cols])
                    # Find the closing ); and add PRIMARY KEY before it
                    full_match = re.sub(
                        r'(\s+\));',
                        f',\n    PRIMARY KEY ({pk_cols_str})\n\\1',
                        full_match,
                        count=1
                    )
        
        return full_match
    
    # Apply to all CREATE TABLE statements (match from CREATE TABLE to closing );
    # Use a more careful pattern that preserves CREATE TABLE
    def safe_process_create_table(match):
        full_match = match.group(0)
        # Only process if it's a complete CREATE TABLE statement
        if 'CREATE TABLE' in full_match and ');' in full_match:
            return process_create_table(match)
        return full_match
    
    # Match CREATE TABLE with proper multi-line handling
    content = re.sub(
        r'CREATE TABLE `?[^\s`]+`?\s*\([^)]+\)\s*;',
        safe_process_create_table,
        content,
        flags=re.IGNORECASE | re.DOTALL
    )
    
    # 10. Add MySQL engine and charset
    content = re.sub(
        r'(CREATE TABLE `[^`]+` \([^)]+\));',
        r'\1 ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;',
        content,
        flags=re.IGNORECASE | re.DOTALL
    )
    
    # 11. Convert PRIMARY KEY constraint syntax (keep ALTER TABLE for non-AUTO_INCREMENT cases)
    content = re.sub(
        r'ADD CONSTRAINT `?([^\s`]+)`?\s+PRIMARY KEY',
        r'ADD CONSTRAINT `\1` PRIMARY KEY',
        content,
        flags=re.IGNORECASE
    )
    
    # 11. Remove IDENTITY from ALTER TABLE statements (already handled in CREATE)
    
    # 12. Add USE database statement at the beginning
    db_name = "LRS43"  # Default database name
    if not content.strip().startswith('USE '):
        content = f"-- MySQL Database Dump\n-- Converted from MSSQL\nUSE `{db_name}`;\n\nSET FOREIGN_KEY_CHECKS=0;\n\n{content}\n\nSET FOREIGN_KEY_CHECKS=1;\n"
    
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(content)
    
    print(f"✅ Schema converted: {output_file}")


def convert_data_mssql_to_mysql(input_file, output_file):
    """Convert MSSQL data INSERT statements to MySQL format"""
    print(f"Converting data: {input_file} → {output_file}")
    
    with open(input_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    
    output_lines = []
    in_header = True
    
    for line in lines:
        # Skip header comments but preserve them
        if line.strip().startswith('--'):
            output_lines.append(line)
            continue
        
        # Skip empty lines in header
        if in_header and not line.strip():
            output_lines.append(line)
            continue
        
        # Convert INSERT statements
        if line.strip().startswith('INSERT INTO'):
            in_header = False
            
            # 1. Remove square brackets from table and column names
            line = re.sub(r'LRSAdmin\.', '', line)
            line = re.sub(r'\[([^\]]+)\]', r'`\1`', line)
            
            # 2. Convert string values - MySQL uses different escaping
            # MSSQL uses '' for single quote, MySQL uses \'
            # But we need to be careful with existing escaped quotes
            # MySQL prefers '' for compatibility, so we keep it
            
            # 3. Convert datetime format if needed (MySQL accepts ISO format)
            # MSSQL: '2014-12-08T14:06:34.882+01:00'
            # MySQL: '2014-12-08 14:06:34' or keep ISO if supported
            line = re.sub(
                r"'(\d{4}-\d{2}-\d{2})T(\d{2}:\d{2}:\d{2})\.\d+([+-]\d{2}:\d{2})'",
                r"'\1 \2'",
                line
            )
            
            # 4. Convert bit values (0/1 to FALSE/TRUE or keep as 0/1)
            # MySQL accepts 0/1 for boolean, so we keep it
            
            output_lines.append(line)
        else:
            if not in_header:
                output_lines.append(line)
    
    # Add MySQL-specific settings
    if output_lines and not any('SET' in line for line in output_lines[:10]):
        output_lines.insert(0, "-- MySQL Data Dump\n-- Converted from MSSQL\n\nSET FOREIGN_KEY_CHECKS=0;\n\n")
        output_lines.append("\nSET FOREIGN_KEY_CHECKS=1;\n")
    
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(output_lines)
    
    print(f"✅ Data converted: {output_file}")


def main():
    """Main conversion function"""
    dump_dir = Path(__file__).parent
    
    print("="*80)
    print("MSSQL to MySQL Database Dump Converter")
    print("="*80)
    print()
    
    # Create MySQL output directory
    mysql_dir = dump_dir / "mysql"
    mysql_dir.mkdir(exist_ok=True)
    
    # Convert schema files
    schema_files = list(dump_dir.glob("schema_*.sql"))
    if not schema_files:
        print("⚠️  No schema files found")
    else:
        # Use the complete schema file
        complete_schema = None
        for sf in schema_files:
            if '185155' in sf.name:  # The complete one
                complete_schema = sf
                break
        
        if not complete_schema:
            complete_schema = max(schema_files, key=lambda p: p.stat().st_size)
        
        output_schema = mysql_dir / f"schema_mysql_{complete_schema.stem.split('_', 1)[1]}.sql"
        convert_schema_mssql_to_mysql(complete_schema, output_schema)
        print()
    
    # Convert data files
    data_files = list(dump_dir.glob("data_*.sql"))
    if not data_files:
        print("⚠️  No data files found")
    else:
        for data_file in data_files:
            # Skip empty files
            if data_file.stat().st_size < 200:
                print(f"⏭️  Skipping empty file: {data_file.name}")
                continue
            
            # Skip the generic data file if it's empty
            if '185158' in data_file.name:
                continue
            
            output_data = mysql_dir / f"data_mysql_{data_file.stem.replace('data_', '')}.sql"
            convert_data_mssql_to_mysql(data_file, output_data)
        
        print()
    
    # Copy statistics file
    stats_files = list(dump_dir.glob("statistics_*.txt"))
    if stats_files:
        import shutil
        for stats_file in stats_files:
            shutil.copy2(stats_file, mysql_dir / stats_file.name)
        print(f"✅ Statistics file copied to MySQL directory")
        print()
    
    print("="*80)
    print("✅ Conversion Complete!")
    print("="*80)
    print()
    print(f"📁 MySQL files location: {mysql_dir}")
    print()
    print("Next steps:")
    print("1. Review the converted files")
    print("2. Create MySQL database: CREATE DATABASE LRS43 CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;")
    print("3. Import schema: mysql -u root -p LRS43 < mysql/schema_mysql_*.sql")
    print("4. Import data: mysql -u root -p LRS43 < mysql/data_mysql_*.sql")
    print()


if __name__ == "__main__":
    main()

