import csv
import io
import pandas as pd
from typing import List, Dict, Tuple


class BulkLookupProcessor:
    """
    Process CSV/Excel files containing NRC/Passport numbers for bulk membership lookup
    """

    # Header names (compared lower-cased and stripped) that identify the
    # document-number column.
    _ID_COLUMN_NAMES = ('nrc', 'passport', 'document_number', 'national_id',
                        'id_number', 'nrc_passport', 'document', 'id')

    # Cell values (compared lower-cased) that mean "no value"; such cells are
    # skipped without recording an error.
    _PLACEHOLDER_VALUES = frozenset({'null', 'none', 'n/a'})

    @staticmethod
    def _find_column_index(headers) -> int:
        """Return the index of the first recognised ID header, or 0 if none match."""
        for index, header in enumerate(headers):
            if str(header).lower().strip() in BulkLookupProcessor._ID_COLUMN_NAMES:
                return index
        return 0

    @staticmethod
    def _collect(cells, document_numbers: List[str], errors: List[str],
                 blank_tokens: Tuple[str, ...] = ()) -> None:
        """Classify (row number, raw cell) pairs into document numbers or errors.

        Args:
            cells: Iterable of (row_number, raw_value); raw_value None means
                the cell is missing.
            document_numbers: List that accepted values are appended to.
            errors: List that "empty document number" messages are appended to.
            blank_tokens: Extra lower-case strings to report as empty cells.
        """
        for row_num, raw in cells:
            doc_num = '' if raw is None else str(raw).strip()
            lowered = doc_num.lower()
            if not doc_num or lowered in blank_tokens:
                errors.append(f"Row {row_num}: Empty document number")
            elif lowered not in BulkLookupProcessor._PLACEHOLDER_VALUES:
                document_numbers.append(doc_num)

    @staticmethod
    def _parse_csv(file, document_numbers: List[str], errors: List[str]) -> None:
        """Read document numbers from a CSV upload into the given lists."""
        raw = file.read()
        if isinstance(raw, (bytes, bytearray)):
            # utf-8-sig drops the BOM that spreadsheet exports often prepend.
            content = raw.decode('utf-8-sig')
        else:
            content = raw.lstrip('\ufeff')

        # newline='' lets the csv module do its own line splitting, as the
        # csv documentation recommends.
        rows = csv.reader(io.StringIO(content, newline=''))
        header = next(rows, None)
        if not header:
            errors.append("CSV file has no headers")
            return

        column = BulkLookupProcessor._find_column_index(header)

        def cells():
            for row in rows:
                if not row:
                    # Completely blank lines are ignored, not reported.
                    continue
                # line_num is the physical line in the file, so reported row
                # numbers stay correct even after skipped blank lines.
                yield rows.line_num, (row[column] if column < len(row) else None)

        BulkLookupProcessor._collect(cells(), document_numbers, errors)

    @staticmethod
    def _parse_excel(file, document_numbers: List[str], errors: List[str]) -> None:
        """Read document numbers from an Excel upload into the given lists."""
        # dtype=str keeps numeric-looking IDs exactly as typed.
        df = pd.read_excel(file, dtype=str)
        if df.empty:
            errors.append("Excel file is empty")
            return

        column = BulkLookupProcessor._find_column_index(df.columns)
        # Row 1 is the header, so data rows are numbered from 2.
        cells = (
            (row_num, None if pd.isna(value) else value)
            for row_num, value in enumerate(df.iloc[:, column], start=2)
        )
        BulkLookupProcessor._collect(cells, document_numbers, errors,
                                     blank_tokens=('nan',))

    @staticmethod
    def parse_file(file) -> Tuple[List[str], List[str]]:
        """
        Parse uploaded CSV or Excel file and extract NRC/Passport numbers

        The column is chosen by the first header matching a known name
        (case-insensitive); if none match, the first column is used.
        Placeholder values ('null', 'none', 'n/a') are skipped silently,
        empty cells are reported as errors, and duplicates are removed
        while keeping first-seen order.

        Args:
            file: Uploaded file object with a ``name`` attribute and ``read()``

        Returns:
            Tuple of (list of document numbers, list of errors)
        """
        # A missing or None name falls through to "Unsupported file format".
        filename = str(getattr(file, 'name', '') or '').lower()
        document_numbers: List[str] = []
        errors: List[str] = []

        try:
            if filename.endswith('.csv'):
                BulkLookupProcessor._parse_csv(file, document_numbers, errors)
            elif filename.endswith(('.xlsx', '.xls')):
                BulkLookupProcessor._parse_excel(file, document_numbers, errors)
            else:
                errors.append("Unsupported file format")
        except Exception as e:
            # Upload boundary: report the failure to the caller instead of
            # raising; anything collected before the failure is still returned.
            errors.append(f"Error parsing file: {str(e)}")

        # dict.fromkeys removes duplicates while preserving order.
        return list(dict.fromkeys(document_numbers)), errors

    @staticmethod
    def detect_id_type(doc_num: str) -> str:
        """
        Detect if document number is NRC or Passport

        Args:
            doc_num: Document number string

        Returns:
            'NRC' or 'Passport'
        """
        # NRC format: ######/##/# -- any value containing a slash is treated
        # as an NRC, everything else as a passport.
        return 'NRC' if '/' in doc_num else 'Passport'