#!/usr/bin/env python
# Script to check the ID mapping between alarm data and sensor report

import pandas as pd
import numpy as np

def check_mapping():
    print("Loading alarm data...")
    alarm_df = pd.read_csv('CardinalAlarmsDec25.csv')
    
    print("Loading sensor report...")
    # Try to read with header=0 first (new format) then with header=4 (old format)
    try:
        temp_df = pd.read_excel('SensorReport Cardinal 2025-12-23_processed.xlsx', header=0, nrows=5)
        expected_cols = ['ID', 'Remote', 'Group', 'Type', 'Serial No', 'Name']
        has_expected_cols = any(col in temp_df.columns for col in expected_cols)

        if has_expected_cols:
            sensor_df = pd.read_excel('SensorReport Cardinal 2025-12-23_processed.xlsx', header=0)
            print("Using new sensor report format (header=0)")
        else:
            sensor_df = pd.read_excel('SensorReport Cardinal 2025-12-23_processed.xlsx', header=4)
            print("Using old sensor report format (header=4)")
    except FileNotFoundError:
        print("Sensor report file not found. Please ensure 'SensorReport Cardinal 2025-12-23_processed.xlsx' is in the current directory.")
        return
    
    print(f"Alarm data shape: {alarm_df.shape}")
    print(f"Sensor report shape: {sensor_df.shape}")
    
    print("\nAlarm data Sensor_Id sample (first 10):")
    print(alarm_df['Sensor_Id'].head(10).tolist())
    
    print("\nSensor report columns:")
    print(sensor_df.columns.tolist())
    
    print("\nSensor report 'Remote SN' column info:")
    print(f"Data type: {sensor_df['Remote SN'].dtype}")
    print(f"Sample values (first 10): {sensor_df['Remote SN'].head(10).tolist()}")
    print(f"Non-null count: {sensor_df['Remote SN'].notna().sum()}")
    
    # Check for potential matches
    alarm_sensors = set(alarm_df['Sensor_Id'].unique())
    
    # Clean the Remote SN column to find valid numeric values
    valid_remote_sns = []
    for sn in sensor_df['Remote SN'].dropna():
        try:
            # Try to convert to int
            valid_remote_sns.append(int(sn))
        except (ValueError, TypeError):
            print(f"Could not convert to int: {sn}")
            continue
    
    sensor_sns = set(valid_remote_sns)
    
    print(f"\nNumber of unique alarm sensors: {len(alarm_sensors)}")
    print(f"Number of valid sensor report IDs: {len(sensor_sns)}")
    print(f"Common IDs between datasets: {len(alarm_sensors.intersection(sensor_sns))}")
    
    if len(alarm_sensors.intersection(sensor_sns)) > 0:
        print(f"Sample common IDs: {list(alarm_sensors.intersection(sensor_sns))[:10]}")
    else:
        print("No direct matches found. Let's check other potential ID columns in sensor report...")
        
        # Check other columns that might contain IDs
        for col in sensor_df.columns:
            if col != 'Remote SN':
                print(f"\nChecking column: {col}")
                non_null_values = sensor_df[col].dropna().head(10).tolist()
                print(f"Sample values: {non_null_values}")

if __name__ == "__main__":
    check_mapping()