#!/usr/bin/env python # Script to check the ID mapping between alarm data and sensor report import pandas as pd import numpy as np def check_mapping(): print("Loading alarm data...") alarm_df = pd.read_csv('CardinalAlarmsDec25.csv') print("Loading sensor report...") # Try to read with header=0 first (new format) then with header=4 (old format) try: temp_df = pd.read_excel('SensorReport Cardinal 2025-12-23_processed.xlsx', header=0, nrows=5) expected_cols = ['ID', 'Remote', 'Group', 'Type', 'Serial No', 'Name'] has_expected_cols = any(col in temp_df.columns for col in expected_cols) if has_expected_cols: sensor_df = pd.read_excel('SensorReport Cardinal 2025-12-23_processed.xlsx', header=0) print("Using new sensor report format (header=0)") else: sensor_df = pd.read_excel('SensorReport Cardinal 2025-12-23_processed.xlsx', header=4) print("Using old sensor report format (header=4)") except FileNotFoundError: print("Sensor report file not found. Please ensure 'SensorReport Cardinal 2025-12-23_processed.xlsx' is in the current directory.") return print(f"Alarm data shape: {alarm_df.shape}") print(f"Sensor report shape: {sensor_df.shape}") print("\nAlarm data Sensor_Id sample (first 10):") print(alarm_df['Sensor_Id'].head(10).tolist()) print("\nSensor report columns:") print(sensor_df.columns.tolist()) print("\nSensor report 'Remote SN' column info:") print(f"Data type: {sensor_df['Remote SN'].dtype}") print(f"Sample values (first 10): {sensor_df['Remote SN'].head(10).tolist()}") print(f"Non-null count: {sensor_df['Remote SN'].notna().sum()}") # Check for potential matches alarm_sensors = set(alarm_df['Sensor_Id'].unique()) # Clean the Remote SN column to find valid numeric values valid_remote_sns = [] for sn in sensor_df['Remote SN'].dropna(): try: # Try to convert to int valid_remote_sns.append(int(sn)) except (ValueError, TypeError): print(f"Could not convert to int: {sn}") continue sensor_sns = set(valid_remote_sns) print(f"\nNumber of unique alarm sensors: {len(alarm_sensors)}") print(f"Number of valid sensor report IDs: {len(sensor_sns)}") print(f"Common IDs between datasets: {len(alarm_sensors.intersection(sensor_sns))}") if len(alarm_sensors.intersection(sensor_sns)) > 0: print(f"Sample common IDs: {list(alarm_sensors.intersection(sensor_sns))[:10]}") else: print("No direct matches found. Let's check other potential ID columns in sensor report...") # Check other columns that might contain IDs for col in sensor_df.columns: if col != 'Remote SN': print(f"\nChecking column: {col}") non_null_values = sensor_df[col].dropna().head(10).tolist() print(f"Sample values: {non_null_values}") if __name__ == "__main__": check_mapping()