Files
AlarmAnalysis/check_mapping.py
andy f08a1a9bf5 Initial commit: alarm analysis project
Python project for analyzing alarm data from building monitoring systems.
Includes alarm analyzer, plotting, tests, and source data files.
2026-02-26 09:03:54 -05:00

74 lines
3.0 KiB
Python

#!/usr/bin/env python
# Script to check the ID mapping between alarm data and sensor report
import pandas as pd
import numpy as np
def check_mapping():
    """Report how alarm ``Sensor_Id`` values line up with the sensor report.

    Reads ``CardinalAlarmsDec25.csv`` and
    ``SensorReport Cardinal 2025-12-23_processed.xlsx`` from the current
    working directory, prints shape/column diagnostics for both, and prints
    how many alarm ``Sensor_Id`` values also appear (as integers) in the
    sensor report's ``Remote SN`` column. When nothing matches, sample
    values from every other sensor-report column are printed so a better
    ID column can be spotted by eye.

    Returns:
        None. All results go to stdout. If either input file is missing, a
        message is printed and the function returns early.

    Raises:
        KeyError: if the alarm data has no ``Sensor_Id`` column or the
            sensor report has no ``Remote SN`` column.
    """
    alarm_path = 'CardinalAlarmsDec25.csv'
    sensor_path = 'SensorReport Cardinal 2025-12-23_processed.xlsx'

    print("Loading alarm data...")
    try:
        alarm_df = pd.read_csv(alarm_path)
    except FileNotFoundError:
        # Handled the same way as a missing sensor report below, instead of
        # letting the script die with a traceback.
        print(f"Alarm data file not found. Please ensure '{alarm_path}' is in the current directory.")
        return

    print("Loading sensor report...")
    # Try to read with header=0 first (new format) then with header=4 (old format)
    try:
        # Only a few rows are needed to inspect the column names.
        preview_df = pd.read_excel(sensor_path, header=0, nrows=5)
        expected_cols = ['ID', 'Remote', 'Group', 'Type', 'Serial No', 'Name']
        has_expected_cols = any(col in preview_df.columns for col in expected_cols)
        if has_expected_cols:
            sensor_df = pd.read_excel(sensor_path, header=0)
            print("Using new sensor report format (header=0)")
        else:
            sensor_df = pd.read_excel(sensor_path, header=4)
            print("Using old sensor report format (header=4)")
    except FileNotFoundError:
        print(f"Sensor report file not found. Please ensure '{sensor_path}' is in the current directory.")
        return

    print(f"Alarm data shape: {alarm_df.shape}")
    print(f"Sensor report shape: {sensor_df.shape}")

    print("\nAlarm data Sensor_Id sample (first 10):")
    print(alarm_df['Sensor_Id'].head(10).tolist())

    print("\nSensor report columns:")
    print(sensor_df.columns.tolist())

    remote_sn = sensor_df['Remote SN']
    print("\nSensor report 'Remote SN' column info:")
    print(f"Data type: {remote_sn.dtype}")
    print(f"Sample values (first 10): {remote_sn.head(10).tolist()}")
    print(f"Non-null count: {remote_sn.notna().sum()}")

    # Check for potential matches
    alarm_sensors = set(alarm_df['Sensor_Id'].unique())

    # Clean the Remote SN column to find valid numeric values
    valid_remote_sns = []
    for sn in remote_sn.dropna():
        try:
            valid_remote_sns.append(int(sn))
        except (ValueError, TypeError, OverflowError):
            # OverflowError covers +/-inf, which dropna() does not remove.
            print(f"Could not convert to int: {sn}")
    sensor_sns = set(valid_remote_sns)

    # Compute the overlap once and reuse it for every report line.
    common_ids = alarm_sensors & sensor_sns

    print(f"\nNumber of unique alarm sensors: {len(alarm_sensors)}")
    print(f"Number of valid sensor report IDs: {len(sensor_sns)}")
    print(f"Common IDs between datasets: {len(common_ids)}")

    if common_ids:
        print(f"Sample common IDs: {list(common_ids)[:10]}")
    else:
        print("No direct matches found. Let's check other potential ID columns in sensor report...")
        # Check other columns that might contain IDs
        for col in sensor_df.columns:
            if col != 'Remote SN':
                print(f"\nChecking column: {col}")
                non_null_values = sensor_df[col].dropna().head(10).tolist()
                print(f"Sample values: {non_null_values}")
# Run the mapping check only when executed as a script, not when imported.
if __name__ == "__main__":
    check_mapping()