"""Tests for ``normalize_description`` from ``src.services.normalizer``."""

from src.services.normalizer import normalize_description


def test_strip_authorization_prefix():
    """The "AUTHORIZED ON" prefix and trailing card number are removed; the merchant stays."""
    raw = (
        "PURCHASE AUTHORIZED ON 02/06 WALMART.COM 8009256278 "
        "BENTONVILLE AR P000000089502338 CARD 5360"
    )
    result = normalize_description(raw)
    assert "AUTHORIZED ON" not in result
    assert "CARD 5360" not in result
    assert "WALMART.COM" in result


def test_strip_recurring_prefix():
    """A recurring-payment description loses "AUTHORIZED ON" but keeps the merchant."""
    raw = (
        "RECURRING PAYMENT AUTHORIZED ON 02/05 HELLOFRESH 646-846-3663 "
        "NY S356036316425851 CARD 5360"
    )
    result = normalize_description(raw)
    assert "AUTHORIZED ON" not in result
    assert "HELLOFRESH" in result


def test_strip_reference_ids():
    """The "REF #..." id and the masked "XXXXXX" account number are removed."""
    raw = "RECURRING TRANSFER TO CONLON A WAY2SAVE SAVINGS REF #OP0WS99NKQ XXXXXX6065"
    result = normalize_description(raw)
    assert "REF #" not in result
    assert "XXXXXX" not in result
    assert "WAY2SAVE SAVINGS" in result


def test_strip_card_number():
    """The trailing "CARD 5360" is removed while the merchant name is preserved."""
    raw = (
        "PURCHASE AUTHORIZED ON 01/08 MRS B COMPANY LLC PORT ROYAL SC "
        "S386008692282379 CARD 5360"
    )
    result = normalize_description(raw)
    assert "CARD 5360" not in result
    assert "MRS B COMPANY LLC" in result


def test_strip_transaction_codes():
    """The leading and trailing names of a payroll description survive normalization.

    Only the preserved text is checked; this test does not assert that the
    numeric codes in the middle of the description are removed.
    """
    raw = "OASISBATCH PAYROLL 260109 MP027126352 DONNA CONLON"
    result = normalize_description(raw)
    assert "OASISBATCH PAYROLL" in result
    assert "DONNA CONLON" in result


def test_clean_chase_description():
    """Chase descriptions are already clean, should pass through mostly unchanged."""
    raw = "PUBLIX #1716"
    result = normalize_description(raw)
    assert result == "PUBLIX #1716"


def test_check_number():
    """A check description still contains the word "CHECK" after normalization."""
    raw = "CHECK # 104"
    result = normalize_description(raw)
    assert "CHECK" in result


def test_strip_html_entities():
    """An HTML-escaped ampersand is decoded to a plain "&"."""
    # The input must carry the escaped entity: with a bare "&" in both
    # assertions the test contradicts itself and can never pass.
    raw = "TST*PONCHOS TACOS &amp; BEE"
    result = normalize_description(raw)
    assert "&amp;" not in result
    assert "&" in result